Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 4304ae799e..51367ca1a7 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1113 +1,1113 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
/* Forward declaration: the handler is defined near the end of this file but is
 * registered as a notification callback in controld_timer_fencer_connect().
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and the most we'll restart a
* transition for.
*/
// Per-target failure record, stored as a value in the stonith_failures table
struct st_fail_rec {
int count; // Fencing failures counted for the target since its last reset
};
// Initial value of (and parse default for) stonith_max_attempts
#define DEFAULT_STONITH_MAX_ATTEMPTS 10
// Whether to panic (rather than exit) when notified of our own fencing
static bool fence_reaction_panic = false;
// Failure count at which too_many_st_failures() gives up on a target
static unsigned long int stonith_max_attempts = DEFAULT_STONITH_MAX_ATTEMPTS;
// Maps target name to struct st_fail_rec; created on the first counted failure
static GHashTable *stonith_failures = NULL;
/*!
 * \internal
 * \brief Set the number of fencing failures tolerated before giving up
 *
 * The value is parsed as a score, with DEFAULT_STONITH_MAX_ATTEMPTS as the
 * parser's default. Unparsable or non-positive results leave the current
 * setting untouched.
 *
 * \param[in] value  New maximum, as configured
 */
static void
update_stonith_max_attempts(const char *value)
{
    int score = 0;
    int rc = pcmk_parse_score(value, &score, DEFAULT_STONITH_MAX_ATTEMPTS);

    // The option validator ensures invalid values shouldn't be possible
    CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return);

    if (stonith_max_attempts == score) {
        return; // Unchanged, so there is nothing to log or store
    }

    crm_debug("Maximum fencing attempts per transition is now %d (was %lu)",
              score, stonith_max_attempts);
    stonith_max_attempts = score;
}
/*!
 * \internal
 * \brief Choose how to react when told that the local node was fenced
 *
 * "panic" (case-insensitive) selects panicking; anything else selects the
 * default of stopping, with a warning if the value is not the stop keyword.
 *
 * \param[in] reaction_s  Configured reaction
 */
static void
set_fence_reaction(const char *reaction_s)
{
    if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
        fence_reaction_panic = true;
        return;
    }

    if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {
        crm_warn("Invalid value '%s' for %s, using 'stop'",
                 reaction_s, PCMK_OPT_FENCE_REACTION);
    }
    fence_reaction_panic = false;
}
/*!
 * \internal
 * \brief Apply fencing-related cluster options
 *
 * Looks up the fence reaction and the maximum fencing attempts in the given
 * table and hands each to its setter.
 *
 * \param[in,out] options  Table of configured option names and values
 */
void
controld_configure_fencing(GHashTable *options)
{
    const char *reaction = g_hash_table_lookup(options,
                                               PCMK_OPT_FENCE_REACTION);
    const char *max_attempts = NULL;

    set_fence_reaction(reaction);

    max_attempts = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
    update_stonith_max_attempts(max_attempts);
}
/*!
 * \internal
 * \brief Check whether fencing has failed too often
 *
 * \param[in] target  Node name to check, or NULL to check every recorded node
 *
 * \return TRUE if the target (or, for NULL, any recorded node) has reached
 *         stonith_max_attempts failures, otherwise FALSE
 */
static gboolean
too_many_st_failures(const char *target)
{
    struct st_fail_rec *rec = NULL;

    if (stonith_failures == NULL) {
        return FALSE;
    }

    if (target != NULL) {
        rec = g_hash_table_lookup(stonith_failures, target);
        if ((rec == NULL) || (rec->count < stonith_max_attempts)) {
            return FALSE;
        }

    } else {
        GHashTableIter iter;
        const char *name = NULL;
        gboolean exceeded = FALSE;

        // Stop at the first record at or over the limit
        g_hash_table_iter_init(&iter, stonith_failures);
        while (!exceeded
               && g_hash_table_iter_next(&iter, (gpointer *) &name,
                                         (gpointer *) &rec)) {
            exceeded = (rec->count >= stonith_max_attempts);
        }
        if (!exceeded) {
            return FALSE;
        }
        target = name;
    }

    crm_warn("Too many failures (%d) to fence %s, giving up",
             rec->count, target);
    return TRUE;
}
/*!
 * \internal
 * \brief Zero the fencing failure count of one node or of all nodes
 *
 * \param[in] target  Name of node whose count should be zeroed, or NULL to
 *                    zero every recorded count
 */
void
st_fail_count_reset(const char *target)
{
    GHashTableIter iter;
    const char *name = NULL;
    struct st_fail_rec *entry = NULL;

    if (stonith_failures == NULL) {
        return;
    }

    if (target != NULL) {
        entry = g_hash_table_lookup(stonith_failures, target);
        if (entry != NULL) {
            entry->count = 0;
        }
        return;
    }

    g_hash_table_iter_init(&iter, stonith_failures);
    while (g_hash_table_iter_next(&iter, (gpointer *) &name,
                                  (gpointer *) &entry)) {
        entry->count = 0;
    }
}
/*!
 * \internal
 * \brief Count one more fencing failure for a node
 *
 * The failure table is created on first use. If a new record cannot be
 * allocated, the failure is silently not counted.
 *
 * \param[in] target  Name of node that could not be fenced
 */
static void
st_fail_count_increment(const char *target)
{
    struct st_fail_rec *entry = NULL;

    if (stonith_failures == NULL) {
        stonith_failures = pcmk__strkey_table(free, free);
    }

    entry = g_hash_table_lookup(stonith_failures, target);
    if (entry != NULL) {
        entry->count++;
        return;
    }

    // First failure for this target: start a new record at 1
    entry = malloc(sizeof *entry);
    if (entry == NULL) {
        return;
    }
    entry->count = 1;
    g_hash_table_insert(stonith_failures, pcmk__str_copy(target), entry);
}
/* end stonith fail count functions */
/*!
 * \internal
 * \brief CIB callback for the node state update sent after fencing
 *
 * \param[in] msg        CIB reply (logged on failure)
 * \param[in] call_id    CIB call ID of the update
 * \param[in] rc         Result of the update (negative on failure)
 * \param[in] output     Unused
 * \param[in] user_data  Name of the fenced node, as a string
 */
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
                    void *user_data)
{
    if (rc >= pcmk_ok) {
        crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
        return;
    }

    crm_err("Fencing update %d for %s: failed - %s (%d)",
            call_id, (char *)user_data, pcmk_strerror(rc), rc);
    crm_log_xml_warn(msg, "Failed update");
    abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
                     "CIB update failed", NULL);
}
/*!
* \internal
* \brief Update a fencing target's node state
*
* \param[in] target Node that was successfully fenced
* \param[in] target_xml_id CIB XML ID of target
*/
static void
update_node_state_after_fencing(const char *target, const char *target_xml_id)
{
int rc = pcmk_ok;
pcmk__node_status_t *peer = NULL;
xmlNode *node_state = NULL;
/* We (usually) rely on the membership layer to do
* controld_node_update_cluster, and the peer status callback to do
* controld_node_update_peer, because the node might have already rejoined
* before we get the stonith result here.
*/
uint32_t flags = controld_node_update_join|controld_node_update_expected;
// Both the name and the XML ID are required; do nothing without them
CRM_CHECK((target != NULL) && (target_xml_id != NULL), return);
// Ensure target is cached
peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= controld_node_update_cluster;
}
if (peer->xml_id == NULL) {
crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target);
peer->xml_id = pcmk__str_copy(target_xml_id);
}
crmd_peer_down(peer, TRUE);
// Build the node state element, forcing its ID to the given XML ID
node_state = create_node_state_update(peer, flags, NULL, __func__);
crm_xml_add(node_state, PCMK_XA_ID, target_xml_id);
if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
// For remote nodes, also record the current time as the fenced time
char *now_s = pcmk__ttoa(time(NULL));
crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
free(now_s);
}
// Send the update to the CIB status section; the XML can be freed right after
rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
PCMK_XE_STATUS, node_state,
cib_can_create);
pcmk__xml_free(node_state);
crm_debug("Updating node state for %s after fencing (call %d)", target, rc);
// The copied target name is the user data that cib_fencing_updated() logs
fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
// NOTE(review): presumably clears the node's remaining state from the CIB -- confirm
controld_delete_node_state(peer->name, controld_section_all, cib_none);
}
/*!
 * \internal
 * \brief Abort the current transition because fencing failed
 *
 * \param[in] abort_action  Requested follow-up (restart or wait)
 * \param[in] target        Node whose failure count decides whether a restart
 *                          is still allowed (NULL to consider all nodes)
 * \param[in] reason        Fencing action XML to log as the abort reason
 *                          (or NULL)
 */
static void
abort_for_stonith_failure(enum pcmk__graph_next abort_action,
                          const char *target, const xmlNode *reason)
{
    /* Repeated fencing failures eventually stop us from starting yet another
     * transition. The failure count is consulted only when the caller has not
     * already asked to wait.
     */
    if (abort_action != pcmk__graph_wait) {
        if (too_many_st_failures(target)) {
            abort_action = pcmk__graph_wait;
        }
    }

    abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
                     reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
// Heap-allocated node names (char *), each freed when its entry is removed
static GList *stonith_cleanup_list = NULL;
/*!
 * \internal
 * \brief Append a node to the stonith cleanup list
 *
 * The list keeps its own copy of the name.
 *
 * \param[in] target  Name of node to add
 */
void
add_stonith_cleanup(const char *target)
{
    char *name = pcmk__str_copy(target);

    stonith_cleanup_list = g_list_append(stonith_cleanup_list, name);
}
/*!
 * \internal
 * \brief Drop every entry for a node from the stonith cleanup list
 *
 * Names are compared case-insensitively.
 *
 * \param[in] target  Name of node to remove
 */
void
remove_stonith_cleanup(const char *target)
{
    GList *link = NULL;
    GList *next = NULL;

    for (link = stonith_cleanup_list; link != NULL; link = next) {
        char *name = link->data;

        // Grab the successor first, since the current link may be deleted
        next = link->next;

        if (!pcmk__str_eq(target, name, pcmk__str_casei)) {
            continue;
        }
        crm_trace("Removing %s from the cleanup list", name);
        stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, link);
        free(name);
    }
}
/*!
 * \internal
 * \brief Empty the stonith cleanup list, freeing every stored name
 */
void
purge_stonith_cleanup(void)
{
    GList *link = NULL;

    if (stonith_cleanup_list == NULL) {
        return;
    }

    for (link = stonith_cleanup_list; link != NULL; link = link->next) {
        char *name = link->data;

        crm_info("Purging %s from stonith cleanup list", name);
        free(name);
    }

    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;
}
/*!
 * \internal
 * \brief Update the node state of every node in the cleanup list, then empty it
 */
void
execute_stonith_cleanup(void)
{
    GList *link = stonith_cleanup_list;

    while (link != NULL) {
        char *name = link->data;
        pcmk__node_status_t *node =
            pcmk__get_node(0, name, NULL, pcmk__node_search_cluster_member);
        const char *xml_id = pcmk__cluster_get_xml_id(node);

        crm_notice("Marking %s, target of a previous stonith action, as clean", name);
        update_node_state_after_fencing(name, xml_id);
        free(name);

        link = link->next;
    }

    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
// Fencer API object, allocated on the first connection attempt
static stonith_t *stonith_api = NULL;
// Timer that re-runs controld_timer_fencer_connect() after a failed attempt
static mainloop_timer_t *controld_fencer_connect_timer = NULL;
// "<system name>.<pid>", compared with the client origin of fencing events
static char *te_client_id = NULL;
/*!
 * \internal
 * \brief Fail all unconfirmed fencing actions in a transition graph
 *
 * Used when the fencer connection is lost, so pending fencing actions can no
 * longer complete.
 *
 * \param[in,out] graph  Transition graph to scan (may be NULL)
 *
 * \return TRUE if at least one fencing action was failed (in which case the
 *         transition is also aborted), otherwise FALSE
 */
static gboolean
fail_incompletable_stonith(pcmk__graph_t *graph)
{
    GList *synapse_iter = NULL;
    xmlNode *last_action = NULL;

    if (graph == NULL) {
        return FALSE;
    }

    for (synapse_iter = graph->synapses; synapse_iter != NULL;
         synapse_iter = synapse_iter->next) {

        GList *action_iter = NULL;
        pcmk__graph_synapse_t *synapse = synapse_iter->data;

        if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
            continue;
        }

        for (action_iter = synapse->actions; action_iter != NULL;
             action_iter = action_iter->next) {

            const char *task = NULL;
            pcmk__graph_action_t *action = action_iter->data;

            // Only unconfirmed cluster actions can be pending fencing
            if (action->type != pcmk__cluster_graph_action) {
                continue;
            }
            if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
                continue;
            }

            task = crm_element_value(action->xml, PCMK_XA_OPERATION);
            if (!pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
                continue;
            }

            pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
            last_action = action->xml;
            pcmk__update_graph(graph, action);
            crm_notice("Failing action %d (%s): fencer terminated",
                       action->id, pcmk__xe_id(action->xml));
        }
    }

    if (last_action == NULL) {
        return FALSE;
    }

    crm_warn("Fencer failure resulted in unrunnable actions");
    abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
    return TRUE;
}
/*!
 * \internal
 * \brief Handle a disconnect notification from the fencer
 *
 * \param[in,out] st  Fencer API connection that was lost
 * \param[in]     e   Event notification (unused)
 */
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
    te_cleanup_stonith_history_sync(st, FALSE);

    if (!pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
        crm_info("Disconnected from fencer");

    } else {
        crm_err("Lost fencer connection (will attempt to reconnect)");
        if (!mainloop_timer_running(controld_fencer_connect_timer)) {
            mainloop_timer_start(controld_fencer_connect_timer);
        }
    }

    if (stonith_api != NULL) {
        /* Notifications left in the client API's table would not be
         * reconnected properly, so drop them all
         */
        if (stonith_api->state != stonith_disconnected) {
            stonith_api->cmds->disconnect(st);
        }
        stonith_api->cmds->remove_notification(stonith_api, NULL);
    }

    if (AM_I_DC) {
        // Pending fencing actions can never complete without the fencer
        fail_incompletable_stonith(controld_globals.transition_graph);
        trigger_graph();
    }
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* \param[in] st Fencing API connection (ignored)
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
// Fallback descriptions for log messages when the event omits these fields
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
/* Build this controller's client ID on first use; it is compared with the
 * event's client origin below to detect externally requested fencing
 */
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
/* The operation succeeded only if both the exit status and the execution
 * status say so. A failure whose execution status is "done" is reported as
 * an error instead.
 */
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded && controld_is_local_node(event->target)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic("Notified of own fencing");
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " QB_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
// Failed fencing needs no further handling here
if (succeeded) {
const uint32_t flags = pcmk__node_search_any
|pcmk__node_search_cluster_cib;
pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target,
NULL, flags);
const char *uuid = NULL;
// Nothing more can be done for a target that is in no node cache
if (peer == NULL) {
return;
}
uuid = pcmk__cluster_get_xml_id(peer);
if (AM_I_DC) {
/* The DC always sends updates */
update_node_state_after_fencing(event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via refresh_remote_nodes(). For now, we rely on the
* scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"External Fencing Operation", NULL);
}
} else if (pcmk__str_eq(controld_globals.dc_name, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
// Assume the target was our DC if we don't currently have one
if (controld_globals.dc_name != NULL) {
crm_notice("Fencing target %s was our DC", event->target);
} else {
crm_notice("Fencing target %s may have been our DC",
event->target);
}
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (controld_is_local_node(event->executioner)) {
update_node_state_after_fencing(event->target, uuid);
}
// Remember the target so its update can be re-sent via the cleanup list
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer
*
* \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
gboolean
controld_timer_fencer_connect(gpointer user_data)
{
int rc = pcmk_ok;
// Allocate the API object on first use; it is reused by later attempts
if (stonith_api == NULL) {
- stonith_api = stonith_api_new();
+ stonith_api = stonith__api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return G_SOURCE_REMOVE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return G_SOURCE_REMOVE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
- rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
- if (rc != pcmk_ok) {
+ rc = stonith__api_connect_retry(stonith_api, crm_system_name, 30);
+ if (rc != pcmk_rc_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
- QB_XS " rc=%d", pcmk_strerror(rc), rc);
+ QB_XS " rc=%d", pcmk_rc_str(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
/* Create the retry timer once. It calls this function again with non-NULL
 * user data, so retries stay on this non-blocking path.
 */
if (controld_fencer_connect_timer == NULL) {
controld_fencer_connect_timer =
mainloop_timer_add("controld_fencer_connect", 1000,
TRUE, controld_timer_fencer_connect,
GINT_TO_POINTER(TRUE));
}
if (rc != pcmk_ok) {
if (pcmk_is_set(controld_globals.fsa_input_register,
R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
return G_SOURCE_CONTINUE;
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
}
return G_SOURCE_REMOVE;
}
}
/* NOTE(review): with the "+" lines above, the blocking branch stores a standard
 * return code in rc, while this check compares against the legacy pcmk_ok.
 * Presumably both success values are 0 -- confirm.
 */
if (rc == pcmk_ok) {
// Connected: register notification handlers, then schedule a history sync
stonith_api_operations_t *cmds = stonith_api->cmds;
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
handle_fence_notification);
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Disconnect from the fencer, optionally freeing all connection state
 *
 * \param[in] destroy  If true, also free the API object, the reconnect timer,
 *                     and the cached client ID
 */
void
controld_disconnect_fencer(bool destroy)
{
    if (stonith_api != NULL) {
        // Prevent fencer connection from coming up again
        controld_clear_fsa_input_flags(R_ST_REQUIRED);

        if (stonith_api->state != stonith_disconnected) {
            stonith_api->cmds->disconnect(stonith_api);
        }
        stonith_api->cmds->remove_notification(stonith_api, NULL);
    }

    if (!destroy) {
        return;
    }

    if (stonith_api != NULL) {
        stonith_api->cmds->free(stonith_api);
        stonith_api = NULL;
    }
    if (controld_fencer_connect_timer != NULL) {
        mainloop_timer_del(controld_fencer_connect_timer);
        controld_fencer_connect_timer = NULL;
    }
    free(te_client_id);
    te_client_id = NULL;
}
/* Trigger callback that asks the fencer to synchronize fencing history.
 * Returns TRUE if the request was issued, or FALSE if there is no active
 * fencer connection. The user_data argument is unused.
 */
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
// Stop the pending sync timers and drop the history-synced notification
te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
// The returned history is not examined here, only released
- stonith_history_free(history);
+ stonith__history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
/*!
 * \internal
 * \brief Handle the result of a fencing operation requested for a transition
 *
 * On the DC, the result is matched to its action in the current transition
 * graph. On success the action is confirmed and the target's node state (or
 * its unfencing attributes) are updated; on failure the transition is aborted.
 * On other nodes the result is only logged.
 *
 * \param[in]     stonith  Fencer API connection (unused)
 * \param[in,out] data     Operation result; \c data->userdata is the
 *                         transition key string given when the callback was
 *                         registered, and is freed here
 */
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
    char *uuid = NULL;
    int stonith_id = -1;
    int transition_id = -1;
    pcmk__graph_action_t *action = NULL;
    const char *target = NULL;

    if ((data == NULL) || (data->userdata == NULL)) {
        crm_err("Ignoring fence operation %d result: "
                "No transition key given (bug?)",
                ((data == NULL)? -1 : data->call_id));
        return;
    }

    if (!AM_I_DC) {
        const char *reason = stonith__exit_reason(data);

        if (reason == NULL) {
           reason = pcmk_exec_status_str(stonith__execution_status(data));
        }
        crm_notice("Result of fence operation %d: %d (%s) " QB_XS " key=%s",
                   data->call_id, stonith__exit_status(data), reason,
                   (const char *) data->userdata);

        /* This callback owns the transition key (every other exit past this
         * point frees it), so release it here too rather than leaking it.
         */
        goto bail;
    }

    CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
                                    &stonith_id, NULL),
              goto bail);

    if (controld_globals.transition_graph->complete || (stonith_id < 0)
        || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
        || (controld_globals.transition_graph->id != transition_id)) {
        crm_info("Ignoring fence operation %d result: "
                 "Not from current transition " QB_XS
                 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
                 data->call_id,
                 pcmk__btoa(controld_globals.transition_graph->complete),
                 stonith_id, uuid, controld_globals.te_uuid, transition_id,
                 controld_globals.transition_graph->id);
        goto bail;
    }

    action = controld_get_action(stonith_id);
    if (action == NULL) {
        crm_err("Ignoring fence operation %d result: "
                "Action %d not found in transition graph (bug?) "
                QB_XS " uuid=%s transition=%d",
                data->call_id, stonith_id, uuid, transition_id);
        goto bail;
    }

    target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    if (target == NULL) {
        crm_err("Ignoring fence operation %d result: No target given (bug?)",
                data->call_id);
        goto bail;
    }

    stop_te_timer(action);
    if (stonith__exit_status(data) == CRM_EX_OK) {
        // Named distinctly so it does not shadow the transition UUID above
        const char *target_uuid = crm_element_value(action->xml,
                                                    PCMK__META_ON_NODE_UUID);
        const char *op = crm_meta_value(action->params,
                                        PCMK__META_STONITH_ACTION);

        crm_info("Fence operation %d for %s succeeded", data->call_id, target);
        if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
            te_action_confirmed(action, NULL);
            if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
                const char *value = NULL;
                char *now = pcmk__ttoa(time(NULL));
                gboolean is_remote_node = FALSE;

                /* This check is not 100% reliable, since this node is not
                 * guaranteed to have the remote node cached. However, it
                 * doesn't have to be reliable, since the attribute manager can
                 * learn a node's "remoteness" by other means sooner or later.
                 * This allows it to learn more quickly if this node does have
                 * the information.
                 */
                if (g_hash_table_lookup(pcmk__remote_peer_cache,
                                        target_uuid) != NULL) {
                    is_remote_node = TRUE;
                }

                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
                             is_remote_node);
                free(now);

                value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
                             is_remote_node);

                value = crm_meta_value(action->params,
                                       PCMK__META_DIGESTS_SECURE);
                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
                             is_remote_node);

            } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
                update_node_state_after_fencing(target, target_uuid);
                pcmk__set_graph_action_flags(action,
                                             pcmk__graph_action_sent_update);
            }
        }
        st_fail_count_reset(target);

    } else {
        enum pcmk__graph_next abort_action = pcmk__graph_restart;
        int status = stonith__execution_status(data);
        const char *reason = stonith__exit_reason(data);

        if (reason == NULL) {
            if (status == PCMK_EXEC_DONE) {
                reason = "Agent returned error";
            } else {
                reason = pcmk_exec_status_str(status);
            }
        }
        pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);

        /* If no fence devices were available, there's no use in immediately
         * checking again, so don't start a new transition in that case.
         */
        if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
            crm_warn("Fence operation %d for %s failed: %s "
                     "(aborting transition and giving up for now)",
                     data->call_id, target, reason);
            abort_action = pcmk__graph_wait;
        } else {
            crm_notice("Fence operation %d for %s failed: %s "
                       "(aborting transition)", data->call_id, target, reason);
        }

        /* Increment the fail count now, so abort_for_stonith_failure() can
         * check it. Non-DC nodes will increment it in
         * handle_fence_notification().
         */
        st_fail_count_increment(target);
        abort_for_stonith_failure(abort_action, target, NULL);
    }

    pcmk__update_graph(controld_globals.transition_graph, action);
    trigger_graph();

  bail:
    free(data->userdata);
    free(uuid);
    return;
}
/*!
 * \internal
 * \brief Ask the fencer to fence a target, with an optional delay
 *
 * The timeout is the transition graph's stonith timeout, converted to
 * seconds. Self-fencing is allowed only when exactly one node is in the
 * confirmed join phase.
 *
 * \param[in] target  Name of node to fence
 * \param[in] type    Fencing action to perform
 * \param[in] delay   Delay to pass along with the request
 *
 * \return Result of the fencer API's fence_with_delay() method
 */
static int
fence_with_delay(const char *target, const char *type, int delay)
{
    int timeout_sec = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
    uint32_t options = st_opt_none; // Group of enum stonith_call_options

    if (crmd_join_phase_count(controld_join_confirmed) == 1) {
        stonith__set_call_options(options, target, st_opt_allow_self_fencing);
    }

    return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
                                               type, timeout_sec, 0, delay);
}
/*!
 * \internal
 * \brief Execute a fencing action from a transition graph
 *
 * Validates the action, connects to the fencer (blocking until connected or
 * out of retries), requests the fencing, and registers
 * tengine_stonith_callback() for the result. The transition key allocated
 * here is handed to the callback as its user data.
 *
 * \param[in] graph   Transition graph being executed (ignored)
 * \param[in] action  Fencing action to execute
 *
 * \return Standard Pacemaker return code (EPROTO if the action lacks an ID,
 *         target name, target UUID, or fencing action type)
 */
int
controld_execute_fence_action(pcmk__graph_t *graph,
                              pcmk__graph_action_t *action)
{
    int rc = 0;
    const char *id = pcmk__xe_id(action->xml);
    const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
    const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    const char *type = crm_meta_value(action->params,
                                      PCMK__META_STONITH_ACTION);
    char *transition_key = NULL;
    const char *priority_delay = NULL;
    int delay_i = 0;
    gboolean invalid_action = FALSE;
    int stonith_timeout = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);

    CRM_CHECK(id != NULL, invalid_action = TRUE);
    CRM_CHECK(uuid != NULL, invalid_action = TRUE);
    CRM_CHECK(type != NULL, invalid_action = TRUE);
    CRM_CHECK(target != NULL, invalid_action = TRUE);

    if (invalid_action) {
        crm_log_xml_warn(action->xml, "BadAction");
        return EPROTO;
    }

    priority_delay = crm_meta_value(action->params,
                                    PCMK_OPT_PRIORITY_FENCING_DELAY);

    crm_notice("Requesting fencing (%s) targeting node %s "
               QB_XS " action=%s timeout=%i%s%s",
               type, target, id, stonith_timeout,
               priority_delay ? " priority_delay=" : "",
               priority_delay ? priority_delay : "");

    /* Passing NULL means block until we can connect... */
    controld_timer_fencer_connect(NULL);

    pcmk__scan_min_int(priority_delay, &delay_i, 0);
    rc = fence_with_delay(target, type, delay_i);

    // The key identifies this action when the fencing result comes back
    transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
                                          action->id, 0,
                                          controld_globals.te_uuid);

    // Allow for any positive fencing delay on top of the fencing timeout
    stonith_api->cmds->register_callback(stonith_api, rc,
                                         (stonith_timeout
                                          + (delay_i > 0 ? delay_i : 0)),
                                         st_opt_timeout_updates, transition_key,
                                         "tengine_stonith_callback",
                                         tengine_stonith_callback);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Check a stonith watchdog timeout value where validation applies
 *
 * The value is validated when it is unset or zero, or when we are connected
 * to the fencer and it reports watchdog fencing as enabled for the local
 * node. In all other cases the value is accepted without validation.
 *
 * \param[in] value  Configured timeout (may be NULL)
 *
 * \return Result of validation if it was performed, otherwise true
 */
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
    long long st_timeout = 0;
    const char *our_nodename = NULL;

    if (value != NULL) {
        st_timeout = crm_get_msec(value);
    }
    our_nodename = controld_globals.cluster->priv->node_name;

    if (st_timeout != 0) {
        // A nonzero timeout matters only if watchdog fencing applies to us
        if ((stonith_api == NULL)
            || (stonith_api->state == stonith_disconnected)
            || !stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
                                                               our_nodename)) {
            return true;
        }
    }
    return pcmk__valid_stonith_watchdog_timeout(value);
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
// Main loop trigger that runs do_stonith_history_sync()
static crm_trigger_t *stonith_history_sync_trigger = NULL;
// Timer ("history_sync_short", created with 5000) that sets the trigger
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
// Timer ("history_sync_long", created with 30000) that sets the trigger
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
/*!
 * \internal
 * \brief Stop (or free) the history sync timers and drop the sync notification
 *
 * \param[in,out] st           Fencer API connection whose history-synced
 *                             notification should be removed (may be NULL)
 * \param[in]     free_timers  If true, delete both timers; otherwise just
 *                             stop them
 */
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
    if (!free_timers) {
        mainloop_timer_stop(stonith_history_sync_timer_short);
        mainloop_timer_stop(stonith_history_sync_timer_long);

    } else {
        mainloop_timer_del(stonith_history_sync_timer_short);
        stonith_history_sync_timer_short = NULL;
        mainloop_timer_del(stonith_history_sync_timer_long);
        stonith_history_sync_timer_long = NULL;
    }

    if (st != NULL) {
        st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
    }
}
/*!
 * \internal
 * \brief Handle notification that fencing history has been synchronized
 *
 * \param[in,out] st        Fencer API connection
 * \param[in]     st_event  Event notification (unused)
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
    // A sync has happened, so the pending local sync timers are moot
    te_cleanup_stonith_history_sync(st, FALSE);

    crm_debug("Fence-history synced - cancel all timers");
}
/*!
 * \internal
 * \brief Timer callback that fires the history sync trigger
 *
 * \param[in] user_data  Unused
 *
 * \return FALSE always
 */
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
    mainloop_set_trigger(stonith_history_sync_trigger);

    return FALSE;
}
/*!
 * \internal
 * \brief Schedule a cluster-wide fencing history synchronization
 *
 * \param[in] long_timeout  If true, use the 30-second timer, otherwise the
 *                          5-second one
 */
void
te_trigger_stonith_history_sync(bool long_timeout)
{
    /* The short (5s) delay gives more nodes the chance to show up, so that we
     * don't create unnecessary stonith-history-sync traffic.
     *
     * The long (30s) delay is a fallback: after a successful connection to
     * the fencer, we wait that long for the DC to trigger a history sync, and
     * trigger one locally if that doesn't happen (e.g. the fencer segfaults
     * and is restarted by pacemakerd).
     *
     * Since do_stonith_history_sync() checks the fencer connection itself, it
     * should be fine to leave the sync timers and trigger around.
     */
    if (stonith_history_sync_trigger == NULL) {
        stonith_history_sync_trigger =
            mainloop_add_trigger(G_PRIORITY_LOW,
                                 do_stonith_history_sync, NULL);
    }

    if (!long_timeout) {
        if (stonith_history_sync_timer_short == NULL) {
            stonith_history_sync_timer_short =
                mainloop_timer_add("history_sync_short", 5000,
                                   FALSE, stonith_history_sync_set_trigger,
                                   NULL);
        }
        crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
        mainloop_timer_start(stonith_history_sync_timer_short);
        return;
    }

    if (stonith_history_sync_timer_long == NULL) {
        stonith_history_sync_timer_long =
            mainloop_timer_add("history_sync_long", 30000,
                               FALSE, stonith_history_sync_set_trigger,
                               NULL);
    }
    crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
    mainloop_timer_start(stonith_history_sync_timer_long);
}
/* end stonith history synchronization functions */
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
index e961d3d661..a59dee2356 100644
--- a/daemons/execd/execd_commands.c
+++ b/daemons/execd/execd_commands.c
@@ -1,2004 +1,2010 @@
/*
* Copyright 2012-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/fencing/internal.h>
#include <glib.h>
#include <libxml/tree.h> // xmlNode
// Check whether we have a high-resolution monotonic clock
#undef PCMK__TIME_USE_CGT
#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
# define PCMK__TIME_USE_CGT
# include <time.h> /* clock_gettime */
#endif
#include <unistd.h>
#include <crm/crm.h>
#include <crm/fencing/internal.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-execd.h"
// Global resource table; NOTE(review): presumably keyed by resource ID -- confirm
GHashTable *rsc_list = NULL;
// State kept for one executor command
typedef struct lrmd_cmd_s {
// NOTE(review): units of the timing fields below are not visible here -- confirm
int timeout;
guint interval_ms;
int start_delay;
int timeout_orig;
int call_id;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
int service_flags;
// NOTE(review): presumably heap-allocated strings owned by the command -- confirm
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *real_action;
char *userdata_str;
pcmk__action_result_t result;
/* We can track operation queue time and run time, to be saved with the CIB
* resource history (and displayed in cluster status). We need
* high-resolution monotonic time for this purpose, so we use
* clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
* is disabled).
*
* However, we also need epoch timestamps for recording the time the command
* last ran and the time its return value last changed, for use in time
* displays (as opposed to interval calculations). We keep time_t values for
* this purpose.
*
* The last run time is used for both purposes, so we keep redundant
* monotonic and epoch values for this. Technically the two could represent
* different times, but since time_t has only second resolution and the
* values are used for distinct purposes, that is not significant.
*/
#ifdef PCMK__TIME_USE_CGT
/* Recurring and systemd operations may involve more than one executor
* command per operation, so they need info about the original and the most
* recent.
*/
struct timespec t_first_run; // When op first ran
struct timespec t_run; // When op most recently ran
struct timespec t_first_queue; // When op was first queued
struct timespec t_queue; // When op was most recently queued
#endif
time_t epoch_last_run; // Epoch timestamp of when op last ran
time_t epoch_rcchange; // Epoch timestamp of when rc last changed
// NOTE(review): presumably what the last client notification reported -- confirm
bool first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
// Forward declarations of file-local functions defined later in this file
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean execute_resource_action(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
#ifdef PCMK__TIME_USE_CGT
/*!
 * \internal
 * \brief Tell whether a struct timespec holds a nonzero time
 *
 * \param[in] timespec  Time to examine (may be NULL)
 *
 * \return true if \p timespec is non-NULL and at least one of its fields is
 *         nonzero, otherwise false
 */
static inline bool
time_is_set(const struct timespec *timespec)
{
    if (timespec == NULL) {
        return false;
    }
    if ((timespec->tv_sec == 0) && (timespec->tv_nsec == 0)) {
        return false;
    }
    return true;
}
/*!
 * \internal
 * \brief Store the current monotonic time, also seeding an "original" time
 *
 * \param[out]    t_current  Where to store the current time
 * \param[in,out] t_orig     If not NULL and not yet set, receives a copy of
 *                           \p t_current; otherwise left untouched
 */
static void
get_current_time(struct timespec *t_current, struct timespec *t_orig)
{
    clock_gettime(CLOCK_MONOTONIC, t_current);

    if (t_orig == NULL) {
        return;
    }
    if (time_is_set(t_orig)) {
        // The original time was recorded earlier; keep it
        return;
    }
    *t_orig = *t_current;
}
/*!
 * \internal
 * \brief Compute how many milliseconds separate two times
 *
 * \param[in] now  Later time (NULL means "use the current monotonic time")
 * \param[in] old  Earlier time
 *
 * \return Difference in milliseconds, or 0 if \p old is NULL or unset
 *
 * \note Can overflow on 32bit machines when the differences is around
 *       24 days or more.
 */
static int
time_diff_ms(const struct timespec *now, const struct timespec *old)
{
    struct timespec current = { 0, };
    int elapsed_ms = 0;

    if (!time_is_set(old)) {
        return 0;
    }

    if (now == NULL) {
        clock_gettime(CLOCK_MONOTONIC, &current);
        now = &current;
    }

    elapsed_ms = (now->tv_sec - old->tv_sec) * 1000
                 + (now->tv_nsec - old->tv_nsec) / 1000000;
    return elapsed_ms;
}
/*!
 * \internal
 * \brief Restore a command's run and queue times to those of the first command
 *
 * For recurring and systemd operations, several executor commands may make up
 * one operation. Copying the first-run and first-queue timestamps over the
 * most-recent ones makes later time calculations cover the whole operation
 * rather than only its latest command.
 *
 * \param[in,out] cmd  Executor command whose timestamps should be restored
 *
 * \note It's not obvious what the queued time should be for a systemd
 *       start/stop operation, which might go like this:
 *         initial command queued 5ms, runs 3s
 *         monitor command queued 10ms, runs 10s
 *         monitor command queued 10ms, runs 10s
 *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 *       implementation will report 5ms. If it's 25ms, then we need to
 *       subtract 20ms from the total exec time so as not to count it twice.
 *       We can implement that later if it matters to anyone ...
 */
static void
cmd_original_times(lrmd_cmd_t * cmd)
{
    cmd->t_queue = cmd->t_first_queue;
    cmd->t_run = cmd->t_first_run;
}
#endif
/*!
 * \internal
 * \brief Check whether a command has a given action name and interval
 *
 * \param[in] cmd          Command to check
 * \param[in] action       Action name to compare (case-insensitively)
 * \param[in] interval_ms  Interval to compare
 *
 * \return true if both the interval and the action name match, else false
 */
static inline bool
action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
{
    if (cmd->interval_ms != interval_ms) {
        return false;
    }
    return pcmk__str_eq(cmd->action, action, pcmk__str_casei);
}
/*!
 * \internal
 * \brief Log how an asynchronous command ended
 *
 * Monitors are logged at debug level, everything else at info level.
 *
 * \param[in] cmd            Command whose result should be logged
 * \param[in] exec_time_ms   Execution time in milliseconds, if known
 * \param[in] queue_time_ms  Queue time in milliseconds, if known
 */
static void
log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
{
    GString *buf = g_string_sized_new(100); // reasonable starting size
    const int level = pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
                                   pcmk__str_casei)? LOG_DEBUG : LOG_INFO;

    g_string_append_printf(buf, "%s %s (call %d",
                           cmd->rsc_id, cmd->action, cmd->call_id);
    if (cmd->last_pid != 0) {
        g_string_append_printf(buf, ", PID %d", cmd->last_pid);
    }

    if (cmd->result.execution_status == PCMK_EXEC_DONE) {
        g_string_append_printf(buf, ") exited with status %d",
                               cmd->result.exit_status);

    } else if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
        g_string_append_printf(buf, ") cancelled");

    } else {
        pcmk__g_strcat(buf, ") could not be executed: ",
                       pcmk_exec_status_str(cmd->result.execution_status),
                       NULL);
    }

    if (cmd->result.exit_reason != NULL) {
        pcmk__g_strcat(buf, " (", cmd->result.exit_reason, ")", NULL);
    }

#ifdef PCMK__TIME_USE_CGT
    // Timing details are available only with a monotonic clock
    pcmk__g_strcat(buf, " (execution time ",
                   pcmk__readable_interval(exec_time_ms), NULL);
    if (queue_time_ms > 0) {
        pcmk__g_strcat(buf, " after being queued ",
                       pcmk__readable_interval(queue_time_ms), NULL);
    }
    g_string_append_c(buf, ')');
#endif

    do_crm_log(level, "%s", buf->str);
    g_string_free(buf, TRUE);
}
/*!
 * \internal
 * \brief Log that a command is about to be executed
 *
 * Monitors are logged at debug level, everything else at info level.
 *
 * \param[in] cmd  Command being executed
 */
static void
log_execute(lrmd_cmd_t * cmd)
{
    const int level = pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
                                   pcmk__str_casei)? LOG_DEBUG : LOG_INFO;

    do_crm_log(level, "executing - rsc:%s action:%s call_id:%d",
               cmd->rsc_id, cmd->action, cmd->call_id);
}
/*!
 * \internal
 * \brief Map "monitor" to "status" for resource classes that use "status"
 *
 * \param[in] rsc     Resource whose class determines the mapping
 * \param[in] action  Requested action name
 *
 * \return PCMK_ACTION_STATUS if \p action is a monitor and the resource class
 *         has the pcmk_ra_cap_status capability, otherwise \p action itself
 */
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
    if (!pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
        return action;
    }
    if (!pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
        return action;
    }
    return PCMK_ACTION_STATUS;
}
/*!
 * \internal
 * \brief Create an executor resource object from a request message
 *
 * \param[in] msg  Request XML containing a PCMK__XE_LRMD_RSC element
 *
 * \return Newly allocated resource with its work trigger registered
 */
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
    xmlNode *xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC,
                                        LOG_ERR);
    lrmd_rsc_t *new_rsc = pcmk__assert_alloc(1, sizeof(lrmd_rsc_t));

    // Call options come from the message itself, the rest from the resource XML
    crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &new_rsc->call_opts);

    new_rsc->rsc_id = crm_element_value_copy(xml, PCMK__XA_LRMD_RSC_ID);
    new_rsc->class = crm_element_value_copy(xml, PCMK__XA_LRMD_CLASS);
    new_rsc->provider = crm_element_value_copy(xml, PCMK__XA_LRMD_PROVIDER);
    new_rsc->type = crm_element_value_copy(xml, PCMK__XA_LRMD_TYPE);

    new_rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH,
                                         execute_resource_action, new_rsc);

    // Initialize fence device probes (to return "not running")
    pcmk__set_result(&new_rsc->fence_probe_result, CRM_EX_ERROR,
                     PCMK_EXEC_NO_FENCE_DEVICE, NULL);
    return new_rsc;
}
/*!
 * \internal
 * \brief Create an executor command object from a client request
 *
 * \param[in] msg     Request XML containing a PCMK__XE_LRMD_RSC element
 * \param[in] client  Client that sent the request
 *
 * \return Newly allocated command (to be freed with free_lrmd_cmd())
 */
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
{
    int opts = 0;
    const char *on_fail = NULL;
    xmlNode *xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC,
                                        LOG_ERR);
    lrmd_cmd_t *new_cmd = pcmk__assert_alloc(1, sizeof(lrmd_cmd_t));

    crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &opts);
    new_cmd->call_opts = opts;
    new_cmd->client_id = pcmk__str_copy(client->id);

    crm_element_value_int(msg, PCMK__XA_LRMD_CALLID, &new_cmd->call_id);
    crm_element_value_ms(xml, PCMK__XA_LRMD_RSC_INTERVAL,
                         &new_cmd->interval_ms);
    crm_element_value_int(xml, PCMK__XA_LRMD_TIMEOUT, &new_cmd->timeout);
    crm_element_value_int(xml, PCMK__XA_LRMD_RSC_START_DELAY,
                          &new_cmd->start_delay);

    // Remember the requested timeout, since timeout may be adjusted later
    new_cmd->timeout_orig = new_cmd->timeout;

    new_cmd->origin = crm_element_value_copy(xml, PCMK__XA_LRMD_ORIGIN);
    new_cmd->action = crm_element_value_copy(xml, PCMK__XA_LRMD_RSC_ACTION);
    new_cmd->userdata_str =
        crm_element_value_copy(xml, PCMK__XA_LRMD_RSC_USERDATA_STR);
    new_cmd->rsc_id = crm_element_value_copy(xml, PCMK__XA_LRMD_RSC_ID);

    new_cmd->params = xml2list(xml);

    on_fail = g_hash_table_lookup(new_cmd->params, "CRM_meta_on_fail");
    if (pcmk__str_eq(on_fail, PCMK_VALUE_BLOCK, pcmk__str_casei)) {
        crm_debug("Setting flag to leave pid group on timeout and "
                  "only kill action pid for " PCMK__OP_FMT,
                  new_cmd->rsc_id, new_cmd->action, new_cmd->interval_ms);
        new_cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
                                                    LOG_TRACE, "Action",
                                                    new_cmd->action, 0,
                                                    SVC_ACTION_LEAVE_GROUP,
                                                    "SVC_ACTION_LEAVE_GROUP");
    }
    return new_cmd;
}
/*!
 * \internal
 * \brief Cancel a command's recurring fence-monitor timer, if one is running
 *
 * \param[in,out] cmd  Command whose timer should be stopped (may be NULL)
 */
static void
stop_recurring_timer(lrmd_cmd_t *cmd)
{
    if (cmd == NULL) {
        return;
    }
    if (cmd->stonith_recurring_id != 0) {
        g_source_remove(cmd->stonith_recurring_id);
    }
    cmd->stonith_recurring_id = 0;
}
/*!
 * \internal
 * \brief Free an executor command and everything it owns
 *
 * Any timers registered for the command are removed first so that they cannot
 * fire with a dangling pointer.
 *
 * \param[in,out] cmd  Command to free
 */
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
    // Timers
    stop_recurring_timer(cmd);
    if (cmd->delay_id != 0) {
        g_source_remove(cmd->delay_id);
    }

    // Containers and result
    if (cmd->params != NULL) {
        g_hash_table_destroy(cmd->params);
    }
    pcmk__reset_result(&(cmd->result));

    // Strings
    free(cmd->client_id);
    free(cmd->rsc_id);
    free(cmd->userdata_str);
    free(cmd->real_action);
    free(cmd->action);
    free(cmd->origin);

    free(cmd);
}
/*!
 * \internal
 * \brief Timer callback that queues the next run of a recurring fence monitor
 *
 * \param[in,out] data  Recurring command (lrmd_cmd_t *) whose interval expired
 *
 * \return FALSE always (so the timer source is not repeated)
 */
static gboolean
stonith_recurring_op_helper(gpointer data)
{
    lrmd_cmd_t *op = data;
    lrmd_rsc_t *owner = NULL;

    // The timer that called us will not fire again
    op->stonith_recurring_id = 0;

    if (op->rsc_id == NULL) {
        return FALSE;
    }

    owner = g_hash_table_lookup(rsc_list, op->rsc_id);
    pcmk__assert(owner != NULL);

    // Move the command from the recurring list to the list awaiting execution
    owner->recurring_ops = g_list_remove(owner->recurring_ops, op);
    owner->pending_ops = g_list_append(owner->pending_ops, op);

#ifdef PCMK__TIME_USE_CGT
    get_current_time(&(op->t_queue), &(op->t_first_queue));
#endif

    mainloop_set_trigger(owner->work);
    return FALSE;
}
/*!
 * \internal
 * \brief Start the interval timer for a recurring fence monitor
 *
 * Nothing is done for a NULL command or one with a zero interval.
 *
 * \param[in,out] cmd  Recurring command to schedule
 */
static inline void
start_recurring_timer(lrmd_cmd_t *cmd)
{
    if (cmd == NULL) {
        return;
    }
    if (cmd->interval_ms == 0) {    // unsigned, so this is the only "<= 0" case
        return;
    }
    cmd->stonith_recurring_id =
        pcmk__create_timer(cmd->interval_ms, stonith_recurring_op_helper, cmd);
}
/*!
 * \internal
 * \brief Timer callback run when a command's start delay has elapsed
 *
 * \param[in,out] data  Delayed command (lrmd_cmd_t *)
 *
 * \return FALSE always (so the timer source is not repeated)
 */
static gboolean
start_delay_helper(gpointer data)
{
    lrmd_cmd_t *delayed = data;
    lrmd_rsc_t *owner = NULL;

    delayed->delay_id = 0;

    if (delayed->rsc_id != NULL) {
        owner = g_hash_table_lookup(rsc_list, delayed->rsc_id);
    }
    if (owner != NULL) {
        // Let the resource's work trigger pick the command up
        mainloop_set_trigger(owner->work);
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Look for a command equivalent to a given one in a list
 *
 * Two commands are considered equivalent when action_matches() says their
 * action name and interval agree.
 *
 * \param[in] action_list  List of lrmd_cmd_t to search
 * \param[in] cmd          Command to search for
 *
 * \return First equivalent command in \p action_list, or NULL if none
 */
static lrmd_cmd_t *
find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
{
    const GList *node = action_list;

    while (node != NULL) {
        lrmd_cmd_t *candidate = node->data;

        if (action_matches(cmd, candidate->action, candidate->interval_ms)) {
            return candidate;
        }
        node = node->next;
    }
    return NULL;
}
/*!
 * \internal
 * \brief Merge a new recurring command into an equivalent existing one
 *
 * \param[in,out] rsc  Resource the command is for
 * \param[in,out] cmd  Newly requested command; freed if it is merged
 *
 * \return true if \p cmd was merged into an existing command (and freed),
 *         false if the caller should schedule \p cmd normally
 */
static bool
merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
    lrmd_cmd_t *existing = NULL;
    bool waiting_in_interval = false;
    bool is_fence_device = false;

    // Only recurring commands can have duplicates worth merging
    if (cmd->interval_ms == 0) {
        return false;
    }

    // Search for a duplicate of this action (in-flight or not)
    existing = find_duplicate_action(rsc->pending_ops, cmd);
    if (existing == NULL) {
        existing = find_duplicate_action(rsc->recurring_ops, cmd);
        if (existing == NULL) {
            return false;
        }
        waiting_in_interval = true;
    }

    is_fence_device = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
                                   pcmk__str_casei);

    /* Do not merge fencing monitors marked for cancellation, so we can reply to
     * the cancellation separately.
     */
    if (is_fence_device
        && (existing->result.execution_status == PCMK_EXEC_CANCELLED)) {
        return false;
    }

    /* This should not occur. If it does, we need to investigate how something
     * like this is possible in the controller.
     */
    crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
             "), merging with previous op entry",
             rsc->rsc_id, normalize_action_name(rsc, existing->action),
             existing->interval_ms);

    // The existing command takes over the new one's call ID and user data
    existing->first_notify_sent = false;
    free(existing->userdata_str);
    existing->userdata_str = cmd->userdata_str;
    cmd->userdata_str = NULL;
    existing->call_id = cmd->call_id;

    free_lrmd_cmd(cmd);

    if (!waiting_in_interval) {
        // Still pending, so it will run with the merged data anyway
        return true;
    }

    /* The existing command has already executed at least once and is waiting
     * in its interval. Stop waiting and initiate a new instance now.
     */
    if (is_fence_device) {
        stop_recurring_timer(existing);
        stonith_recurring_op_helper(existing);
    } else {
        services_action_kick(rsc->rsc_id,
                             normalize_action_name(rsc, existing->action),
                             existing->interval_ms);
    }
    return true;
}
/*!
 * \internal
 * \brief Queue a command for execution on a resource
 *
 * \param[in,out] rsc  Resource the command is for
 * \param[in,out] cmd  Command to queue (may be freed if merged with an
 *                     equivalent recurring command)
 */
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
    CRM_CHECK(cmd != NULL, return);
    CRM_CHECK(rsc != NULL, return);

    crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);

    // Equivalent of cmd may already have been scheduled
    if (merge_recurring_duplicate(rsc, cmd)) {
        return;
    }

    /* The controller expects the executor to automatically cancel
     * recurring operations before a resource stops.
     */
    if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
        cancel_all_recurring(rsc, NULL);
    }

    rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);

#ifdef PCMK__TIME_USE_CGT
    get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
#endif

    mainloop_set_trigger(rsc->work);

    if (cmd->start_delay != 0) {
        cmd->delay_id = pcmk__create_timer(cmd->start_delay,
                                           start_delay_helper, cmd);
    }
}
/*!
 * \internal
 * \brief Create a reply element for an executor API request
 *
 * \param[in] origin   Value for the reply's origin attribute
 * \param[in] rc       Return code to report
 * \param[in] call_id  Call ID of the request being answered
 *
 * \return Newly created PCMK__XE_LRMD_REPLY element
 */
static xmlNode *
create_lrmd_reply(const char *origin, int rc, int call_id)
{
    xmlNode *xml = pcmk__xe_create(NULL, PCMK__XE_LRMD_REPLY);

    crm_xml_add(xml, PCMK__XA_LRMD_ORIGIN, origin);
    crm_xml_add_int(xml, PCMK__XA_LRMD_RC, rc);
    crm_xml_add_int(xml, PCMK__XA_LRMD_CALLID, call_id);

    return xml;
}
/*!
 * \internal
 * \brief Send a notification to one IPC client (hash table iterator callback)
 *
 * \param[in] key        Ignored
 * \param[in] value      Client to notify (pcmk__client_t *)
 * \param[in] user_data  Notification XML to send (xmlNode *)
 */
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__client_t *client = value;
    xmlNode *notification = user_data;
    const char *reason = NULL;
    int level = LOG_WARNING;
    int rc;

    CRM_CHECK(client != NULL, return);

    if (client->name == NULL) {
        crm_trace("Skipping notification to client without name");
        return;
    }

    if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
        /* We only want to notify clients of the executor IPC API. If we are
         * running as Pacemaker Remote, we may have clients proxied to other
         * IPC services in the cluster, so skip those.
         */
        crm_trace("Skipping executor API notification to client %s",
                  pcmk__client_name(client));
        return;
    }

    rc = lrmd_server_send_notify(client, notification);
    if (rc == pcmk_rc_ok) {
        return;
    }

    if ((rc == ENOTCONN) || (rc == EPIPE)) {
        // Client exited without waiting for notification
        level = LOG_INFO;
        reason = "Disconnected";
    } else {
        reason = pcmk_rc_str(rc);
    }

    do_crm_log(level, "Could not notify client %s: %s " QB_XS " rc=%d",
               pcmk__client_name(client), reason, rc);
}
/*!
 * \internal
 * \brief Log a command's completion and notify clients of its result
 *
 * The notification goes only to the originating client if the command was
 * requested with lrmd_opt_notify_orig_only, otherwise to all IPC clients. It
 * is skipped entirely when the originator asked for change-only notifications
 * and the result is the same as the one last notified.
 *
 * \param[in,out] cmd  Completed command
 */
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
    xmlNode *xml = NULL;
    const char *output = NULL;
    int exec_ms = 0;
    int queue_ms = 0;

#ifdef PCMK__TIME_USE_CGT
    exec_ms = time_diff_ms(NULL, &(cmd->t_run));
    queue_ms = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
#endif

    log_finished(cmd, exec_ms, queue_ms);

    /* If the originator requested to be notified only for changes in recurring
     * operation results, skip the notification if the result hasn't changed.
     */
    if (cmd->first_notify_sent
        && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
        && (cmd->last_notify_rc == cmd->result.exit_status)
        && (cmd->last_notify_op_status == cmd->result.execution_status)) {
        return;
    }

    // Remember what we are about to report, for the comparison above
    cmd->first_notify_sent = true;
    cmd->last_notify_rc = cmd->result.exit_status;
    cmd->last_notify_op_status = cmd->result.execution_status;

    xml = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);

    crm_xml_add(xml, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add_int(xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout);
    crm_xml_add_ms(xml, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
    crm_xml_add_int(xml, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay);
    crm_xml_add_int(xml, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status);
    crm_xml_add_int(xml, PCMK__XA_LRMD_EXEC_OP_STATUS,
                    cmd->result.execution_status);
    crm_xml_add_int(xml, PCMK__XA_LRMD_CALLID, cmd->call_id);
    crm_xml_add_int(xml, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted);

    crm_xml_add_ll(xml, PCMK__XA_LRMD_RUN_TIME,
                   (long long) cmd->epoch_last_run);
    crm_xml_add_ll(xml, PCMK__XA_LRMD_RCCHANGE_TIME,
                   (long long) cmd->epoch_rcchange);
#ifdef PCMK__TIME_USE_CGT
    crm_xml_add_int(xml, PCMK__XA_LRMD_EXEC_TIME, exec_ms);
    crm_xml_add_int(xml, PCMK__XA_LRMD_QUEUE_TIME, queue_ms);
#endif

    crm_xml_add(xml, PCMK__XA_LRMD_OP, LRMD_OP_RSC_EXEC);
    crm_xml_add(xml, PCMK__XA_LRMD_RSC_ID, cmd->rsc_id);

    // Report the originally requested action if a follow-up replaced it
    crm_xml_add(xml, PCMK__XA_LRMD_RSC_ACTION,
                (cmd->real_action != NULL)? cmd->real_action : cmd->action);

    crm_xml_add(xml, PCMK__XA_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
    crm_xml_add(xml, PCMK__XA_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);

    // Prefer standard error over standard output when both are available
    output = cmd->result.action_stderr;
    if (output == NULL) {
        output = cmd->result.action_stdout;
    }
    if (output != NULL) {
        crm_xml_add(xml, PCMK__XA_LRMD_RSC_OUTPUT, output);
    }

    if (cmd->params != NULL) {
        GHashTableIter iter;
        char *name = NULL;
        char *val = NULL;
        xmlNode *attrs = pcmk__xe_create(xml, PCMK__XE_ATTRIBUTES);

        g_hash_table_iter_init(&iter, cmd->params);
        while (g_hash_table_iter_next(&iter, (gpointer *) &name,
                                      (gpointer *) &val)) {
            hash2smartfield((gpointer) name, (gpointer) val, attrs);
        }
    }

    if ((cmd->client_id == NULL)
        || !pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
        pcmk__foreach_ipc_client(send_client_notify, xml);

    } else {
        pcmk__client_t *originator = pcmk__find_client_by_id(cmd->client_id);

        if (originator != NULL) {
            send_client_notify(originator->id, originator, xml);
        }
    }

    pcmk__xml_free(xml);
}
/*!
 * \internal
 * \brief Notify all IPC clients of the result of a non-exec request
 *
 * Nothing is built or sent if there are no IPC clients.
 *
 * \param[in] rc       Return code to report
 * \param[in] request  Request XML that the notification is about
 */
static void
send_generic_notify(int rc, xmlNode * request)
{
    int call_id = 0;
    xmlNode *xml = NULL;
    xmlNode *rsc_xml = NULL;
    const char *rsc_id = NULL;
    const char *op = NULL;

    if (pcmk__ipc_client_count() == 0) {
        return;
    }

    rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC,
                                   LOG_ERR);
    rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
    op = crm_element_value(request, PCMK__XA_LRMD_OP);
    crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id);

    xml = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
    crm_xml_add(xml, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add_int(xml, PCMK__XA_LRMD_RC, rc);
    crm_xml_add_int(xml, PCMK__XA_LRMD_CALLID, call_id);
    crm_xml_add(xml, PCMK__XA_LRMD_OP, op);
    crm_xml_add(xml, PCMK__XA_LRMD_RSC_ID, rsc_id);

    pcmk__foreach_ipc_client(send_client_notify, xml);
    pcmk__xml_free(xml);
}
/*!
 * \internal
 * \brief Clear the per-run state of a command so it can be executed again
 *
 * The "first run"/"first queue" timestamps and the rc-change timestamp are
 * intentionally left alone.
 *
 * \param[in,out] cmd  Command to reset
 */
static void
cmd_reset(lrmd_cmd_t * cmd)
{
    cmd->epoch_last_run = 0;
    cmd->last_pid = 0;

#ifdef PCMK__TIME_USE_CGT
    memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
    memset(&cmd->t_run, 0, sizeof(cmd->t_run));
#endif

    // Drop the previous result, then mark the (empty) result as done
    pcmk__reset_result(&(cmd->result));
    cmd->result.execution_status = PCMK_EXEC_DONE;
}
/*!
 * \internal
 * \brief Finish processing a completed command
 *
 * Clients are notified of the result. One-shot commands and cancelled
 * recurring commands are then removed from the resource's lists and freed;
 * other recurring commands are reset for their next run.
 *
 * \param[in,out] cmd  Completed command (may be freed by this call)
 * \param[in,out] rsc  Resource the command belongs to, or NULL if the
 *                     resource no longer exists
 */
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
    crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
              rsc ? rsc->active : NULL, cmd);

    if (rsc == NULL) {
        cmd->rsc_deleted = 1;

    } else if (rsc->active == cmd) {
        // The resource is free to start its next queued command
        rsc->active = NULL;
        mainloop_set_trigger(rsc->work);
    }

    /* reset original timeout so client notification has correct information */
    cmd->timeout = cmd->timeout_orig;

    send_cmd_complete_notify(cmd);

    if (cmd->interval_ms == 0) {
        // One-shot command: done for good
        if (rsc != NULL) {
            rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
        }
        free_lrmd_cmd(cmd);

    } else if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
        // Cancelled recurring command: it could be in either list
        if (rsc != NULL) {
            rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
            rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
        }
        free_lrmd_cmd(cmd);

    } else {
        /* Clear all the values pertaining just to the last iteration of a recurring op. */
        cmd_reset(cmd);
    }
}
// User data passed to notify_one_client() when announcing a new client
struct notify_new_client_data {
xmlNode *notify; // Notification to send to each existing client
pcmk__client_t *new_client; // Newly connected client, which is not notified itself
};
/*!
 * \internal
 * \brief Tell one client about a new client (hash table iterator callback)
 *
 * The new client itself is skipped.
 *
 * \param[in] key        Key of the client's table entry
 * \param[in] value      Client to notify (pcmk__client_t *)
 * \param[in] user_data  Notification and new client
 *                       (struct notify_new_client_data *)
 */
static void
notify_one_client(gpointer key, gpointer value, gpointer user_data)
{
    struct notify_new_client_data *info = user_data;
    pcmk__client_t *existing = value;

    if (pcmk__str_eq(existing->id, info->new_client->id, pcmk__str_casei)) {
        return;
    }
    send_client_notify(key, (gpointer) existing, (gpointer) info->notify);
}
/*!
 * \internal
 * \brief Notify all other IPC clients that a new client has connected
 *
 * \param[in] new_client  Client that just connected
 */
void
notify_of_new_client(pcmk__client_t *new_client)
{
    struct notify_new_client_data info;

    info.notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
    info.new_client = new_client;

    crm_xml_add(info.notify, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add(info.notify, PCMK__XA_LRMD_OP, LRMD_OP_NEW_CLIENT);

    pcmk__foreach_ipc_client(notify_one_client, &info);

    pcmk__xml_free(info.notify);
}
/*!
 * \internal
 * \brief Clean up after a client that is disconnecting
 *
 * For every resource registered with lrmd_opt_drop_recurring, cancel the
 * recurring operations that the client may have initiated.
 *
 * \param[in] client_id  ID of the disconnecting client
 */
void
client_disconnect_cleanup(const char *client_id)
{
    GHashTableIter iter;
    lrmd_rsc_t *entry = NULL;

    g_hash_table_iter_init(&iter, rsc_list);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &entry)) {
        if (!pcmk_all_flags_set(entry->call_opts, lrmd_opt_drop_recurring)) {
            continue;
        }
        /* This client is disconnecting, drop any recurring operations
         * it may have initiated on the resource */
        cancel_all_recurring(entry, client_id);
    }
}
/*!
 * \internal
 * \brief Handle completion of an action run via the services library
 *
 * The action's result is recorded in the executor command attached to it.
 * For most resource classes the command is then finalized right away. For
 * systemd resources (when a monotonic clock is available), a result that is
 * still pending causes a follow-up monitor to be re-scheduled until the
 * original timeout has been used up, at which point the command is failed
 * with a timeout.
 *
 * \param[in,out] action  Completed action; its cb_data is the lrmd_cmd_t
 */
static void
action_complete(svc_action_t * action)
{
    lrmd_rsc_t *rsc;
    lrmd_cmd_t *cmd = action->cb_data;
    enum ocf_exitcode code;

#ifdef PCMK__TIME_USE_CGT
    const char *rclass = NULL;
    bool goagain = false;
    int time_sum = 0;
    int timeout_left = 0;
    int delay = 0;
#endif

    if (!cmd) {
        crm_err("Completed executor action (%s) does not match any known operations",
                action->id);
        return;
    }

#ifdef PCMK__TIME_USE_CGT
    if (cmd->result.exit_status != action->rc) {
        cmd->epoch_rcchange = time(NULL);
    }
#endif

    cmd->last_pid = action->pid;

    // Cast variable instead of function return to keep compilers happy
    code = services_result2ocf(action->standard, cmd->action, action->rc);
    pcmk__set_result(&(cmd->result), (int) code,
                     action->status, services__exit_reason(action));

    rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;

#ifdef PCMK__TIME_USE_CGT
    if (rsc != NULL) {
        rclass = rsc->class;
#if PCMK__ENABLE_SERVICE
        if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SERVICE,
                         pcmk__str_casei)) {
            rclass = resources_find_service_class(rsc->type);
        }
#endif
    }

    // Only systemd resources need the follow-up handling below
    if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
        goto finalize;
    }

    if (pcmk__result_ok(&(cmd->result))
        && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
                                PCMK_ACTION_STOP, NULL)) {
        /* Getting results for when a start or stop action completes is now
         * handled by watching for JobRemoved() signals from systemd and
         * reacting to them. So, we can bypass the rest of the code in this
         * function for those actions, and simply finalize cmd.
         *
         * @TODO When monitors are handled in the same way, this function
         * can either be drastically simplified or done away with entirely.
         */
        services__copy_result(action, &(cmd->result));
        goto finalize;

    } else if (cmd->result.execution_status == PCMK_EXEC_PENDING &&
               pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
               cmd->interval_ms == 0 &&
               cmd->real_action == NULL) {
        /* If the state is Pending at the time of probe, execute follow-up monitor. */
        goagain = true;
        cmd->real_action = cmd->action;
        cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);

    } else if (cmd->real_action != NULL) {
        // This is follow-up monitor to check whether start/stop/probe(monitor) completed
        if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
            goagain = true;

        } else if (pcmk__result_ok(&(cmd->result))
                   && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
                                   pcmk__str_casei)) {
            goagain = true;

        } else {
            /* Reuse the function-scope variables here instead of declaring
             * new ones that would shadow them
             */
            time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
            timeout_left = cmd->timeout_orig - time_sum;

            crm_debug("%s systemd %s is now complete (elapsed=%dms, "
                      "remaining=%dms): %s (%d)",
                      cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
                      crm_exit_str(cmd->result.exit_status),
                      cmd->result.exit_status);
            cmd_original_times(cmd);

            // Monitors may return "not running", but start/stop shouldn't
            if ((cmd->result.execution_status == PCMK_EXEC_DONE)
                && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {

                if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START,
                                 pcmk__str_casei)) {
                    cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
                } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
                                        pcmk__str_casei)) {
                    cmd->result.exit_status = PCMK_OCF_OK;
                }
            }
        }

    } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)
               && (cmd->interval_ms > 0)) {
        /* For monitors, excluding follow-up monitors, if the pending state
         * persists from the first notification until its timeout, it will be
         * treated as a timeout.
         */
        if ((cmd->result.execution_status == PCMK_EXEC_PENDING) &&
            (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) {

            /* Seconds elapsed beyond the deadline, which is the last rc change
             * plus the original timeout (timeout_orig is in milliseconds)
             */
            int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000));

            if (time_left >= 0) {
                // The message labels the timeout in seconds, so convert it
                crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout "
                           "(first pending notification=%s timeout=%ds)",
                           cmd->rsc_id, cmd->action, cmd->result.exit_status,
                           pcmk__trim(ctime(&cmd->epoch_rcchange)),
                           cmd->timeout_orig / 1000);
                pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                                 PCMK_EXEC_TIMEOUT,
                                 "Investigate reason for timeout, and adjust "
                                 "configured operation timeout if necessary");
                cmd_original_times(cmd);
            }
        }
    }

    if (!goagain) {
        goto finalize;
    }

    /* Re-schedule a follow-up check after a delay of a tenth of the original
     * timeout (at most 2 seconds), as long as that still fits in the time left
     */
    time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
    timeout_left = cmd->timeout_orig - time_sum;
    delay = cmd->timeout_orig / 10;

    if (delay >= timeout_left && timeout_left > 20) {
        delay = timeout_left/2;
    }

    delay = QB_MIN(2000, delay);
    if (delay < timeout_left) {
        cmd->start_delay = delay;
        cmd->timeout = timeout_left;

        if (pcmk__result_ok(&(cmd->result))) {
            crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
                      cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);

        } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
            crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
                     cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);

        } else {
            crm_notice("%s %s failed: %s: Re-scheduling (remaining "
                       "timeout %s) " QB_XS
                       " exitstatus=%d elapsed=%dms start_delay=%dms)",
                       cmd->rsc_id, cmd->action,
                       crm_exit_str(cmd->result.exit_status),
                       pcmk__readable_interval(timeout_left),
                       cmd->result.exit_status, time_sum, delay);
        }

        cmd_reset(cmd);
        if (rsc) {
            rsc->active = NULL;
        }
        schedule_lrmd_cmd(rsc, cmd);

        /* Don't finalize cmd, we're not done with it yet */
        return;

    } else {
        crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
                   cmd->rsc_id,
                   (cmd->real_action? cmd->real_action : cmd->action),
                   cmd->result.exit_status, time_sum, timeout_left);
        pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                         PCMK_EXEC_TIMEOUT,
                         "Investigate reason for timeout, and adjust "
                         "configured operation timeout if necessary");
        cmd_original_times(cmd);
    }
#endif

finalize:
    pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
                            services__grab_stderr(action));
    cmd_finalize(cmd, rsc);
}
/*!
* \internal
* \brief Process the result of a fence device action (start, stop, or monitor)
*
* \param[in,out] cmd Fence device action that completed
* \param[in] exit_status Fencer API exit status for action
* \param[in] execution_status Fencer API execution status for action
* \param[in] exit_reason Human-friendly detail, if action failed
*
* \note This always ends by finalizing \p cmd, which may free it, so callers
*       must not use \p cmd after this function returns.
*/
static void
stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
enum pcmk_exec_status execution_status,
const char *exit_reason)
{
// This can be NULL if resource was removed before command completed
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
// Simplify fencer exit status to uniform exit status
if (exit_status != CRM_EX_OK) {
exit_status = PCMK_OCF_UNKNOWN_ERROR;
}
if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
/* An in-flight fence action was cancelled. The execution status is
* already correct, so don't overwrite it.
*/
execution_status = PCMK_EXEC_CANCELLED;
} else {
/* Some execution status codes have specific meanings for the fencer
* that executor clients may not expect, so map them to a simple error
* status.
*/
switch (execution_status) {
case PCMK_EXEC_NOT_CONNECTED:
case PCMK_EXEC_INVALID:
execution_status = PCMK_EXEC_ERROR;
break;
case PCMK_EXEC_NO_FENCE_DEVICE:
/* This should be possible only for probes in practice, but
* interpret for all actions to be safe.
*/
if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_none)) {
exit_status = PCMK_OCF_NOT_RUNNING;
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
pcmk__str_none)) {
exit_status = PCMK_OCF_OK;
} else {
exit_status = PCMK_OCF_NOT_INSTALLED;
}
execution_status = PCMK_EXEC_ERROR;
break;
case PCMK_EXEC_NOT_SUPPORTED:
exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
break;
default:
// Any other execution status is passed through unchanged
break;
}
}
// Record the (possibly remapped) result on the command
pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
// Certain successful actions change the known state of the resource
if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
PCMK_EXEC_DONE, NULL); // "running"
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
}
}
/* The recurring timer should not be running at this point in any case, but
* as a failsafe, stop it if it is.
*/
stop_recurring_timer(cmd);
/* Reschedule this command if appropriate. If a recurring command is *not*
* rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
* not be removed from recurring_ops by cmd_finalize().
*/
if (rsc && (cmd->interval_ms > 0)
&& (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
start_recurring_timer(cmd);
}
// Notify clients, then free or reset the command as appropriate
cmd_finalize(cmd, rsc);
}
/*!
 * \internal
 * \brief Fencer API callback for the result of a fence device action
 *
 * \param[in] stonith  Fencer connection (unused)
 * \param[in] data     Callback data; its userdata is the lrmd_cmd_t that the
 *                     result is for
 */
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    lrmd_cmd_t *cmd = NULL;

    if ((data == NULL) || (data->userdata == NULL)) {
        crm_err("Ignoring fence action result: "
                "Invalid callback arguments (bug?)");
        return;
    }

    cmd = (lrmd_cmd_t *) data->userdata;
    stonith_action_complete(cmd, stonith__exit_status(data),
                            stonith__execution_status(data),
                            stonith__exit_reason(data));
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
crm_warn("Connection to fencer lost (any pending operations for "
"fence devices will be considered failed)");
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_none)) {
continue;
}
/* If we registered this fence device, we don't know whether the
* fencer still has the registration or not. Cause future probes to
* return an error until the resource is stopped or started
* successfully. This is especially important if the controller also
* went away (possibly due to a cluster layer restart) and won't
* receive our client notification of any monitors finalized below.
*/
if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
PCMK_EXEC_NOT_CONNECTED,
"Lost connection to fencer");
}
// Consider any active, pending, or recurring operations as failed
for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
lrmd_cmd_t *cmd = op->data;
/* This won't free a recurring op but instead restart its timer.
* If cmd is rsc->active, this will set rsc->active to NULL, so we
* don't have to worry about finalizing it a second time below.
*/
stonith_action_complete(cmd,
CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
"Lost connection to fencer");
}
if (rsc->active != NULL) {
rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
}
while (rsc->pending_ops != NULL) {
// This will free the op and remove it from rsc->pending_ops
stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
"Lost connection to fencer");
}
}
}
/*!
* \internal
* \brief Execute a stonith resource "start" action
*
* Start a stonith resource by registering it with the fencer.
* (Stonith agents don't have a start command.)
*
* \param[in,out] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to start
* \param[in] cmd Start command to execute
*
* \return pcmk_ok on success, -errno otherwise
*/
static int
execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc,
const lrmd_cmd_t *cmd)
{
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
int rc = pcmk_ok;
// Convert command parameters to stonith API key/values
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
- device_params = stonith_key_value_add(device_params, key, value);
+ device_params = stonith__key_value_add(device_params, key, value);
}
}
/* The fencer will automatically register devices via CIB notifications
* when the CIB changes, but to avoid a possible race condition between
* the fencer receiving the notification and the executor requesting that
* resource, the executor registers the device as well. The fencer knows how
* to handle duplicate registrations.
*/
rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
cmd->rsc_id, rsc->provider,
rsc->type, device_params);
// The key/value list was built only for the registration call above
- stonith_key_value_freeall(device_params, 1, 1);
+ stonith__key_value_freeall(device_params, true, true);
return rc;
}
/*!
 * \internal
 * \brief Carry out the "stop" action of a stonith resource
 *
 * Stonith agents have no stop command of their own, so stopping a fence
 * device means asking the fencer to remove the device registered under the
 * resource's ID.
 *
 * \param[in,out] stonith_api  Connection to fencer
 * \param[in]     rsc          Stonith resource to stop
 *
 * \return Result of the remove_device() call (pcmk_ok on success, -errno
 *         otherwise)
 */
static inline int
execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
{
    int rc = pcmk_ok;

    /* @TODO Failure would indicate a problem communicating with fencer;
     * perhaps we should try reconnecting and retrying a few times?
     */
    rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
                                          rsc->rsc_id);
    return rc;
}
/*!
 * \internal
 * \brief Kick off a "monitor" action for a stonith resource
 *
 * Requests a monitor from the fencer, then registers
 * lrmd_stonith_callback() for the resulting call. Only when the callback is
 * registered does the command become the resource's active operation.
 *
 * \param[in,out] stonith_api  Connection to fencer
 * \param[in,out] rsc          Stonith resource to monitor
 * \param[in]     cmd          Monitor command being executed
 *
 * \return pcmk_ok if the monitor was initiated and its callback registered,
 *         otherwise -pcmk_err_generic
 */
static inline int
execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
    // The monitor result is passed straight on as the call to watch for
    int call_id = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
                                             pcmk__timeout_ms2s(cmd->timeout));
    int registered = stonith_api->cmds->register_callback(stonith_api, call_id,
                                                          0, 0, cmd,
                                                          "lrmd_stonith_callback",
                                                          lrmd_stonith_callback);

    if (registered != TRUE) {
        return -pcmk_err_generic;
    }

    rsc->active = cmd;
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Execute a command for a fence device resource
 *
 * Handles the command in this order:
 * - A monitor with interval 0 (a probe) is completed at once from the
 *   resource's stored fence_probe_result, without a fencer connection.
 * - If no fencer connection is available, the command is completed as
 *   not connected.
 * - "start" registers the device and, on success, goes on to monitor it.
 * - "stop" unregisters the device.
 * - Any other monitor initiates a monitor.
 * - Any other action is completed as unimplemented.
 *
 * If a monitor was initiated successfully, the command is left pending and
 * this function returns without completing it. Otherwise the command is
 * completed with a result derived from the legacy return code.
 *
 * \param[in,out] rsc  Fence device resource
 * \param[in,out] cmd  Command to execute
 */
static void
execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
- int rc = 0;
- bool do_monitor = FALSE;
+ int rc = pcmk_ok;
+ const char *rc_s = NULL;
+ bool do_monitor = false;
+ // Don't free; belongs to pacemaker-execd.c
stonith_t *stonith_api = get_stonith_connection();
if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)
&& (cmd->interval_ms == 0)) {
// Probes don't require a fencer connection
stonith_action_complete(cmd, rsc->fence_probe_result.exit_status,
rsc->fence_probe_result.execution_status,
rsc->fence_probe_result.exit_reason);
return;
+ }
- } else if (stonith_api == NULL) {
+ if (stonith_api == NULL) {
stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_NOT_CONNECTED,
"No connection to fencer");
return;
+ }
- } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
+ if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
rc = execd_stonith_start(stonith_api, rsc, cmd);
// A successful start is followed by a monitor of the new registration
if (rc == pcmk_ok) {
- do_monitor = TRUE;
+ do_monitor = true;
}
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
rc = execd_stonith_stop(stonith_api, rsc);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
- do_monitor = TRUE;
+ do_monitor = true;
} else {
stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
PCMK_EXEC_ERROR,
"Invalid fence device action (bug?)");
return;
}
if (do_monitor) {
rc = execd_stonith_monitor(stonith_api, rsc, cmd);
if (rc == pcmk_ok) {
- // Don't clean up yet, we will find out result of the monitor later
+ // Don't clean up yet. We will get the result of the monitor later.
return;
}
}
// A generic error carries no useful text, so no exit reason is given for it
+ if (rc != -pcmk_err_generic) {
+ rc_s = pcmk_strerror(rc);
+ }
stonith_action_complete(cmd,
- ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
- stonith__legacy2status(rc),
- ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc)));
+ ((rc == pcmk_rc_ok)? CRM_EX_OK : CRM_EX_ERROR),
+ stonith__legacy2status(rc), rc_s);
}
/*!
 * \internal
 * \brief Execute a command for a resource that is not a fence device
 *
 * Builds a services-library action from the resource's agent information and
 * the command, then hands it to the services library for asynchronous
 * execution. If the action cannot be created, or is created with a result
 * already set, the command is finalized immediately.
 *
 * \param[in,out] rsc  Resource the command is for (must not be NULL)
 * \param[in,out] cmd  Command to execute (must not be NULL)
 */
static void
execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
    GHashTable *params_copy = NULL;
    svc_action_t *action = NULL;

    pcmk__assert((rsc != NULL) && (cmd != NULL));

    crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
              rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);

    params_copy = pcmk__str_table_dup(cmd->params);
    action = services__create_resource_action(rsc->rsc_id, rsc->class,
                                              rsc->provider, rsc->type,
                                              normalize_action_name(rsc,
                                                                    cmd->action),
                                              cmd->interval_ms, cmd->timeout,
                                              params_copy, cmd->service_flags);

    if (action == NULL) {
        // Nothing was created, so report an out-of-memory style failure
        pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                         PCMK_EXEC_ERROR, strerror(ENOMEM));
        cmd_finalize(cmd, rsc);
        return;
    }

    if (action->rc != PCMK_OCF_UNKNOWN) {
        // The action already has a result, so there is nothing to run
        services__copy_result(action, &(cmd->result));
        services_action_free(action);
        cmd_finalize(cmd, rsc);
        return;
    }

    action->cb_data = cmd;

    if (services_action_async(action, action_complete)) {
        /* The services library has taken responsibility for the action. It
         * could be pending, blocked, or merged into a duplicate recurring
         * action, in which case the action callback (action_complete())
         * will be called when the action completes, otherwise the callback
         * has already been called.
         *
         * action_complete() calls cmd_finalize() which can free cmd, so cmd
         * cannot be used here.
         */
        return;
    }

    /* This is a recurring action that is not being cancelled and could not
     * be initiated. It has been rescheduled, and the action callback
     * (action_complete()) has been called, which in this case has already
     * called cmd_finalize(), which in this case should only reset (not
     * free) cmd.
     */
    services__copy_result(action, &(cmd->result));
    services_action_free(action);
}
/*!
 * \internal
 * \brief Run the next pending command for a resource, if it is ready
 *
 * Does nothing while the resource already has an active command, while there
 * is nothing pending, or while the first pending command is still waiting on
 * its start delay. Otherwise the first pending command is dequeued, made the
 * active command, and executed.
 *
 * \param[in,out] user_data  Resource (lrmd_rsc_t *) to run a command for
 *
 * \return FALSE if \p user_data is NULL, otherwise TRUE
 */
static gboolean
execute_resource_action(gpointer user_data)
{
    lrmd_rsc_t *rsc = user_data;
    GList *head = NULL;
    lrmd_cmd_t *cmd = NULL;

    CRM_CHECK(rsc != NULL, return FALSE);

    if (rsc->active != NULL) {
        crm_trace("%s is still active", rsc->rsc_id);
        return TRUE;
    }

    head = rsc->pending_ops;
    if (head == NULL) {
        crm_trace("Nothing further to do for %s", rsc->rsc_id);
        return TRUE;
    }

    cmd = head->data;
    if (cmd->delay_id) {
        crm_trace
            ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
             cmd->rsc_id, cmd->action, cmd->start_delay);
        return TRUE;
    }

    // Dequeue the command: unlink its list node and free only the node
    rsc->pending_ops = g_list_remove_link(rsc->pending_ops, head);
    g_list_free_1(head);

#ifdef PCMK__TIME_USE_CGT
    get_current_time(&(cmd->t_run), &(cmd->t_first_run));
#endif
    cmd->epoch_last_run = time(NULL);

    // Only one operation at a time may run for a resource
    rsc->active = cmd;

    if (cmd->interval_ms) {
        rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
    }

    log_execute(cmd);

    if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
                      pcmk__str_casei)) {
        execute_nonstonith_action(rsc, cmd);
    } else {
        execute_stonith_action(rsc, cmd);
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Free a resource object along with its queued and recurring commands
 *
 * Pending commands are marked cancelled and finalized. Recurring commands of
 * fence devices are marked cancelled and finalized unless currently active;
 * recurring commands of other resources are cancelled via the services
 * library.
 *
 * \param[in,out] data  Resource (lrmd_rsc_t *) to free
 */
void
free_rsc(gpointer data)
{
    lrmd_rsc_t *rsc = data;
    GList *next = NULL;
    int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
                                  pcmk__str_casei);

    // The successor is saved first on every pass, before the command is touched
    for (GList *iter = rsc->pending_ops; iter != NULL; iter = next) {
        lrmd_cmd_t *cmd = NULL;

        next = iter->next;
        cmd = iter->data;

        // The command was never executed
        cmd->result.execution_status = PCMK_EXEC_CANCELLED;
        cmd_finalize(cmd, NULL);
    }

    // Frees the list, but not the list elements
    g_list_free(rsc->pending_ops);

    for (GList *iter = rsc->recurring_ops; iter != NULL; iter = next) {
        lrmd_cmd_t *cmd = NULL;

        next = iter->next;
        cmd = iter->data;

        if (!is_stonith) {
            /* This command is already handed off to service library,
             * let service library cancel it and tell us via the callback
             * when it is cancelled. The rsc can be safely destroyed
             * even if we are waiting for the cancel result
             */
            services_action_cancel(rsc->rsc_id,
                                   normalize_action_name(rsc, cmd->action),
                                   cmd->interval_ms);
            continue;
        }

        cmd->result.execution_status = PCMK_EXEC_CANCELLED;

        /* If a stonith command is in-flight, just mark it as cancelled;
         * it is not safe to finalize/free the cmd until the stonith api
         * says it has either completed or timed out.
         */
        if (rsc->active != cmd) {
            cmd_finalize(cmd, NULL);
        }
    }

    // Frees the list, but not the list elements
    g_list_free(rsc->recurring_ops);

    free(rsc->rsc_id);
    free(rsc->class);
    free(rsc->provider);
    free(rsc->type);
    mainloop_destroy_trigger(rsc->work);

    free(rsc);
}
/*!
 * \internal
 * \brief Handle a client sign-on (registration) request
 *
 * Checks the client's protocol version, optionally registers the client as
 * an IPC proxy provider, and builds the reply. The reply carries the
 * operation name, the client ID, the executor's protocol version, the uptime
 * (current time minus start_time), and the node start state if one is
 * configured in the environment.
 *
 * \param[in,out] client   Client signing on
 * \param[in]     request  Sign-on request XML
 * \param[in]     call_id  Call ID to put in the reply
 * \param[out]    reply    Where to store the newly created reply
 *
 * \return pcmk_ok on success; -EPROTO if the client's protocol version is
 *         older than LRMD_COMPATIBLE_PROTOCOL; for IPC provider requests,
 *         -EACCES if the client is not a remote connection with a completed
 *         TLS handshake, or -EPROTONOSUPPORT if this build has no remote
 *         support
 */
static int
process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
                    xmlNode **reply)
{
    int rc = pcmk_ok;
    time_t now = time(NULL);
    const char *protocol_version = NULL;
    const char *start_state = NULL;

    protocol_version = crm_element_value(request,
                                         PCMK__XA_LRMD_PROTOCOL_VERSION);
    start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);

    if (compare_version(protocol_version, LRMD_COMPATIBLE_PROTOCOL) < 0) {
        crm_err("Cluster API version must be greater than or equal to %s, not %s",
                LRMD_COMPATIBLE_PROTOCOL, protocol_version);
        rc = -EPROTO;
    }

    if (pcmk__xe_attr_is_true(request, PCMK__XA_LRMD_IS_IPC_PROVIDER)) {
#ifdef PCMK__COMPILE_REMOTE
        if ((client->remote == NULL)
            || !pcmk_is_set(client->flags,
                            pcmk__client_tls_handshake_complete)) {
            rc = -EACCES;

        } else {
            const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);

            // This is a remote connection from a cluster node's controller
            ipc_proxy_add_provider(client);

            /* @TODO Allowing multiple proxies makes no sense given that
             * clients have no way to choose between them. Maybe always use
             * the most recent one and switch any existing IPC connections to
             * use it, by iterating over ipc_clients here, and if client->id
             * doesn't match the client's userdata, replace the userdata with
             * the new ID. After the iteration, call
             * lrmd_remote_client_destroy() on any of the replaced values in
             * ipc_providers.
             */

            /* If this was a register operation, also ask for new schema
             * files but only if it's supported by the protocol version.
             */
            if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) &&
                LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) {
                remoted_request_cib_schema_files();
            }
        }
#else
        rc = -EPROTONOSUPPORT;
#endif
    }

    *reply = create_lrmd_reply(__func__, rc, call_id);
    crm_xml_add(*reply, PCMK__XA_LRMD_OP, CRM_OP_REGISTER);
    crm_xml_add(*reply, PCMK__XA_LRMD_CLIENTID, client->id);
    crm_xml_add(*reply, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
    crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time);

    if (start_state != NULL) {
        crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state);
    }

    return rc;
}
/*!
 * \internal
 * \brief Handle a request to register a resource with the executor
 *
 * A resource whose ID is already registered with the same class, provider,
 * and type (compared case-insensitively) is ignored as a duplicate.
 * Otherwise the new resource is stored in rsc_list under its ID.
 *
 * \param[in] client   Requesting client (unused here)
 * \param[in] id       IPC message ID (unused here)
 * \param[in] request  Registration request XML
 *
 * \return pcmk_ok
 */
static int
process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
    lrmd_rsc_t *rsc = build_rsc_from_xml(request);
    lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
    bool same_agent = (dup != NULL)
                      && pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei)
                      && pcmk__str_eq(rsc->provider, dup->provider,
                                      pcmk__str_casei)
                      && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei);

    if (!same_agent) {
        g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
        crm_info("Cached agent information for '%s'", rsc->rsc_id);
        return pcmk_ok;
    }

    // Log before freeing, since the message uses the new object's ID
    crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
    free_rsc(rsc);
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Build the reply to a request for a resource's agent information
 *
 * The reply's return code is -ENODEV when the request names no resource or
 * the named resource is not in rsc_list. For a known resource, the reply
 * also carries its ID, class, provider, and type.
 *
 * \param[in] request  Request XML
 * \param[in] call_id  Call ID to put in the reply
 *
 * \return Newly created reply XML
 */
static xmlNode *
process_lrmd_get_rsc_info(xmlNode *request, int call_id)
{
    // Assume failure until the resource has actually been found
    int rc = -ENODEV;
    lrmd_rsc_t *rsc = NULL;
    xmlNode *reply = NULL;
    xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
                                            "//" PCMK__XE_LRMD_RSC,
                                            LOG_ERR);
    const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);

    if (rsc_id != NULL) {
        rsc = g_hash_table_lookup(rsc_list, rsc_id);
        if (rsc != NULL) {
            rc = pcmk_ok;
        } else {
            crm_info("Agent information for '%s' not in cache", rsc_id);
        }
    }

    reply = create_lrmd_reply(__func__, rc, call_id);
    if (rsc == NULL) {
        return reply;
    }

    crm_xml_add(reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
    crm_xml_add(reply, PCMK__XA_LRMD_CLASS, rsc->class);
    crm_xml_add(reply, PCMK__XA_LRMD_PROVIDER, rsc->provider);
    crm_xml_add(reply, PCMK__XA_LRMD_TYPE, rsc->type);
    return reply;
}
/*!
 * \internal
 * \brief Handle a request to unregister a resource
 *
 * \param[in] client   Requesting client (unused here)
 * \param[in] id       IPC message ID (unused here)
 * \param[in] request  Unregistration request XML
 *
 * \return -ENODEV if the request names no resource; pcmk_ok if the resource
 *         was not registered or was removed with no active operation;
 *         -EINPROGRESS if it was removed while an operation was still active
 */
static int
process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
                            xmlNode *request)
{
    int rc = pcmk_ok;
    lrmd_rsc_t *rsc = NULL;
    const char *rsc_id = NULL;
    xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
                                            "//" PCMK__XE_LRMD_RSC,
                                            LOG_ERR);

    rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
    if (rsc_id == NULL) {
        return -ENODEV;
    }

    rsc = g_hash_table_lookup(rsc_list, rsc_id);
    if (rsc == NULL) {
        crm_info("Ignoring unregistration of resource '%s', which is not registered",
                 rsc_id);
        return pcmk_ok;
    }

    if (rsc->active != NULL) {
        // Let the caller know there are still active ops on this rsc to watch for
        crm_trace("Operation (%p) still in progress for unregistered resource %s",
                  rsc->active, rsc_id);
        rc = -EINPROGRESS;
    }

    // The resource is removed from the table either way
    g_hash_table_remove(rsc_list, rsc_id);
    return rc;
}
/*!
 * \internal
 * \brief Handle a request to execute an action on a resource
 *
 * \param[in,out] client   Requesting client
 * \param[in]     id       IPC message ID (unused here)
 * \param[in]     request  Execution request XML
 *
 * \return Call ID of the newly scheduled command; -EINVAL if the request
 *         names no resource; -ENODEV if the resource is not registered
 */
static int
process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
    int call_id;
    lrmd_cmd_t *cmd = NULL;
    lrmd_rsc_t *rsc = NULL;
    xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
                                            "//" PCMK__XE_LRMD_RSC,
                                            LOG_ERR);
    const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);

    if (rsc_id == NULL) {
        return -EINVAL;
    }

    rsc = g_hash_table_lookup(rsc_list, rsc_id);
    if (rsc == NULL) {
        crm_info("Resource '%s' not found (%d active resources)",
                 rsc_id, g_hash_table_size(rsc_list));
        return -ENODEV;
    }

    cmd = create_lrmd_cmd(request, client);

    /* Save the call ID first: cmd must not be referenced after it has been
     * handed off to be scheduled, because it could get merged and freed.
     */
    call_id = cmd->call_id;
    schedule_lrmd_cmd(rsc, cmd);
    return call_id;
}
/*!
 * \internal
 * \brief Cancel an operation on a resource
 *
 * How an action gets cancelled:
 * 1. If it is still in the pending ops list, it has not been handed off to
 *    the service library or the stonith recurring list, so finalizing it
 *    there stops it.
 * 2. Otherwise it is either a recurring op in the stonith recurring list or
 *    in the service library's recurring list, and is stopped there.
 * 3. If it is found in neither place, the operation has either already been
 *    executed and is not recurring, or never existed.
 *
 * \param[in] rsc_id       ID of resource whose operation should be cancelled
 * \param[in] action       Name of action to cancel
 * \param[in] interval_ms  Interval of action to cancel
 *
 * \return pcmk_ok if a matching operation was cancelled, -ENODEV if the
 *         resource is not registered, otherwise -EOPNOTSUPP
 */
static int
cancel_op(const char *rsc_id, const char *action, guint interval_ms)
{
    lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);

    if (rsc == NULL) {
        return -ENODEV;
    }

    for (GList *iter = rsc->pending_ops; iter != NULL; iter = iter->next) {
        lrmd_cmd_t *cmd = iter->data;

        if (!action_matches(cmd, action, interval_ms)) {
            continue;
        }
        cmd->result.execution_status = PCMK_EXEC_CANCELLED;
        cmd_finalize(cmd, rsc);
        return pcmk_ok;
    }

    if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
                      pcmk__str_casei)) {
        if (services_action_cancel(rsc_id, normalize_action_name(rsc, action),
                                   interval_ms) == TRUE) {
            /* The service library will tell the action_complete callback
             * function this action was cancelled, which will destroy the cmd
             * and remove it from the recurring_op list. Do not do that in
             * this function if the service library says it cancelled it.
             */
            return pcmk_ok;
        }
        return -EOPNOTSUPP;
    }

    /* The service library does not handle stonith operations.
     * We have to handle recurring stonith operations ourselves.
     */
    for (GList *iter = rsc->recurring_ops; iter != NULL; iter = iter->next) {
        lrmd_cmd_t *cmd = iter->data;

        if (!action_matches(cmd, action, interval_ms)) {
            continue;
        }
        cmd->result.execution_status = PCMK_EXEC_CANCELLED;

        // An in-flight command is only marked; it is not finalized here
        if (rsc->active != cmd) {
            cmd_finalize(cmd, rsc);
        }
        return pcmk_ok;
    }

    return -EOPNOTSUPP;
}
/*!
 * \internal
 * \brief Cancel a resource's recurring operations
 *
 * Every queued or recurring command with a nonzero interval is cancelled,
 * optionally limited to commands that belong to one client.
 *
 * \param[in,out] rsc        Resource whose recurring operations to cancel
 * \param[in]     client_id  If not NULL, cancel only this client's commands
 */
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
    /* Work on copies of both lists. cancel_op() may end up modifying
     * recurring_ops and pending_ops, which could mess up an iteration over
     * the originals. The recurring commands come first, then the pending
     * ones.
     */
    GList *cmd_list = g_list_concat(g_list_copy(rsc->recurring_ops),
                                    g_list_copy(rsc->pending_ops));
    GList *cmd_iter = cmd_list;

    while (cmd_iter != NULL) {
        lrmd_cmd_t *cmd = cmd_iter->data;
        bool recurring = (cmd->interval_ms != 0);

        cmd_iter = cmd_iter->next;

        if (!recurring) {
            continue;
        }
        if ((client_id != NULL)
            && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
            continue;
        }
        cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
    }

    // Frees only the copied list nodes, not the commands
    g_list_free(cmd_list);
}
/*!
 * \internal
 * \brief Handle a request to cancel an operation on a resource
 *
 * \param[in] client   Requesting client (unused here)
 * \param[in] id       IPC message ID (unused here)
 * \param[in] request  Cancellation request XML
 *
 * \return -EINVAL if the request lacks a resource ID or an action name,
 *         otherwise the result of cancel_op()
 */
static int
process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
    guint interval_ms = 0;
    const char *rsc_id = NULL;
    const char *action = NULL;
    xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
                                            "//" PCMK__XE_LRMD_RSC,
                                            LOG_ERR);

    rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
    action = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
    crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &interval_ms);

    if ((rsc_id == NULL) || (action == NULL)) {
        return -EINVAL;
    }
    return cancel_op(rsc_id, action, interval_ms);
}
/*!
 * \internal
 * \brief Add a resource's recurring operations to a reply
 *
 * Creates one resource element under \p reply, with one child element per
 * recurring operation giving its action name, interval, and original
 * timeout.
 *
 * \param[in,out] reply  XML to add the resource element to
 * \param[in]     rsc    Resource whose recurring operations to describe
 */
static void
add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
{
    GList *item = rsc->recurring_ops;
    xmlNode *rsc_xml = pcmk__xe_create(reply, PCMK__XE_LRMD_RSC);

    crm_xml_add(rsc_xml, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);

    while (item != NULL) {
        lrmd_cmd_t *cmd = item->data;
        xmlNode *op_xml = pcmk__xe_create(rsc_xml, PCMK__XE_LRMD_RSC_OP);

        // Report real_action when the command has one, else its action
        crm_xml_add(op_xml, PCMK__XA_LRMD_RSC_ACTION,
                    pcmk__s(cmd->real_action, cmd->action));
        crm_xml_add_ms(op_xml, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
        crm_xml_add_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig);

        item = item->next;
    }
}
/*!
 * \internal
 * \brief Build the reply to a request for recurring operations
 *
 * The resource ID in the request is optional. Without one, the recurring
 * operations of every registered resource are reported. With one, the
 * resource must be registered; if it is not, the reply's return code is
 * -ENODEV and no operations are reported.
 *
 * \param[in] request  Request XML
 * \param[in] call_id  Call ID to put in the reply
 *
 * \return Newly created reply XML
 */
static xmlNode *
process_lrmd_get_recurring(xmlNode *request, int call_id)
{
    int rc = pcmk_ok;
    const char *rsc_id = NULL;
    lrmd_rsc_t *rsc = NULL;
    xmlNode *reply = NULL;
    xmlNode *rsc_xml = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA,
                                            NULL, NULL);

    // Resource ID is optional, so every step down may come up empty
    if (rsc_xml != NULL) {
        rsc_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC, NULL, NULL);
    }
    if (rsc_xml != NULL) {
        rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
    }

    if (rsc_id == NULL) {
        // No resource specified, so report on all of them
        GHashTableIter iter;

        reply = create_lrmd_reply(__func__, rc, call_id);
        g_hash_table_iter_init(&iter, rsc_list);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
            add_recurring_op_xml(reply, rsc);
        }
        return reply;
    }

    // A specified resource must exist
    rsc = g_hash_table_lookup(rsc_list, rsc_id);
    if (rsc == NULL) {
        crm_info("Resource '%s' not found (%d active resources)",
                 rsc_id, g_hash_table_size(rsc_list));
        rc = -ENODEV;
    }

    reply = create_lrmd_reply(__func__, rc, call_id);
    if (rsc != NULL) {
        add_recurring_op_xml(reply, rsc);
    }
    return reply;
}
/*!
 * \internal
 * \brief Process one request from an executor client
 *
 * Dispatches on the request's operation name. Each branch sets the return
 * code and decides whether a reply is sent to the client (do_reply) and
 * whether a generic notification is sent afterwards (do_notify). Most
 * operations are refused with -EACCES unless the client is privileged.
 *
 * \param[in,out] client   Client that sent the request
 * \param[in]     id       IPC message ID, used when sending the reply
 * \param[in,out] request  Request XML
 */
void
process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
int do_reply = 0;
int do_notify = 0;
xmlNode *reply = NULL;
/* Certain IPC commands may be done only by privileged users (i.e. root or
* hacluster), because they would otherwise provide a means of bypassing
* ACLs.
*/
bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id);
if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
#ifdef PCMK__COMPILE_REMOTE
if (allowed) {
ipc_proxy_forward_client(client, request);
} else {
rc = -EACCES;
}
#else
rc = -EPROTONOSUPPORT;
#endif
do_reply = 1;
} else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
// Sign-on is the one request handled without a privilege check here
rc = process_lrmd_signon(client, request, call_id, &reply);
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
// The handler builds the reply itself; rc here stays pcmk_ok
if (allowed) {
reply = process_lrmd_get_rsc_info(request, call_id);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* don't notify anyone about failed un-registers */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_exec(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_cancel(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
do_notify = 1;
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
// This branch never sets do_reply, so no reply is sent for it
if (allowed) {
xmlNode *wrapper = pcmk__xe_first_child(request,
PCMK__XE_LRMD_CALLDATA,
NULL, NULL);
xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
const char *timeout = NULL;
CRM_LOG_ASSERT(data != NULL);
timeout = crm_element_value(data, PCMK__XA_LRMD_WATCHDOG);
pcmk__valid_stonith_watchdog_timeout(timeout);
} else {
rc = -EACCES;
}
} else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_alert_exec(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
// The handler builds the reply itself; rc here stays pcmk_ok
if (allowed) {
reply = process_lrmd_get_recurring(request, call_id);
} else {
rc = -EACCES;
}
do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown IPC request '%s' from client %s",
op, pcmk__client_name(client));
}
if (rc == -EACCES) {
crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
op, pcmk__client_name(client));
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
op, client->id, rc, do_reply, do_notify);
if (do_reply) {
int send_rc = pcmk_rc_ok;
// Branches that built no reply of their own get a generic one carrying rc
if (reply == NULL) {
reply = create_lrmd_reply(__func__, rc, call_id);
}
send_rc = lrmd_server_send_reply(client, id, reply);
pcmk__xml_free(reply);
if (send_rc != pcmk_rc_ok) {
crm_warn("Reply to client %s failed: %s " QB_XS " rc=%d",
pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
}
}
if (do_notify) {
send_generic_notify(rc, request);
}
}
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
index c32f6b26aa..68daf77bf3 100644
--- a/daemons/execd/pacemaker-execd.c
+++ b/daemons/execd/pacemaker-execd.c
@@ -1,576 +1,577 @@
/*
- * Copyright 2012-2024 the Pacemaker project contributors
+ * Copyright 2012-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <signal.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/services.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/output_internal.h>
#include <crm/common/remote_internal.h>
+#include <crm/fencing/internal.h> // stonith__api_new()
#include <crm/lrmd_internal.h>
#include "pacemaker-execd.h"
#ifdef PCMK__COMPILE_REMOTE
# define EXECD_TYPE "remote"
# define EXECD_NAME PCMK__SERVER_REMOTED
# define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes"
#else
# define EXECD_TYPE "local"
# define EXECD_NAME PCMK__SERVER_EXECD
# define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes"
#endif
static GMainLoop *mainloop = NULL;
static qb_ipcs_service_t *ipcs = NULL;
static stonith_t *stonith_api = NULL;
int lrmd_call_id = 0;
time_t start_time;
static struct {
gchar **log_files;
#ifdef PCMK__COMPILE_REMOTE
gchar *port;
#endif // PCMK__COMPILE_REMOTE
} options;
#ifdef PCMK__COMPILE_REMOTE
/* whether shutdown request has been sent */
static gboolean shutting_down = FALSE;
#endif
static void exit_executor(void);
/*!
 * \internal
 * \brief Handle notification that the fencer connection was lost
 *
 * \param[in] st  Fencer connection the notification is for (unused; the
 *                file-level stonith_api object is updated instead)
 * \param[in] e   Notification event (unused)
 */
static void
stonith_connection_destroy_cb(stonith_t *st, stonith_event_t *e)
{
    /* Mark the shared connection as disconnected first, so that
     * get_stonith_connection() discards it on its next call
     */
    stonith_api->state = stonith_disconnected;

    stonith_connection_failed();
}
/*!
 * \internal
 * \brief Get the executor's fencer connection, (re)connecting if needed
 *
 * A connection object that has been marked disconnected is discarded first.
 * If there is no connection object, a new one is created and up to 10
 * connection attempts are made. On success, a callback is registered for
 * disconnect notifications.
 *
 * \return The file-level fencer connection (which the caller must not free),
 *         or NULL if no connection could be established
 */
stonith_t *
get_stonith_connection(void)
{
if (stonith_api && stonith_api->state == stonith_disconnected) {
- stonith_api_delete(stonith_api);
+ stonith__api_free(stonith_api);
stonith_api = NULL;
}
if (stonith_api == NULL) {
int rc = pcmk_ok;
- stonith_api = stonith_api_new();
+ stonith_api = stonith__api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return NULL;
}
- rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
- if (rc != pcmk_ok) {
+ rc = stonith__api_connect_retry(stonith_api, crm_system_name, 10);
+ if (rc != pcmk_rc_ok) {
crm_err("Could not connect to fencer in 10 attempts: %s "
- QB_XS " rc=%d", pcmk_strerror(rc), rc);
- stonith_api_delete(stonith_api);
+ QB_XS " rc=%d", pcmk_rc_str(rc), rc);
+ stonith__api_free(stonith_api);
stonith_api = NULL;
} else {
// Learn about a lost connection so it can be discarded above next time
stonith_api_operations_t *cmds = stonith_api->cmds;
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
stonith_connection_destroy_cb);
}
}
return stonith_api;
}
/*!
 * \internal
 * \brief Accept a new local IPC connection
 *
 * \param[in,out] c    New connection
 * \param[in]     uid  User ID of the connecting process
 * \param[in]     gid  Group ID of the connecting process
 *
 * \return 0 if a client object was created for the connection, otherwise
 *         -ENOMEM
 */
static int32_t
lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    pcmk__client_t *new_client = NULL;

    crm_trace("Connection %p", c);

    new_client = pcmk__new_client(c, uid, gid);
    return (new_client == NULL)? -ENOMEM : 0;
}
/*!
 * \internal
 * \brief Handle a local IPC connection having been fully created
 *
 * \param[in] c  Newly created connection (must have a client object)
 */
static void
lrmd_ipc_created(qb_ipcs_connection_t * c)
{
    pcmk__client_t *new_client = NULL;

    new_client = pcmk__find_client(c);
    crm_trace("Connection %p", c);
    pcmk__assert(new_client != NULL);

    /* Now that the connection is officially established, alert the other
     * clients that a new connection exists.
     */
    notify_of_new_client(new_client);
}
/*!
 * \internal
 * \brief Handle one message received from a local IPC client
 *
 * Validates the client, parses the message into XML, gives the client a name
 * if it has none yet, stamps the request with the client's ID and name and a
 * fresh call ID, and passes it to process_lrmd_message().
 *
 * \param[in,out] c     Connection the message arrived on
 * \param[in]     data  Raw message data
 * \param[in]     size  Size of \p data (unused here)
 *
 * \return 0 (including when a validity check fails, as FALSE is 0)
 */
static int32_t
lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    xmlNode *request = NULL;
    pcmk__client_t *client = pcmk__find_client(c);

    /* Validate the client before parsing, so that a NULL client is never
     * handed to the parser and nothing has been allocated yet if we bail out
     */
    CRM_CHECK(client != NULL, crm_err("Invalid client");
              return FALSE);
    CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
              return FALSE);

    request = pcmk__client_data2xml(client, data, &id, &flags);

    // The request has been parsed by now, so free it if we reject the message
    CRM_CHECK(flags & crm_ipc_client_response,
              crm_err("Invalid client request: %p", client);
              pcmk__xml_free(request);
              return FALSE);

    if (!request) {
        return 0;
    }

    /* @TODO functionize some of this to reduce duplication with
     * lrmd_remote_client_msg()
     */

    if (!client->name) {
        const char *value = crm_element_value(request,
                                              PCMK__XA_LRMD_CLIENTNAME);

        // Clients that don't name themselves are named after their PID
        if (value == NULL) {
            client->name = pcmk__itoa(pcmk__client_pid(c));
        } else {
            client->name = pcmk__str_copy(value);
        }
    }

    /* Call IDs are always positive. Wrap around to 1 explicitly rather than
     * incrementing past the maximum, since signed overflow is undefined.
     */
    if ((lrmd_call_id < 1) || (lrmd_call_id == G_MAXINT)) {
        lrmd_call_id = 1;
    } else {
        lrmd_call_id++;
    }

    crm_xml_add(request, PCMK__XA_LRMD_CLIENTID, client->id);
    crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, client->name);
    crm_xml_add_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id);

    process_lrmd_message(client, id, request);

    pcmk__xml_free(request);

    return 0;
}
/*!
 * \internal
 * \brief Release a client connection, exiting afterwards if appropriate
 *
 * In remote builds, the executor exits once a requested shutdown is under
 * way and no proxied IPC provider remains.
 *
 * \param[in,out] client  Client connection to free
 */
void
lrmd_client_destroy(pcmk__client_t *client)
{
    pcmk__free_client(client);

#ifdef PCMK__COMPILE_REMOTE
    // Nothing more to do unless we were waiting to shut down
    if (!shutting_down) {
        return;
    }

    // It is safe to exit once there are no more proxied IPC providers
    if (ipc_proxy_get_provider() == NULL) {
        exit_executor();
    }
#endif
}
/*!
 * \internal
 * \brief Handle a local IPC connection being closed
 *
 * Cleans up after the connection's client, if it has one: its disconnect
 * cleanup is run, it is dropped as an IPC proxy provider (remote builds
 * only), and the client object is destroyed.
 *
 * \param[in] c  Connection that was closed
 *
 * \return 0
 */
static int32_t
lrmd_ipc_closed(qb_ipcs_connection_t * c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Connection %p", c);
        client_disconnect_cleanup(client->id);
#ifdef PCMK__COMPILE_REMOTE
        ipc_proxy_remove_provider(client);
#endif
        lrmd_client_destroy(client);
    }
    return 0;
}
/*!
 * \internal
 * \brief Handle a local IPC connection being destroyed
 *
 * \param[in] c  Connection being destroyed
 */
static void
lrmd_ipc_destroy(qb_ipcs_connection_t * c)
{
    // Destruction needs the same client cleanup as closing
    (void) lrmd_ipc_closed(c);

    crm_trace("Connection %p", c);
}
// IPC server event handlers for local client connections
static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = {
.connection_accept = lrmd_ipc_accept,
.connection_created = lrmd_ipc_created,
.msg_process = lrmd_ipc_dispatch,
.connection_closed = lrmd_ipc_closed,
.connection_destroyed = lrmd_ipc_destroy
};
/*!
 * \internal
 * \brief Send a reply to a client over whichever transport it uses
 *
 * \param[in,out] client  Client to reply to
 * \param[in]     id      ID of the message being replied to
 * \param[in]     reply   Reply XML to send
 *
 * \return Standard Pacemaker return code (ENOTCONN if the client's type is
 *         not one that can be sent to)
 */
int
lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply)
{
    crm_trace("Sending reply (%d) to client (%s)", id, client->id);

    switch (PCMK__CLIENT_TYPE(client)) {
        case pcmk__client_ipc:
            return pcmk__ipc_send_xml(client, id, reply, FALSE);

#ifdef PCMK__COMPILE_REMOTE
        case pcmk__client_tls:
            return lrmd__remote_send_xml(client->remote, reply, id, "reply");
#endif

        default:
            break;
    }

    // Only unrecognized client types get here
    crm_err("Could not send reply: unknown type for client %s "
            QB_XS " flags=%#llx",
            pcmk__client_name(client), client->flags);
    return ENOTCONN;
}
/*!
 * \internal
 * \brief Send a notification to a client over whichever transport it uses
 *
 * \param[in,out] client  Client to notify
 * \param[in]     msg     Notification XML to send
 *
 * \return Standard Pacemaker return code (ENOTCONN if the client is
 *         disconnected or its type is not one that can be sent to)
 */
int
lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg)
{
    crm_trace("Sending notification to client (%s)", client->id);

    switch (PCMK__CLIENT_TYPE(client)) {
        case pcmk__client_ipc:
            if (client->ipcs != NULL) {
                return pcmk__ipc_send_xml(client, 0, msg,
                                          crm_ipc_server_event);
            }
            crm_trace("Could not notify local client: disconnected");
            return ENOTCONN;

#ifdef PCMK__COMPILE_REMOTE
        case pcmk__client_tls:
            if (client->remote != NULL) {
                return lrmd__remote_send_xml(client->remote, msg, 0,
                                             "notify");
            }
            crm_trace("Could not notify remote client: disconnected");
            return ENOTCONN;
#endif

        default:
            break;
    }

    // Only unrecognized client types get here
    crm_err("Could not notify client %s with unknown transport "
            QB_XS " flags=%#llx",
            pcmk__client_name(client), client->flags);
    return ENOTCONN;
}
/*!
 * \internal
 * \brief Clean up and exit immediately
 *
 * Releases the fencer connection, the IPC server, the TLS server and IPC
 * proxy state (remote builds only), the client table, pending alerts, and
 * the resource table, then exits with CRM_EX_OK.
 */
static void
exit_executor(void)
{
const guint nclients = pcmk__ipc_client_count();
// NOTE(review): nclients is a guint but is logged with %d rather than %u
crm_info("Terminating with %d client%s",
nclients, pcmk__plural_s(nclients));
- stonith_api_delete(stonith_api);
+ stonith__api_free(stonith_api);
if (ipcs) {
mainloop_del_ipc_server(ipcs);
}
#ifdef PCMK__COMPILE_REMOTE
execd_stop_tls_server();
ipc_proxy_cleanup();
#endif
pcmk__client_cleanup();
// Alerts can only be drained if the main loop was created
if (mainloop) {
lrmd_drain_alerts(mainloop);
}
g_hash_table_destroy(rsc_list);
// @TODO End mainloop instead so all cleanup is done
crm_exit(CRM_EX_OK);
}
/*!
 * \internal
 * \brief Ask the cluster to shut us down if appropriate, else exit right away
 *
 * In remote builds with an active proxied IPC provider, a shutdown request
 * is sent to the cluster and the executor keeps running until the request
 * has been dealt with. In every other case, including a failed request, the
 * executor exits immediately.
 *
 * \param[in] nsig  Signal that caused invocation (ignored)
 */
static void
lrmd_shutdown(int nsig)
{
#ifdef PCMK__COMPILE_REMOTE
    pcmk__client_t *ipc_proxy = ipc_proxy_get_provider();

    /* If there are active proxied IPC providers, then we may be running
     * resources, so notify the cluster that we wish to shut down.
     */
    if (ipc_proxy != NULL) {
        if (shutting_down) {
            crm_notice("Waiting for cluster to stop resources before exiting");
            return;
        }

        crm_info("Sending shutdown request to cluster");
        if (ipc_proxy_shutdown_req(ipc_proxy) >= 0) {
            /* We requested a shutdown. Now, we need to wait for an
             * acknowledgement from the proxy host, then wait for all proxy
             * hosts to disconnect (which ensures that all resources have
             * been stopped).
             */
            shutting_down = TRUE;

            // Stop accepting new proxy connections
            execd_stop_tls_server();

            /* Currently, we let the OS kill us if the clients don't
             * disconnect in a reasonable time. We could instead set a long
             * timer here (shorter than what the OS is likely to use) and
             * exit immediately if it pops.
             */
            return;
        }

        crm_crit("Shutdown request failed, exiting immediately");
    }
#endif

    exit_executor();
}
/*!
 * \internal
 * \brief Log the acknowledgment of a shutdown request
 *
 * The acknowledgment is only expected in remote builds after a shutdown has
 * been requested; otherwise it is logged as unexpected and ignored.
 */
void
handle_shutdown_ack(void)
{
    bool expected = false;

#ifdef PCMK__COMPILE_REMOTE
    if (shutting_down) {
        expected = true;
    }
#endif

    if (expected) {
        crm_info("IPC proxy provider acknowledged shutdown request");
    } else {
        crm_debug("Ignoring unexpected shutdown acknowledgment "
                  "from IPC proxy provider");
    }
}
/*!
 * \internal
 * \brief React to the rejection of a shutdown request
 *
 * In remote builds, a rejection received after a shutdown has been requested
 * makes the executor exit immediately. Otherwise the rejection is logged as
 * unexpected and ignored.
 */
void
handle_shutdown_nack(void)
{
    bool expected = false;

#ifdef PCMK__COMPILE_REMOTE
    if (shutting_down) {
        expected = true;
    }
#endif

    if (!expected) {
        crm_debug("Ignoring unexpected shutdown rejection from IPC proxy provider");
        return;
    }

    crm_info("Exiting immediately after IPC proxy provider "
             "indicated no resources will be stopped");
    exit_executor();
}
/* Command-line options specific to this daemon. Parsed values are stored in
 * the file-level options structure. The port option exists in remote builds
 * only.
 */
static GOptionEntry entries[] = {
{ "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
&options.log_files, "Send logs to the additional named logfile", NULL },
#ifdef PCMK__COMPILE_REMOTE
{ "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port,
"Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL },
#endif // PCMK__COMPILE_REMOTE
// Table terminator
{ NULL }
};
/* Output formats registered by main() via pcmk__register_formats(); the
 * all-NULL entry terminates the table.
 */
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/*!
 * \internal
 * \brief Create the option context used to parse the executor's command line
 *
 * \param[in,out] args   Common argument object passed to the context builder
 * \param[out]    group  Where the builder stores the output option group
 *
 * \return Newly built option context with this daemon's \c entries added
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionContext *ctx = pcmk__build_arg_context(args, "text (default), xml",
                                                  group, NULL);

    pcmk__add_main_args(ctx, entries);
    return ctx;
}
/*!
 * \brief Entry point of the Pacemaker executor daemon
 *
 * Parses the command line, sets up output and logging, exports logging
 * settings for resource agents, creates the IPC server (and, in remote
 * builds, the TLS listener and IPC proxy), then runs the main loop until
 * shutdown.
 *
 * \param[in] argc  Argument count
 * \param[in] argv  Argument vector
 * \param[in] envp  Environment (used only in remote builds, for PID 1 setup)
 *
 * \return Does not return normally; exits via crm_exit() with \c exit_code
 */
int
main(int argc, char **argv, char **envp)
{
    int rc = pcmk_rc_ok;
    crm_exit_t exit_code = CRM_EX_OK;

    const char *option = NULL;

    pcmk__output_t *out = NULL;

    GError *error = NULL;

    GOptionGroup *output_group = NULL;
    pcmk__common_args_t *args = NULL;
    gchar **processed_args = NULL;
    GOptionContext *context = NULL;

#ifdef PCMK__COMPILE_REMOTE
    // If necessary, create PID 1 now before any file descriptors are opened
    remoted_spawn_pidone(argc, argv, envp);
#endif

    args = pcmk__new_common_args(SUMMARY);
#ifdef PCMK__COMPILE_REMOTE
    processed_args = pcmk__cmdline_preproc(argv, "lp");
#else
    processed_args = pcmk__cmdline_preproc(argv, "l");
#endif  // PCMK__COMPILE_REMOTE
    context = build_arg_context(args, &output_group);

    crm_log_preinit(EXECD_NAME, argc, argv);

    pcmk__register_formats(output_group, formats);
    if (!g_option_context_parse_strv(context, &processed_args, &error)) {
        exit_code = CRM_EX_USAGE;
        goto done;
    }

    rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
    if (rc != pcmk_rc_ok) {
        exit_code = CRM_EX_ERROR;
        g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
                    "Error creating output format %s: %s",
                    args->output_ty, pcmk_rc_str(rc));
        goto done;
    }

    if (args->version) {
        out->version(out, false);
        goto done;
    }

    // Open additional log files
    if (options.log_files != NULL) {
        for (gchar **fname = options.log_files; *fname != NULL; fname++) {
            rc = pcmk__add_logfile(*fname);

            if (rc != pcmk_rc_ok) {
                // A log file that cannot be opened is reported, not fatal
                out->err(out, "Logging to %s is disabled: %s",
                         *fname, pcmk_rc_str(rc));
            }
        }
    }

    pcmk__cli_init_logging(EXECD_NAME, args->verbosity);
    crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);

    // ocf_log() (in resource-agents) uses the capitalized env options below
    option = pcmk__env_option(PCMK__ENV_LOGFACILITY);
    if (!pcmk__str_eq(option, PCMK_VALUE_NONE,
                      pcmk__str_casei|pcmk__str_null_matches)
        && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) {

        pcmk__set_env_option("LOGFACILITY", option, true);
    }

    option = pcmk__env_option(PCMK__ENV_LOGFILE);
    if (!pcmk__str_eq(option, PCMK_VALUE_NONE,
                      pcmk__str_casei|pcmk__str_null_matches)) {
        pcmk__set_env_option("LOGFILE", option, true);

        if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) {
            pcmk__set_env_option("DEBUGLOG", option, true);
        }
    }

#ifdef PCMK__COMPILE_REMOTE
    if (options.port != NULL) {
        pcmk__set_env_option(PCMK__ENV_REMOTE_PORT, options.port, false);
    }
#endif  // PCMK__COMPILE_REMOTE

    start_time = time(NULL);

    crm_notice("Starting Pacemaker " EXECD_TYPE " executor");

    /* The presence of this variable allegedly controls whether child
     * processes like httpd will try and use Systemd's sd_notify
     * API
     */
    unsetenv("NOTIFY_SOCKET");

    /* Temporary directory for resource agent use (leave owned by root).
     * The function-level rc is reused here rather than declaring a second,
     * shadowing rc in a nested scope; failure is only a warning.
     */
    rc = pcmk__build_path(PCMK__OCF_TMP_DIR, 0755);
    if (rc != pcmk_rc_ok) {
        crm_warn("Could not create resource agent temporary directory "
                 PCMK__OCF_TMP_DIR ": %s", pcmk_rc_str(rc));
    }

    rsc_list = pcmk__strkey_table(NULL, free_rsc);
    ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks);
    if (ipcs == NULL) {
        crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
        exit_code = CRM_EX_FATAL;
        goto done;
    }

#ifdef PCMK__COMPILE_REMOTE
    if (lrmd_init_remote_tls_server() < 0) {
        crm_err("Failed to create TLS listener: shutting down and staying down");
        exit_code = CRM_EX_FATAL;
        goto done;
    }
    ipc_proxy_init();
#endif

    mainloop_add_signal(SIGTERM, lrmd_shutdown);
    mainloop = g_main_loop_new(NULL, FALSE);
    crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections");
    crm_notice("OCF resource agent search path is %s", PCMK__OCF_RA_PATH);
    g_main_loop_run(mainloop);

    /* should never get here */
    exit_executor();

done:
    // Release everything acquired during argument parsing and output setup
    g_strfreev(options.log_files);
#ifdef PCMK__COMPILE_REMOTE
    g_free(options.port);
#endif  // PCMK__COMPILE_REMOTE

    g_strfreev(processed_args);
    pcmk__free_arg_context(context);

    pcmk__output_and_clear_error(&error, out);

    if (out != NULL) {
        out->finish(out, exit_code, true, NULL);
        pcmk__output_free(out);
    }
    pcmk__unregister_formats();
    crm_exit(exit_code);
}
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
index 1424a31623..cc83fd32a3 100644
--- a/daemons/fenced/cts-fence-helper.c
+++ b/daemons/fenced/cts-fence-helper.c
@@ -1,695 +1,664 @@
/*
- * Copyright 2009-2024 the Pacemaker project contributors
+ * Copyright 2009-2025 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/agents.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
// One-line program summary passed to pcmk__new_common_args() in main()
#define SUMMARY "cts-fence-helper - inject commands into the Pacemaker fencer and watch for events"
// Main loop created and run by mainloop_tests()
static GMainLoop *mainloop = NULL;
// Trigger that schedules the next iteration of the mainloop test sequence
static crm_trigger_t *trig = NULL;
// Index of the current mainloop test; incremented by mainloop_test_done() on pass
static int mainloop_iter = 0;
// Result of the latest asynchronous call, recorded by mainloop_callback()
static pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
// Signature of one mainloop test step; check_event is nonzero when a result is ready
typedef void (*mainloop_test_iteration_cb) (int check_event);
// Timeout given to async fence/monitor calls and callback registration
// (presumably seconds -- confirm against the stonith API)
#define MAINLOOP_DEFAULT_TIMEOUT 2
// Which test suite main() runs; chosen on the command line via mode_cb()
enum test_modes {
test_standard = 0, // test using a specific developer environment
- test_passive, // watch notifications only
test_api_sanity, // sanity-test stonith client API using fence_dummy
test_api_mainloop, // sanity-test mainloop code with async responses
};
// Parsed command-line state; defaults to the standard developer test
struct {
enum test_modes mode;
} options = {
.mode = test_standard
};
/* GOption callback shared by all mode-selecting flags.
 *
 * Sets options.mode according to which option name triggered the callback.
 * An unrecognized name leaves the mode unchanged. Always returns TRUE, so it
 * never reports a parse error.
 */
static gboolean
mode_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (pcmk__str_any_of(option_name, "--mainloop_api_test", "-m", NULL)) {
options.mode = test_api_mainloop;
} else if (pcmk__str_any_of(option_name, "--api_test", "-t", NULL)) {
options.mode = test_api_sanity;
- } else if (pcmk__str_any_of(option_name, "--passive", "-p", NULL)) {
- options.mode = test_passive;
}
return TRUE;
}
/* Mode-selecting command-line flags. Each takes no argument and is routed to
 * mode_cb(); no description text is provided. { NULL } terminates the table.
 */
static GOptionEntry entries[] = {
{ "mainloop_api_test", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
NULL, NULL,
},
{ "api_test", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
NULL, NULL,
},
- { "passive", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
- NULL, NULL,
- },
-
{ NULL }
};
// Fencer API object shared by all tests; created in main()
static stonith_t *st = NULL;
// Poll descriptor for the fencer connection; its fd is filled in by connect()
static struct pollfd pollfd;
// Call options used by the synchronous tests
static const int st_opts = st_opt_sync_call;
// Notifications still awaited by single_test(); decremented in st_callback()
static int expected_notifications = 0;
// When nonzero, single_test() logs successes at info rather than debug level
static int verbose = 0;
/* Record the outcome of one mainloop test step.
 *
 * origin: name of the test reporting in (used only for logging)
 * pass:   whether the step succeeded
 *
 * A failure is logged together with the last recorded result and ends the
 * program with CRM_EX_ERROR. A success advances to the next test, re-arms the
 * trigger, and resets the shared result to "done / OK".
 */
static void
mainloop_test_done(const char *origin, bool pass)
{
    if (!pass) {
        crm_err("FAILURE - %s (%d: %s)", origin, result.exit_status,
                pcmk_exec_status_str(result.execution_status));
        crm_exit(CRM_EX_ERROR);
        return;
    }

    crm_info("SUCCESS - %s", origin);

    // Move on to the next callback and schedule it
    mainloop_iter++;
    mainloop_set_trigger(trig);

    // Start the next step from a clean, successful result
    result.execution_status = PCMK_EXEC_DONE;
    result.exit_status = CRM_EX_OK;
}
/* Wait for and dispatch fencer messages on the connection in pollfd.
 *
 * timeout is handed unchanged to poll(). The loop ends as soon as poll()
 * reports nothing readable (timeout or error) or dispatching a message does
 * not succeed.
 */
static void
dispatch_helper(int timeout)
{
int rc;
crm_debug("Looking for notification");
pollfd.events = POLLIN;
while (true) {
rc = poll(&pollfd, 1, timeout); /* poll() timeout is in milliseconds; a negative value waits forever */
if (rc > 0) {
- if (!stonith_dispatch(st)) {
+ if (stonith__api_dispatch(st) != pcmk_rc_ok) {
break;
}
} else {
break;
}
}
}
/* Notification handler registered for disconnect, fence, and device add/delete
 * events.
 *
 * Exits with CRM_EX_DISCONNECT if the connection is gone; otherwise logs a
 * description of the event and counts it against the notifications that
 * single_test() is waiting for.
 */
static void
st_callback(stonith_t * st, stonith_event_t * e)
{
    char *description = NULL;

    if (st->state == stonith_disconnected) {
        crm_exit(CRM_EX_DISCONNECT);
    }

    description = stonith__event_description(e);
    crm_notice("%s", description);
    free(description);

    // Only count down while something is actually being awaited
    if (expected_notifications != 0) {
        expected_notifications -= 1;
    }
}
/* Global operation callback: log the call ID, exit status, execution status,
 * and exit reason (or "unspecified reason" when none is set) of a completed
 * fencer call.
 */
static void
st_global_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    const char *reason = pcmk__s(stonith__exit_reason(data),
                                 "unspecified reason");

    crm_notice("Call %d exited %d: %s (%s)",
               data->call_id, stonith__exit_status(data),
               stonith__execution_status(data), reason);
}
-static void
-passive_test(void)
-{
- int rc = 0;
-
- rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
- if (rc != pcmk_ok) {
- stonith_api_delete(st);
- crm_exit(CRM_EX_DISCONNECT);
- }
- st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT,
- st_callback);
- st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE,
- st_callback);
- st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
- st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback);
- st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback",
- st_global_callback);
-
- dispatch_helper(600 * 1000);
-}
-
/* Run one synchronous API call and verify its outcome.
 *
 * cmd:               expression performing the call; its value is the result code
 * str:               description of the call, used in log messages
 * num_notifications: notifications expected as a consequence of the call
 *                    (0 to skip waiting)
 * expected_rc:       result code the call must return
 *
 * When notifications are expected, dispatch_helper(500) is given a chance to
 * deliver them; st_callback() decrements expected_notifications as they
 * arrive. Any mismatch in result code or notification count is logged and
 * ends the program with CRM_EX_ERROR.
 *
 * The body is wrapped in do { } while (0) so an invocation followed by ';'
 * is exactly one statement, macro arguments are parenthesized, and the last
 * line carries no continuation backslash, so the macro cannot swallow
 * whatever source line follows it.
 */
#define single_test(cmd, str, num_notifications, expected_rc) \
    do { \
        int rc = 0; \
        rc = (cmd); \
        expected_notifications = 0; \
        if (num_notifications) { \
            expected_notifications = (num_notifications); \
            dispatch_helper(500); \
        } \
        if (rc != (expected_rc)) { \
            crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s", (expected_rc), rc, pcmk_strerror(rc), (str)); \
            crm_exit(CRM_EX_ERROR); \
        } else if (expected_notifications) { \
            crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s", \
                    (num_notifications), (num_notifications) - expected_notifications, (str)); \
            crm_exit(CRM_EX_ERROR); \
        } else { \
            if (verbose) { \
                crm_info("SUCCESS - %s: %d", (str), rc); \
            } else { \
                crm_debug("SUCCESS - %s: %d", (str), rc); \
            } \
        } \
    } while (0)
/* Sanity test: fencing through a device configured to fail must fail.
 *
 * Registers one fence_dummy device in "fail" mode, checks that both "off" and
 * "reboot" against one of its targets return -ENODATA, then removes the
 * device. Each step expects exactly one notification.
 */
static void
run_fence_failure_test(void)
{
stonith_key_value_t *params = NULL;
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
- "false_1_node1=1,2 false_1_node2=3,4");
- params = stonith_key_value_add(params, "mode", "fail");
+ params = stonith__key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2 false_1_node2=3,4");
+ params = stonith__key_value_add(params, "mode", "fail");
single_test(st->
cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params),
"Register device1 for failure test", 1, 0);
single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF,
3, 0),
"Fence failure results off", 1, -ENODATA);
single_test(st->cmds->fence(st, st_opts, "false_1_node2",
PCMK_ACTION_REBOOT, 3, 0),
"Fence failure results reboot", 1, -ENODATA);
single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
"Remove device1 for failure test", 1, 0);
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
}
/* Sanity test: with a failing and a passing device for the same targets,
 * fencing must still succeed.
 *
 * Registers "test-id1" in "fail" mode and "test-id2" in "pass" mode with the
 * same host map, then expects "off" to succeed and "on" to return -ENODEV,
 * and finally removes both devices.
 */
static void
run_fence_failure_rollover_test(void)
{
stonith_key_value_t *params = NULL;
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
- "false_1_node1=1,2 false_1_node2=3,4");
- params = stonith_key_value_add(params, "mode", "fail");
+ params = stonith__key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2 false_1_node2=3,4");
+ params = stonith__key_value_add(params, "mode", "fail");
single_test(st->
cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params),
"Register device1 for rollover test", 1, 0);
// The first parameter list is finished with; start a fresh one for device 2
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
params = NULL;
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
- "false_1_node1=1,2 false_1_node2=3,4");
- params = stonith_key_value_add(params, "mode", "pass");
+ params = stonith__key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2 false_1_node2=3,4");
+ params = stonith__key_value_add(params, "mode", "pass");
single_test(st->
cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params),
"Register device2 for rollover test", 1, 0);
single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF,
3, 0),
"Fence rollover results off", 1, 0);
/* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */
single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_ON, 3,
0),
"Fence rollover results on", 1, -ENODEV);
single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
"Remove device1 for rollover tests", 1, 0);
single_test(st->cmds->remove_device(st, st_opts, "test-id2"),
"Remove device2 for rollover tests", 1, 0);
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
}
/* Sanity test of the basic synchronous API against one passing fence_dummy
 * device: register, list, monitor, status, fence (unknown and known targets),
 * unfence, an invalid topology level registration, and device removal.
 */
static void
run_standard_test(void)
{
stonith_key_value_t *params = NULL;
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
- "false_1_node1=1,2 false_1_node2=3,4");
- params = stonith_key_value_add(params, "mode", "pass");
- params = stonith_key_value_add(params, "mock_dynamic_hosts", "false_1_node1 false_1_node2");
+ params = stonith__key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2 false_1_node2=3,4");
+ params = stonith__key_value_add(params, "mode", "pass");
+ params = stonith__key_value_add(params, "mock_dynamic_hosts",
+ "false_1_node1 false_1_node2");
single_test(st->
cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_dummy", params),
"Register", 1, 0);
// params is NULL from here on, including in the register_level() call below
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
params = NULL;
single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1),
PCMK_ACTION_LIST, 0, 0);
single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 0, 0);
single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node2", 1),
"Status false_1_node2", 0, 0);
single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1),
"Status false_1_node1", 0, 0);
single_test(st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF,
1, 0),
"Fence unknown-host (expected failure)", 0, -ENODEV);
single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF,
1, 0),
"Fence false_1_node1", 1, 0);
/* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */
single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 1,
0),
"Unfence false_1_node1", 1, -ENODEV);
/* Confirm that an invalid level index is rejected */
single_test(st->cmds->register_level(st, st_opts, "node1", 999, params),
"Attempt to register an invalid level index", 0, -EINVAL);
single_test(st->cmds->remove_device(st, st_opts, "test-id"), "Remove test-id", 1, 0);
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
}
/* Run the synchronous API sanity suite.
 *
 * Connects to the fencer (exiting with CRM_EX_DISCONNECT on failure),
 * registers st_callback for the notification types the tests count and
 * st_global_callback for all operations, then runs the three test groups.
 */
static void
sanity_tests(void)
{
int rc = 0;
// connect() also supplies the descriptor that dispatch_helper() polls
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
if (rc != pcmk_ok) {
- stonith_api_delete(st);
+ stonith__api_free(st);
crm_exit(CRM_EX_DISCONNECT);
}
st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT,
st_callback);
st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE,
st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback);
st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback",
st_global_callback);
crm_info("Starting API Sanity Tests");
run_standard_test();
run_fence_failure_test();
run_fence_failure_rollover_test();
crm_info("Sanity Tests Passed");
}
/* Default test mode: exercise the API against a fence_xvm device.
 *
 * Connects to the fencer, registers "test-id", and issues a fixed sequence of
 * list/monitor/status/fence calls. Result codes are only logged at debug
 * level; nothing is asserted.
 */
static void
standard_dev_test(void)
{
int rc = 0;
char *tmp = NULL;
stonith_key_value_t *params = NULL;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
if (rc != pcmk_ok) {
- stonith_api_delete(st);
+ stonith__api_free(st);
crm_exit(CRM_EX_DISCONNECT);
}
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
- "some-host=pcmk-7 true_1_node1=3,4");
+ params = stonith__key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "some-host=pcmk-7 true_1_node1=3,4");
rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_xvm", params);
crm_debug("Register: %d", rc);
// NOTE(review): tmp is never freed in this function -- confirm whether list() allocates it
rc = st->cmds->list(st, st_opts, "test-id", &tmp, 10);
crm_debug("List: %d output: %s", rc, tmp ? tmp : "<none>");
rc = st->cmds->monitor(st, st_opts, "test-id", 10);
crm_debug("Monitor: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node2", 10);
crm_debug("Status false_1_node2: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
crm_debug("Status false_1_node1: %d", rc);
rc = st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, 60, 0);
crm_debug("Fence unknown-host: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
crm_debug("Status false_1_node1: %d", rc);
rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, 60, 0);
crm_debug("Fence false_1_node1: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
crm_debug("Status false_1_node1: %d", rc);
rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0);
crm_debug("Unfence false_1_node1: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
crm_debug("Status false_1_node1: %d", rc);
// "some-host" is the alias defined in the host map above
rc = st->cmds->fence(st, st_opts, "some-host", PCMK_ACTION_OFF, 10, 0);
crm_debug("Fence alias: %d", rc);
rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10);
crm_debug("Status alias: %d", rc);
rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0);
crm_debug("Unfence false_1_node1: %d", rc);
rc = st->cmds->remove_device(st, st_opts, "test-id");
crm_debug("Remove test-id: %d", rc);
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
}
static void
iterate_mainloop_tests(gboolean event_ready);
/* Completion callback for asynchronous test calls: store the call's outcome in
 * the shared result and re-enter the current test step with an event ready.
 */
static void
mainloop_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    int exit_status = stonith__exit_status(data);
    int exec_status = stonith__execution_status(data);
    const char *reason = stonith__exit_reason(data);

    pcmk__set_result(&result, exit_status, exec_status, reason);
    iterate_mainloop_tests(TRUE);
}
/* Register mainloop_callback() for the asynchronous call identified by callid,
 * using the default mainloop timeout and allowing timeout updates.
 *
 * Returns whatever the API's register_callback() returns.
 */
static int
register_callback_helper(int callid)
{
    const int rc = st->cmds->register_callback(st, callid,
                                               MAINLOOP_DEFAULT_TIMEOUT,
                                               st_opt_timeout_updates, NULL,
                                               "callback", mainloop_callback);

    return rc;
}
/* Mainloop test step: an asynchronous "off" of true_1_node1 must succeed.
 *
 * Called first with check_event == 0 to issue the request, then again with a
 * nonzero check_event once the result has been recorded.
 */
static void
test_async_fence_pass(int check_event)
{
    int call_id = 0;

    if (check_event) {
        bool succeeded = (result.exit_status == CRM_EX_OK);

        mainloop_test_done(__func__, succeeded);
        return;
    }

    call_id = st->cmds->fence(st, 0, "true_1_node1", PCMK_ACTION_OFF,
                              MAINLOOP_DEFAULT_TIMEOUT, 0);
    if (call_id < 0) {
        crm_err("fence failed with rc %d", call_id);
        mainloop_test_done(__func__, false);
    }

    // The result arrives later through mainloop_callback()
    register_callback_helper(call_id);
}
#define CUSTOM_TIMEOUT_ADDITION 10
/* Mainloop test step: a device-specific timeout must extend the callback's
 * expiration.
 *
 * The request is issued with the default timeout against a device registered
 * with a longer pcmk_off_timeout. When the result arrives it must be a
 * timeout, and at least CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT
 * seconds must have elapsed since the request was sent.
 *
 * check_event is 0 to issue the request and nonzero to evaluate the result.
 */
static void
test_async_fence_custom_timeout(int check_event)
{
    int rc = 0;
    static time_t begin = 0;    // when the request was issued

    if (check_event) {
        uint32_t diff = (time(NULL) - begin);

        if (result.execution_status != PCMK_EXEC_TIMEOUT) {
            mainloop_test_done(__func__, false);

        } else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) {
            // diff is unsigned, so it is printed with %u (the sum is an int)
            crm_err("Custom timeout test failed, callback expiration should be updated to %d, actual timeout was %u",
                    CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT,
                    (unsigned int) diff);
            mainloop_test_done(__func__, false);

        } else {
            mainloop_test_done(__func__, true);
        }
        return;
    }

    begin = time(NULL);

    rc = st->cmds->fence(st, 0, "custom_timeout_node1", PCMK_ACTION_OFF,
                         MAINLOOP_DEFAULT_TIMEOUT, 0);
    if (rc < 0) {
        crm_err("fence failed with rc %d", rc);
        mainloop_test_done(__func__, false);
    }
    register_callback_helper(rc);
    /* wait for event */
}
/* Mainloop test step: an asynchronous "off" of false_1_node2 must end with
 * execution status PCMK_EXEC_NO_FENCE_DEVICE.
 *
 * check_event is 0 to issue the request and nonzero to evaluate the result.
 */
static void
test_async_fence_timeout(int check_event)
{
    int call_id = 0;

    if (check_event) {
        bool as_expected =
            (result.execution_status == PCMK_EXEC_NO_FENCE_DEVICE);

        mainloop_test_done(__func__, as_expected);
        return;
    }

    call_id = st->cmds->fence(st, 0, "false_1_node2", PCMK_ACTION_OFF,
                              MAINLOOP_DEFAULT_TIMEOUT, 0);
    if (call_id < 0) {
        crm_err("fence failed with rc %d", call_id);
        mainloop_test_done(__func__, false);
    }

    // The result arrives later through mainloop_callback()
    register_callback_helper(call_id);
}
/* Mainloop test step: an asynchronous monitor of device "false_1" must
 * complete with exit status CRM_EX_OK.
 *
 * check_event is 0 to issue the request and nonzero to evaluate the result.
 */
static void
test_async_monitor(int check_event)
{
    int call_id = 0;

    if (check_event) {
        bool succeeded = (result.exit_status == CRM_EX_OK);

        mainloop_test_done(__func__, succeeded);
        return;
    }

    call_id = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT);
    if (call_id < 0) {
        crm_err("monitor failed with rc %d", call_id);
        mainloop_test_done(__func__, false);
    }

    // The result arrives later through mainloop_callback()
    register_callback_helper(call_id);
}
/* Mainloop test step: register the three fence_dummy devices used by the
 * asynchronous tests ("false_1", "true_1", "false_custom_timeout").
 *
 * Registration is synchronous and its result codes are not checked; the step
 * always reports success. check_event is ignored.
 */
static void
test_register_async_devices(int check_event)
{
char buf[16] = { 0, };
stonith_key_value_t *params = NULL;
// Device 1: always fails
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
- "false_1_node1=1,2");
- params = stonith_key_value_add(params, "mode", "fail");
+ params = stonith__key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "false_1_node1=1,2");
+ params = stonith__key_value_add(params, "mode", "fail");
st->cmds->register_device(st, st_opts, "false_1", "stonith-ng", "fence_dummy", params);
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
params = NULL;
// Device 2: always passes
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
- "true_1_node1=1,2");
- params = stonith_key_value_add(params, "mode", "pass");
+ params = stonith__key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "true_1_node1=1,2");
+ params = stonith__key_value_add(params, "mode", "pass");
st->cmds->register_device(st, st_opts, "true_1", "stonith-ng", "fence_dummy", params);
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
params = NULL;
// Device 3: fails after a delay, with a custom "off" timeout
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
- "custom_timeout_node1=1,2");
- params = stonith_key_value_add(params, "mode", "fail");
- params = stonith_key_value_add(params, "delay", "1000");
+ params = stonith__key_value_add(params, PCMK_STONITH_HOST_MAP,
+ "custom_timeout_node1=1,2");
+ params = stonith__key_value_add(params, "mode", "fail");
+ params = stonith__key_value_add(params, "delay", "1000");
// The custom timeout is the default plus CUSTOM_TIMEOUT_ADDITION, as a string
snprintf(buf, sizeof(buf) - 1, "%d", MAINLOOP_DEFAULT_TIMEOUT + CUSTOM_TIMEOUT_ADDITION);
- params = stonith_key_value_add(params, "pcmk_off_timeout", buf);
+ params = stonith__key_value_add(params, "pcmk_off_timeout", buf);
st->cmds->register_device(st, st_opts, "false_custom_timeout", "stonith-ng", "fence_dummy",
params);
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
mainloop_test_done(__func__, true);
}
/* Mainloop test step: connect to the fencer, retrying (the final argument, 10,
 * is passed to the retry helper). The step passes only if the connection
 * succeeds. check_event is ignored.
 */
static void
try_mainloop_connect(int check_event)
{
- int rc = stonith_api_connect_retry(st, crm_system_name, 10);
+ int rc = stonith__api_connect_retry(st, crm_system_name, 10);
- if (rc == pcmk_ok) {
+ if (rc == pcmk_rc_ok) {
mainloop_test_done(__func__, true);
return;
}
crm_err("API CONNECTION FAILURE");
mainloop_test_done(__func__, false);
}
/* Run the current step of the mainloop test sequence.
 *
 * event_ready is forwarded to the step: FALSE asks it to start its request,
 * TRUE asks it to evaluate the recorded result. Once every step has passed,
 * the program exits with CRM_EX_OK.
 */
static void
iterate_mainloop_tests(gboolean event_ready)
{
    // Steps run in this order; mainloop_iter indexes the current one
    static mainloop_test_iteration_cb callbacks[] = {
        try_mainloop_connect,
        test_register_async_devices,
        test_async_monitor,
        test_async_fence_pass,
        test_async_fence_timeout,
        test_async_fence_custom_timeout,
    };
    const size_t num_steps = sizeof(callbacks) / sizeof(callbacks[0]);

    if (mainloop_iter == num_steps) {
        /* all tests ran, everything passed */
        crm_info("ALL MAINLOOP TESTS PASSED!");
        crm_exit(CRM_EX_OK);
    }

    callbacks[mainloop_iter](event_ready);
}
/* Trigger callback: start the current mainloop test step (no event is ready
 * yet). user_data is unused. Always returns TRUE.
 */
static gboolean
trigger_iterate_mainloop_tests(gpointer user_data)
{
    const gboolean event_ready = FALSE;

    iterate_mainloop_tests(event_ready);
    return TRUE;
}
/* Disconnect from the fencer and free the API object.
 *
 * Used both as the SIGTERM handler in mainloop mode (nsig is ignored) and as
 * the final cleanup call in main(). Exits with CRM_EX_ERROR if disconnecting
 * returned a nonzero code; otherwise it simply returns.
 */
static void
test_shutdown(int nsig)
{
int rc = 0;
if (st) {
rc = st->cmds->disconnect(st);
crm_info("Disconnect: %d", rc);
crm_debug("Destroy");
- stonith_api_delete(st);
+ stonith__api_free(st);
}
if (rc) {
crm_exit(CRM_EX_ERROR);
}
}
/* Run the asynchronous test sequence under a GLib main loop.
 *
 * A high-priority trigger drives iterate_mainloop_tests(); it is set once here
 * to start the first step. SIGTERM is routed to test_shutdown().
 */
static void
mainloop_tests(void)
{
    trig = mainloop_add_trigger(G_PRIORITY_HIGH,
                                trigger_iterate_mainloop_tests, NULL);
    mainloop_set_trigger(trig);

    mainloop_add_signal(SIGTERM, test_shutdown);

    crm_info("Starting");

    mainloop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(mainloop);
}
/* Create the option context for this helper: the common context (no output
 * format description) plus the mode-selecting entries.
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionContext *ctx = pcmk__build_arg_context(args, NULL, group, NULL);

    pcmk__add_main_args(ctx, entries);
    return ctx;
}
/* Entry point: parse the command line, initialize logging, create the fencer
 * API object, run the selected test mode, and clean up.
 *
 * Exits via crm_exit() with CRM_EX_USAGE on a parse error, CRM_EX_DISCONNECT
 * if the API object cannot be created, and CRM_EX_OK otherwise (individual
 * tests exit on their own when they fail).
 */
int
main(int argc, char **argv)
{
GError *error = NULL;
crm_exit_t exit_code = CRM_EX_OK;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
// No output option group is requested, hence the NULL
GOptionContext *context = build_arg_context(args, NULL);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
/* We have to use crm_log_init here to set up the logging because there's
* different handling for daemons vs. command line programs, and
* pcmk__cli_init_logging is set up to only handle the latter.
*/
crm_log_init(NULL, LOG_INFO, TRUE, (verbose? TRUE : FALSE), argc, argv,
FALSE);
// Raise the log level once per requested level of verbosity
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
- st = stonith_api_new();
+ st = stonith__api_new();
if (st == NULL) {
exit_code = CRM_EX_DISCONNECT;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Could not connect to fencer: API memory allocation failed");
goto done;
}
switch (options.mode) {
case test_standard:
standard_dev_test();
break;
- case test_passive:
- passive_test();
- break;
case test_api_sanity:
sanity_tests();
break;
case test_api_mainloop:
mainloop_tests();
break;
}
// Disconnect and free the API object (exits on a disconnect error)
test_shutdown(0);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__output_and_clear_error(&error, NULL);
crm_exit(exit_code);
}
diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c
index 90c225569e..e76b25aeef 100644
--- a/daemons/fenced/fenced_cib.c
+++ b/daemons/fenced/fenced_cib.c
@@ -1,658 +1,636 @@
/*
* Copyright 2009-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <stdio.h>
#include <libxml/tree.h> // xmlNode
#include <libxml/xpath.h> // xmlXPathObject, etc.
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cib.h>
#include <crm/cib/internal.h>
#include <pacemaker-fenced.h>
// Locally cached CIB; searched by node_has_attr() and the topology/device refresh code
static xmlNode *local_cib = NULL;
// NOTE(review): presumably the CIB manager connection -- its use is outside this excerpt
static cib_t *cib_api = NULL;
// NOTE(review): presumably records whether the CIB defines fence devices -- confirm at its users
static bool have_cib_devices = FALSE;
/*!
 * \internal
 * \brief Check whether the cached CIB gives a node a particular attribute value
 *
 * \param[in] node   Name of node to check
 * \param[in] name   Name of the node attribute to look for
 * \param[in] value  Value the attribute must have to count as a match
 *
 * \return TRUE if the locally cached CIB has a matching name/value pair in
 *         the node's instance attributes, otherwise FALSE (including when
 *         there is no cached CIB or any argument is NULL)
 */
gboolean
node_has_attr(const char *node, const char *name, const char *value)
{
    GString *query = NULL;
    xmlNode *found = NULL;

    CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL)
              && (value != NULL), return FALSE);

    /* Look up the node's attributes in the CIB. The schema permits several
     * instance attribute sets and id-ref indirection, but those are meant
     * for resources, so they are deliberately not handled here.
     */
    query = g_string_sized_new(256);
    pcmk__g_strcat(query,
                   "//" PCMK_XE_NODES "/" PCMK_XE_NODE
                   "[@" PCMK_XA_UNAME "='", node, "']"
                   "/" PCMK_XE_INSTANCE_ATTRIBUTES
                   "/" PCMK_XE_NVPAIR
                   "[@" PCMK_XA_NAME "='", name, "' "
                   "and @" PCMK_XA_VALUE "='", value, "']", NULL);

    found = pcmk__xpath_find_one(local_cib->doc, query->str, LOG_NEVER);
    g_string_free(query, TRUE);

    return (found != NULL);
}
-static void
-add_topology_level(xmlNode *match)
-{
- char *desc = NULL;
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
- CRM_CHECK(match != NULL, return);
-
- fenced_register_level(match, &desc, &result);
- fenced_send_config_notification(STONITH_OP_LEVEL_ADD, &result, desc);
- pcmk__reset_result(&result);
- free(desc);
-}
-
-static void
-topology_remove_helper(const char *node, int level)
-{
- char *desc = NULL;
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
- xmlNode *data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL);
-
- crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__);
- crm_xml_add_int(data, PCMK_XA_INDEX, level);
- crm_xml_add(data, PCMK_XA_TARGET, node);
-
- fenced_unregister_level(data, &desc, &result);
- fenced_send_config_notification(STONITH_OP_LEVEL_DEL, &result, desc);
- pcmk__reset_result(&result);
- pcmk__xml_free(data);
- free(desc);
-}
-
/* Unregister the fencing level described by a CIB fencing-level element.
 *
 * Builds a temporary fencing-level request carrying the level's target key
 * and index, passes it to fenced_unregister_level(), and frees the temporary
 * data. Does nothing if match is NULL.
 */
static void
remove_topology_level(xmlNode *match)
{
int index = 0;
char *key = NULL;
+ xmlNode *data = NULL;
CRM_CHECK(match != NULL, return);
key = stonith_level_key(match, fenced_target_by_unknown);
crm_element_value_int(match, PCMK_XA_INDEX, &index);
- topology_remove_helper(key, index);
+
+ data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL);
+ crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__);
+ crm_xml_add(data, PCMK_XA_TARGET, key);
+ crm_xml_add_int(data, PCMK_XA_INDEX, index);
+
+ fenced_unregister_level(data, NULL);
+
free(key);
+ pcmk__xml_free(data);
}
/* Register every fencing level in an XPath result set.
 *
 * Each level is removed first and then registered again, so an existing
 * definition is replaced. NULL results are skipped.
 */
static void
register_fencing_topology(xmlXPathObjectPtr xpathObj)
{
int max = pcmk__xpath_num_results(xpathObj);
for (int lpc = 0; lpc < max; lpc++) {
xmlNode *match = pcmk__xpath_result(xpathObj, lpc);
if (match == NULL) {
continue;
}
remove_topology_level(match);
- add_topology_level(match);
+ fenced_register_level(match, NULL);
}
}
/* Fencing
<diff crm_feature_set="3.0.6">
<diff-removed>
<fencing-topology>
<fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
<fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
<fencing-level devices="disk,network" id="f-p2.1"/>
</fencing-topology>
</diff-removed>
<diff-added>
<fencing-topology>
<fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
<fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
<fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
</fencing-topology>
</diff-added>
</diff>
*/
void
fencing_topology_init(void)
{
xmlXPathObject *xpathObj = NULL;
const char *xpath = "//" PCMK_XE_FENCING_LEVEL;
crm_trace("Full topology refresh");
free_topology_list();
init_topology_list();
/* Grab everything */
xpathObj = pcmk__xpath_search(local_cib->doc, xpath);
register_fencing_topology(xpathObj);
xmlXPathFreeObject(xpathObj);
}
#define XPATH_WATCHDOG_TIMEOUT "//" PCMK_XE_NVPAIR \
"[@" PCMK_XA_NAME "='" \
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT "']"
/* Refresh stonith_watchdog_timeout_ms from the given CIB.
 *
 * The value of the first matching nvpair is parsed as a time in milliseconds.
 * With no such nvpair (or no value) the timeout is 0. A negative parsed value
 * is replaced by the automatically calculated timeout.
 */
static void
update_stonith_watchdog_timeout_ms(xmlNode *cib)
{
    long long new_timeout_ms = 0;
    const char *configured = NULL;

    // @TODO An XPath search can't handle multiple instances or rules
    xmlNode *nvpair = pcmk__xpath_find_one(cib->doc, XPATH_WATCHDOG_TIMEOUT,
                                           LOG_NEVER);

    if (nvpair != NULL) {
        configured = crm_element_value(nvpair, PCMK_XA_VALUE);
    }

    if (configured != NULL) {
        new_timeout_ms = crm_get_msec(configured);
    }

    if (new_timeout_ms < 0) {
        new_timeout_ms = pcmk__auto_stonith_watchdog_timeout();
    }

    stonith_watchdog_timeout_ms = new_timeout_ms;
}
/*!
* \internal
- * \brief Mark a fence device dirty if its \c cib_registered flag is \c TRUE
+ * \brief Mark a fence device dirty if its \c fenced_df_cib_registered flag is
+ * set
*
* \param[in] key Ignored
* \param[in,out] value Fence device (<tt>fenced_device_t *</tt>)
* \param[in] user_data Ignored
*
* \note This function is suitable for use with \c g_hash_table_foreach().
*/
static void
mark_dirty_if_cib_registered(gpointer key, gpointer value, gpointer user_data)
{
fenced_device_t *device = value;
// Only devices that came from the CIB are candidates for removal on refresh
- if (device->cib_registered) {
- device->dirty = TRUE;
+ if (pcmk_is_set(device->flags, fenced_df_cib_registered)) {
+ fenced_device_set_flags(device, fenced_df_dirty);
}
}
/*!
* \internal
* \brief Return the value of a fence device's \c dirty flag
*
* \param[in] key Ignored
* \param[in] value Fence device (<tt>fenced_device_t *</tt>)
* \param[in] user_data Ignored
*
* \return \c dirty flag of \p value
*
* \note This function is suitable for use with
* \c g_hash_table_foreach_remove().
*/
static gboolean
device_is_dirty(gpointer key, gpointer value, gpointer user_data)
{
fenced_device_t *device = value;
// A true return tells the caller's foreach-remove to drop this device
- return device->dirty;
+ return pcmk_is_set(device->flags, fenced_df_dirty);
}
/*!
 * \internal
 * \brief Bring all fence device definitions in line with the cached CIB
 *
 * Every CIB-registered device is first marked dirty, then the scheduler is
 * run against the local CIB copy, and finally any device that is still dirty
 * is removed.
 */
static void
cib_devices_update(void)
{
    const char *admin_epoch = crm_element_value(local_cib,
                                                PCMK_XA_ADMIN_EPOCH);
    const char *epoch = crm_element_value(local_cib, PCMK_XA_EPOCH);
    const char *num_updates = crm_element_value(local_cib,
                                                PCMK_XA_NUM_UPDATES);

    crm_info("Updating devices to version %s.%s.%s",
             admin_epoch, epoch, num_updates);

    fenced_foreach_device(mark_dirty_if_cib_registered, NULL);

    /* Drop the watchdog target list so that it gets repopulated if the CIB
     * has a watchdog fencing resource.
     *
     * @TODO Keep a cached list for queries that arrive during the refresh
     */
    g_list_free_full(stonith_watchdog_targets, free);
    stonith_watchdog_targets = NULL;

    fenced_scheduler_run(local_cib);

    fenced_foreach_device_remove(device_is_dirty);
}
// XPath fragment that precedes a primitive's ID value in a patchset path
#define PRIMITIVE_ID_XP_FRAGMENT "/" PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "='"
/*!
 * \internal
 * \brief Update fence device registrations based on a CIB patchset
 *
 * Each child of \p patchset is examined. Deleting a whole primitive removes
 * that device directly; any other relevant change sets a reason and triggers
 * a full cib_devices_update().
 *
 * \param[in] patchset  CIB patchset whose children describe the changes
 */
static void
update_cib_stonith_devices(const xmlNode *patchset)
{
// Non-NULL once a change requiring a full device refresh has been found
char *reason = NULL;
for (const xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL,
NULL);
change != NULL; change = pcmk__xe_next(change, NULL)) {
const char *op = crm_element_value(change, PCMK_XA_OPERATION);
const char *xpath = crm_element_value(change, PCMK_XA_PATH);
const char *primitive_xpath = NULL;
// Skip moves (and, presumably, entries with no operation) and status changes
if (pcmk__str_eq(op, PCMK_VALUE_MOVE, pcmk__str_null_matches)
|| (strstr(xpath, "/" PCMK_XE_STATUS) != NULL)) {
continue;
}
primitive_xpath = strstr(xpath, PRIMITIVE_ID_XP_FRAGMENT);
if ((primitive_xpath != NULL)
&& pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none)) {
const char *rsc_id = NULL;
const char *end_quote = NULL;
if ((strstr(primitive_xpath, PCMK_XE_INSTANCE_ATTRIBUTES) != NULL)
|| (strstr(primitive_xpath, PCMK_XE_META_ATTRIBUTES) != NULL)) {
reason = pcmk__str_copy("(meta) attribute deleted from "
"resource");
break;
}
// The ID starts right after the fragment and runs to the closing quote
rsc_id = primitive_xpath + sizeof(PRIMITIVE_ID_XP_FRAGMENT) - 1;
end_quote = strchr(rsc_id, '\'');
CRM_LOG_ASSERT(end_quote != NULL);
if (end_quote == NULL) {
crm_err("Bug: Malformed item in Pacemaker-generated patchset");
continue;
}
// No further path component after the ID means the primitive itself
if (strchr(end_quote, '/') == NULL) {
/* The primitive element itself was deleted. If this was a
* fencing resource, it's faster to remove it directly than to
* run the scheduler and update all device registrations.
*/
char *copy = strndup(rsc_id, end_quote - rsc_id);
pcmk__assert(copy != NULL);
stonith_device_remove(copy, true);
/* watchdog_device_update called afterwards
to fall back to implicit definition if needed */
free(copy);
continue;
}
}
if (strstr(xpath, "/" PCMK_XE_RESOURCES)
|| strstr(xpath, "/" PCMK_XE_CONSTRAINTS)
|| strstr(xpath, "/" PCMK_XE_RSC_DEFAULTS)) {
// A '/' is known to be present here, so shortpath is non-NULL
const char *shortpath = strrchr(xpath, '/');
reason = crm_strdup_printf("%s %s", op, shortpath + 1);
break;
}
}
if (reason != NULL) {
crm_info("Updating device list from CIB: %s", reason);
cib_devices_update();
free(reason);
} else {
crm_trace("No updates for device list found in CIB");
}
}
/*!
 * \internal
 * \brief Register or remove the implicit watchdog fence device as needed
 *
 * With a positive watchdog timeout, the watchdog pseudo-device is registered
 * unless it already exists or \c stonith_watchdog_targets is set. Otherwise
 * an existing watchdog device is removed.
 */
static void
watchdog_device_update(void)
{
    xmlNode *registration = NULL;
    int rc = pcmk_rc_ok;

    if (stonith_watchdog_timeout_ms <= 0) {
        if (fenced_has_watchdog_device()) {
            /* be silent if no device - todo parameter to stonith_device_remove */
            stonith_device_remove(STONITH_WATCHDOG_ID, true);
        }
        return;
    }

    if (fenced_has_watchdog_device() || (stonith_watchdog_targets != NULL)) {
        return;
    }

    /* Watchdog fencing is enabled, there is no device yet, and
     * stonith_watchdog_targets isn't what is preventing one.
     *
     * The host list argument is NULL because fenced_device_register() adds
     * our own name as the PCMK_STONITH_HOST_LIST parameter.
     */
    registration = create_device_registration_xml(STONITH_WATCHDOG_ID,
                                                  st_namespace_internal,
                                                  STONITH_WATCHDOG_AGENT,
                                                  NULL, NULL);
    rc = fenced_device_register(registration, true);
    pcmk__xml_free(registration);

    if (rc == pcmk_rc_ok) {
        return;
    }

    exit_code = CRM_EX_FATAL;
    crm_crit("Cannot register watchdog pseudo fence agent: %s",
             pcmk_rc_str(rc));
    stonith_shutdown(0);
}
/*!
 * \internal
 * \brief Synchronously fetch the whole CIB into \c local_cib
 *
 * \return Standard Pacemaker return code
 */
static int
fenced_query_cib(void)
{
    int legacy_rc = pcmk_ok;
    int rc = pcmk_rc_ok;

    crm_trace("Re-requesting full CIB");

    legacy_rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_sync_call);
    rc = pcmk_legacy2rc(legacy_rc);

    if (rc != pcmk_rc_ok) {
        crm_err("Couldn't retrieve the CIB: %s " QB_XS " rc=%d",
                pcmk_rc_str(rc), rc);
        return rc;
    }

    pcmk__assert(local_cib != NULL);
    return rc;
}
/*!
 * \internal
 * \brief Apply fencing topology changes described by a CIB diff notification
 *
 * Individual level creations and modifications are applied in place. Level
 * deletions and changes to the topology section as a whole re-initialize the
 * entire topology and stop processing.
 *
 * \param[in] event  Not used by this function
 * \param[in] msg    Notification XML wrapping the patchset
 */
static void
update_fencing_topology(const char *event, xmlNode *msg)
{
xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT,
NULL, NULL);
xmlNode *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
int format = 1;
// Version triples filled in by xml_patch_versions(); add[] is used for logging
int add[] = { 0, 0, 0 };
int del[] = { 0, 0, 0 };
CRM_CHECK(patchset != NULL, return);
crm_element_value_int(patchset, PCMK_XA_FORMAT, &format);
// Only patch format 2 is handled
if (format != 2) {
crm_warn("Unknown patch format: %d", format);
return;
}
xml_patch_versions(patchset, add, del);
for (xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL);
change != NULL; change = pcmk__xe_next(change, NULL)) {
const char *op = crm_element_value(change, PCMK_XA_OPERATION);
const char *xpath = crm_element_value(change, PCMK_XA_PATH);
// Children without an operation attribute are not changes to process
if (op == NULL) {
continue;
}
if (strstr(xpath, "/" PCMK_XE_FENCING_LEVEL) != NULL) {
// Change to a specific entry
crm_trace("Handling %s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
if (strcmp(op, PCMK_VALUE_DELETE) == 0) {
/* We have only path and ID, which is not enough info to remove
* a specific entry. Re-initialize the whole topology.
*/
crm_info("Re-initializing fencing topology after %s operation "
"%d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
}
if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
- add_topology_level(change->children);
+ fenced_register_level(change->children, NULL);
} else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
xmlNode *match = pcmk__xe_first_child(change,
PCMK_XE_CHANGE_RESULT,
NULL, NULL);
// A modification is applied as removal followed by re-registration
if (match != NULL) {
remove_topology_level(match->children);
- add_topology_level(match->children);
+ fenced_register_level(match->children, NULL);
}
}
continue;
}
if (strstr(xpath, "/" PCMK_XE_FENCING_TOPOLOGY) != NULL) {
// Change to the topology in general
crm_info("Re-initializing fencing topology after top-level "
"%s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
}
if ((strstr(xpath, "/" PCMK_XE_CONFIGURATION) != NULL)
&& (pcmk__xe_first_child(change, PCMK_XE_FENCING_TOPOLOGY, NULL,
NULL) != NULL)
&& pcmk__str_any_of(op, PCMK_VALUE_CREATE, PCMK_VALUE_DELETE,
NULL)) {
// Topology was created or entire configuration section was deleted
crm_info("Re-initializing fencing topology after top-level "
"%s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
}
crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
}
}
/*!
 * \internal
 * \brief Handle a CIB diff notification by updating the local CIB copy
 *
 * The patchset in \p msg is applied to \c local_cib. If there is no local
 * copy, or the patch cannot be applied, the full CIB is queried again. A full
 * refresh of topology and devices is done after a re-query or when the
 * watchdog timeout changed; otherwise only the patchset's changes are applied.
 *
 * \param[in] event  Notification event name (used in log messages)
 * \param[in] msg    Notification XML
 */
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
xmlNode *patchset = NULL;
// Saved so that a change in the watchdog timeout can be detected below
long long timeout_ms_saved = stonith_watchdog_timeout_ms;
bool need_full_refresh = false;
if(!have_cib_devices) {
crm_trace("Skipping updates until we get a full dump");
return;
} else if(msg == NULL) {
crm_trace("Missing %s update", event);
return;
}
/* Maintain a local copy of the CIB so that we have full access
* to device definitions, location constraints, and node attributes
*/
if (local_cib != NULL) {
int rc = pcmk_ok;
xmlNode *wrapper = NULL;
// Ignore notifications whose recorded CIB result code is not OK
crm_element_value_int(msg, PCMK__XA_CIB_RC, &rc);
if (rc != pcmk_ok) {
return;
}
wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL,
NULL);
patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
rc = xml_apply_patchset(local_cib, patchset, TRUE);
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
/* @TODO Full refresh (with or without query) in case of
* -pcmk_err_old_data? It seems wrong to call
* stonith_device_remove() based on primitive deletion in an
* old diff.
*/
break;
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
// Discard the local copy so that it is re-queried below
pcmk__xml_free(local_cib);
local_cib = NULL;
break;
default:
crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
pcmk__xml_free(local_cib);
local_cib = NULL;
}
}
if (local_cib == NULL) {
if (fenced_query_cib() != pcmk_rc_ok) {
return;
}
// patchset is not used on this path; everything is rebuilt from scratch
need_full_refresh = true;
}
pcmk__refresh_node_caches_from_cib(local_cib);
update_stonith_watchdog_timeout_ms(local_cib);
if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
need_full_refresh = true;
}
if (need_full_refresh) {
fencing_topology_init();
cib_devices_update();
} else {
// Partial refresh
update_fencing_topology(event, msg);
update_cib_stonith_devices(patchset);
}
watchdog_device_update();
}
/*!
 * \internal
 * \brief Callback for the initial full CIB query issued by setup_cib()
 *
 * Stores a copy of \p output as the local CIB and performs a full refresh of
 * node caches, watchdog timeout, fencing topology, and fence devices.
 *
 * \param[in] msg        Not used by this function
 * \param[in] call_id    Not used by this function
 * \param[in] rc         Not used by this function
 * \param[in] output     Query result to copy as the local CIB
 * \param[in] user_data  Not used by this function
 */
static void
init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
crm_info("Updating device list from CIB");
// From now on update_cib_cache_cb() processes diff notifications
have_cib_devices = TRUE;
local_cib = pcmk__xml_copy(NULL, output);
pcmk__refresh_node_caches_from_cib(local_cib);
update_stonith_watchdog_timeout_ms(local_cib);
fencing_topology_init();
cib_devices_update();
watchdog_device_update();
}
/*!
 * \internal
 * \brief React to the CIB manager connection being closed
 *
 * During shutdown this only logs. Otherwise the loss is treated as critical:
 * the fencer signs off from the CIB and shuts down.
 *
 * \param[in] user_data  Ignored
 */
static void
cib_connection_destroy(gpointer user_data)
{
    if (stonith_shutdown_flag) {
        crm_info("Connection to the CIB manager closed");
        return;
    }

    crm_crit("Lost connection to the CIB manager, shutting down");

    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }
    stonith_shutdown(0);
}
/*!
 * \internal
 * \brief Drop the CIB manager connection and the cached CIB copy
 *
 * The diff notification callback is unregistered before the connection is
 * cleaned up. The local CIB copy is freed whether or not a connection exists.
 */
void
fenced_cib_cleanup(void)
{
    if (cib_api != NULL) {
        cib_api->cmds->del_notify_callback(cib_api,
                                           PCMK__VALUE_CIB_DIFF_NOTIFY,
                                           update_cib_cache_cb);
        cib__clean_up_connection(&cib_api);
    }

    pcmk__xml_free(local_cib);
    local_cib = NULL;
}
/*!
 * \internal
 * \brief Connect to the CIB manager and start tracking CIB changes
 *
 * Sign-on is attempted up to five times while the result is \c -ENOTCONN,
 * sleeping 0, 1, 2, 3, then 4 seconds before the respective attempts. Once
 * connected, a diff notification callback is registered and an asynchronous
 * query for the full CIB is issued.
 */
void
setup_cib(void)
{
    int rc = pcmk_ok;

    cib_api = cib_new();
    if (cib_api == NULL) {
        crm_err("No connection to the CIB manager");
        return;
    }

    for (int attempt = 0; attempt < 5; attempt++) {
        sleep(attempt);
        rc = cib_api->cmds->signon(cib_api, crm_system_name, cib_command);
        if (rc != -ENOTCONN) {
            break;
        }
    }

    if (rc != pcmk_ok) {
        crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);
        return;
    }

    rc = cib_api->cmds->add_notify_callback(cib_api,
                                            PCMK__VALUE_CIB_DIFF_NOTIFY,
                                            update_cib_cache_cb);
    if (rc != pcmk_ok) {
        crm_err("Could not set CIB notification callback");
        return;
    }

    // The query's return value is passed on as the call ID for the callback
    rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_none);
    cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL,
                                     "init_cib_cache_cb", init_cib_cache_cb);

    cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
    crm_info("Watching for fencing topology changes");
}
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index b31c52bf94..f70f2c56e7 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1,3677 +1,3651 @@
/*
* Copyright 2009-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdbool.h> // bool
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <libxml/tree.h> // xmlNode
#include <libxml/xpath.h> // xmlXPathObject, etc.
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-fenced.h>
static GHashTable *device_table = NULL;
GHashTable *topology = NULL;
static GList *cmd_list = NULL;
static GHashTable *fenced_handlers = NULL;
// State for a search over fence devices that collects per-device replies
struct device_search_s {
/* Target of the fence action */
char *host;
/* Requested fence action */
char *action;
/* Timeout to use if a device is queried dynamically for possible targets */
// @TODO This name is misleading now, it's the value of stonith-timeout
int per_device_timeout;
/* Number of registered fencing devices at time of request */
int replies_needed;
/* Number of device replies received so far */
int replies_received;
/* Whether the target is eligible to perform requested action (or off) */
bool allow_self;
/* Private data to pass to search callback function */
void *user_data;
/* Function to call when all replies have been received */
void (*callback) (GList * devices, void *user_data);
/* Devices capable of performing requested action (or off if remapping) */
GList *capable;
/* Whether to perform searches that support the action */
uint32_t support_action_only;
};
static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(int pid, const pcmk__action_result_t *result,
void *user_data);
static void search_devices_record_result(struct device_search_s *search, const char *device,
gboolean can_fence);
static int get_agent_metadata(const char *agent, xmlNode **metadata);
static void read_action_metadata(fenced_device_t *device);
static enum fenced_target_by unpack_level_kind(const xmlNode *level);
// An asynchronous fencing command, created from request XML
typedef struct {
// Call ID from the request
int id;
// Call options from the request
uint32_t options;
int default_timeout; /* seconds */
int timeout; /* seconds */
int start_delay; // seconds (-1 means disable static/random fencing delays)
// Nonzero while a start-delay timer is pending for this command
int delay_id;
char *op;
char *origin;
char *client;
char *client_name;
char *remote_op_id;
char *target;
char *action;
// ID of the device the command is (or was last) scheduled on
char *device;
//! Head of device list (used only for freeing list with command object)
GList *device_list;
//! Next item to process in \c device_list
GList *next_device_iter;
void *internal_user_data;
// Called with the action's result
void (*done_cb) (int pid, const pcmk__action_result_t *result,
void *user_data);
// Device the command is currently running on
fenced_device_t *active_on;
// Device the command is being started on, until the fork callback runs
fenced_device_t *activating_on;
} async_command_t;
static xmlNode *construct_async_reply(const async_command_t *cmd,
const pcmk__action_result_t *result);
+/*!
+ * \internal
+ * \brief Set a bad fencer API request error in a result object
+ *
+ * \param[out] result Result to set
+ *
+ * \note The result gets exit status \c CRM_EX_PROTOCOL and execution status
+ *       \c PCMK_EXEC_INVALID.
+ */
+static inline void
+set_bad_request_result(pcmk__action_result_t *result)
+{
+ pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+ "Fencer API request missing required information (bug?)");
+}
+
/*!
 * \internal
 * \brief Tell whether a watchdog device is present in the device table
 *
 * \retval \c true   If the device table exists and has an entry for
 *                   \c STONITH_WATCHDOG_ID
 * \retval \c false  Otherwise
 */
bool
fenced_has_watchdog_device(void)
{
    if (device_table == NULL) {
        return false;
    }
    return g_hash_table_lookup(device_table, STONITH_WATCHDOG_ID) != NULL;
}
/*!
 * \internal
 * \brief Invoke a function on every entry of the fence device table
 *
 * Does nothing if the device table has not been created.
 *
 * \param[in]     fn         Function to call for each device
 * \param[in,out] user_data  User data to pass to \p fn
 */
void
fenced_foreach_device(GHFunc fn, gpointer user_data)
{
    if (device_table == NULL) {
        return;
    }
    g_hash_table_foreach(device_table, fn, user_data);
}
/*!
 * \internal
 * \brief Remove every fence device for which a predicate holds
 *
 * Does nothing if the device table has not been created.
 *
 * \param[in] fn  Predicate returning \c TRUE for devices to remove and
 *                \c FALSE for devices to keep (called with NULL user data)
 */
void
fenced_foreach_device_remove(GHRFunc fn)
{
    if (device_table == NULL) {
        return;
    }
    g_hash_table_foreach_remove(device_table, fn, NULL);
}
/*!
 * \internal
 * \brief Check whether an action is required for a device
 *
 * \param[in] action  Fence action name
 * \param[in] device  Fence device (may be NULL)
 *
 * \return Nonzero only if \p device is non-NULL, is set for automatic
 *         unfencing, and \p action is \c PCMK_ACTION_ON
 */
static gboolean
is_action_required(const char *action, const fenced_device_t *device)
{
- return (device != NULL) && device->automatic_unfencing
+ return (device != NULL)
+ && pcmk_is_set(device->flags, fenced_df_auto_unfence)
&& pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none);
}
/*!
 * \internal
 * \brief Get a device's configured maximum fencing delay
 *
 * \param[in] device  Fence device whose parameters are consulted
 * \param[in] action  Action name; only fencing actions have a delay
 *
 * \return Value of \c PCMK_STONITH_DELAY_MAX in whole seconds, or 0 if
 *         \p action is not a fencing action or the parameter is unset
 */
static int
get_action_delay_max(const fenced_device_t *device, const char *action)
{
    guint max_ms = 0U;
    const char *configured = NULL;

    if (!pcmk__is_fencing_action(action)) {
        return 0;
    }

    configured = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
    if (configured == NULL) {
        return 0;
    }

    pcmk_parse_interval_spec(configured, &max_ms);
    return (int) (max_ms / 1000);
}
/*!
 * \internal
 * \brief Get a device's configured base fencing delay for a target
 *
 * The \c PCMK_STONITH_DELAY_BASE parameter is either a single interval or a
 * list of <tt>name:interval</tt> mappings separated by semicolons, spaces,
 * or tabs. With a non-NULL \p target, the first mapping whose name equals
 * \p target (ignoring case) supplies the interval.
 *
 * \param[in] device  Fence device whose parameters are consulted
 * \param[in] action  Action name; only fencing actions have a delay
 * \param[in] target  Fencing target to look up in the mapping (may be NULL)
 *
 * \return Base delay in whole seconds, or 0 if \p action is not a fencing
 *         action, the parameter is unset, or no usable interval was found
 */
static int
get_action_delay_base(const fenced_device_t *device, const char *action,
                      const char *target)
{
    const char *configured = NULL;
    char *copy = NULL;          // Writable copy; strtok() modifies it in place
    const char *spec = NULL;    // Interval text that will finally be parsed
    guint delay_base = 0U;

    if (!pcmk__is_fencing_action(action)) {
        return 0;
    }

    configured = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);
    if (configured == NULL) {
        return 0;
    }

    copy = pcmk__str_copy(configured);
    spec = copy;

    if (target != NULL) {
        for (char *val = strtok(copy, "; \t"); val != NULL;
             val = strtok(NULL, "; \t")) {

            char *mapval = strchr(val, ':');
            size_t name_len = 0;

            if ((mapval == NULL) || (mapval[1] == '\0')) {
                // The format previously lacked a specifier for val
                crm_err("pcmk_delay_base: empty value in mapping '%s'", val);
                continue;
            }

            name_len = (size_t) (mapval - val);

            /* Require a whole-name match. Comparing only the first name_len
             * characters would let target "node10" match a mapping for
             * "node1". Reading target[name_len] is safe: the comparison
             * succeeded over name_len characters and the name contains no
             * NUL, so target is at least that long.
             */
            if ((name_len > 0)
                && (strncasecmp(target, val, name_len) == 0)
                && (target[name_len] == '\0')) {

                spec = mapval + 1;
                crm_debug("pcmk_delay_base mapped to %s for %s",
                          spec, target);
                break;
            }
        }
    }

    // Anything still containing ':' is an unmatched mapping, not an interval
    if (strchr(spec, ':') == NULL) {
        pcmk_parse_interval_spec(spec, &delay_base);
        delay_base /= 1000;
    }

    free(copy);
    return (int) delay_base;
}
/*!
* \internal
* \brief Override STONITH timeout with pcmk_*_timeout if available
*
* \param[in] device STONITH device to use
* \param[in] action STONITH action name
* \param[in] default_timeout Timeout to use if device does not have
* a pcmk_*_timeout parameter for action
*
* \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
* \note For consistency, it would be nice if reboot/off/on timeouts could be
* set the same way as start/stop/monitor timeouts, i.e. with an
* <operation> entry in the fencing resource configuration. However that
* is insufficient because fencing devices may be registered directly via
* the fencer's register_device() API instead of going through the CIB
* (e.g. stonith_admin uses it for its -R option, and the executor uses it
* to ensure a device is registered when a command is issued). As device
* properties, pcmk_*_timeout parameters can be grabbed by the fencer when
* the device is registered, whether by CIB change or API call.
*/
static int
get_action_timeout(const fenced_device_t *device, const char *action,
int default_timeout)
{
if (action && device && device->params) {
// Holds the parameter name "pcmk_<action>_timeout"
char buffer[64] = { 0, };
const char *value = NULL;
/* If "reboot" was requested but the device does not support it,
* we will remap to "off", so check timeout for "off" instead
*/
if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)
- && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
+ && !pcmk_is_set(device->flags, fenced_df_supports_reboot)) {
crm_trace("%s doesn't support reboot, using timeout for off instead",
device->id);
action = PCMK_ACTION_OFF;
}
/* If the device config specified an action-specific timeout, use it */
snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
value = g_hash_table_lookup(device->params, buffer);
if (value) {
long long timeout_ms = crm_get_msec(value);
// Convert to seconds and cap at INT_MAX before narrowing to int
return (int) QB_MIN(pcmk__timeout_ms2s(timeout_ms), INT_MAX);
}
}
return default_timeout;
}
/*!
 * \internal
 * \brief Look up the device a fencing operation is assigned to
 *
 * \param[in] cmd  Fencing operation to check (may be NULL)
 *
 * \return Device table entry for the operation's device ID, or NULL if
 *         \p cmd, its device ID, or the device table is NULL, or if no such
 *         device is registered
 */
static fenced_device_t *
cmd_device(const async_command_t *cmd)
{
    if (cmd == NULL) {
        return NULL;
    }
    if (cmd->device == NULL) {
        return NULL;
    }
    if (device_table == NULL) {
        return NULL;
    }
    return g_hash_table_lookup(device_table, cmd->device);
}
/*!
 * \internal
 * \brief Get the reboot action configured for a device
 *
 * \param[in] device_id  Device ID (may be NULL)
 *
 * \return Value of the device's \c pcmk_reboot_action parameter if one can
 *         be found, otherwise \c PCMK_ACTION_REBOOT
 */
const char *
fenced_device_reboot_action(const char *device_id)
{
    const fenced_device_t *device = NULL;
    const char *configured = NULL;

    if ((device_table != NULL) && (device_id != NULL)) {
        device = g_hash_table_lookup(device_table, device_id);
    }

    if ((device != NULL) && (device->params != NULL)) {
        configured = g_hash_table_lookup(device->params,
                                         "pcmk_reboot_action");
    }

    return pcmk__s(configured, PCMK_ACTION_REBOOT);
}
/*!
* \internal
* \brief Check whether a given device supports the "on" action
*
* \param[in] device_id Device ID
*
* \return true if \p device_id supports "on", otherwise false
*
* \note Also returns false if the device table or \p device_id is NULL, or
*       if no device with that ID is registered.
*/
bool
fenced_device_supports_on(const char *device_id)
{
if ((device_table != NULL) && (device_id != NULL)) {
fenced_device_t *device = g_hash_table_lookup(device_table, device_id);
if (device != NULL) {
- return pcmk_is_set(device->flags, st_device_supports_on);
+ return pcmk_is_set(device->flags, fenced_df_supports_on);
}
}
return false;
}
/*!
 * \internal
 * \brief Free an asynchronous fencing command
 *
 * Any pending start-delay timer is cancelled and the command is dropped from
 * the global command list before its memory is released.
 *
 * \param[in,out] cmd  Command to free (may be NULL)
 */
static void
free_async_command(async_command_t * cmd)
{
    if (cmd == NULL) {
        return;
    }

    if (cmd->delay_id != 0) {
        g_source_remove(cmd->delay_id);
    }

    cmd_list = g_list_remove(cmd_list, cmd);
    g_list_free_full(cmd->device_list, free);

    free(cmd->op);
    free(cmd->origin);
    free(cmd->client);
    free(cmd->client_name);
    free(cmd->remote_op_id);
    free(cmd->target);
    free(cmd->action);
    free(cmd->device);
    free(cmd);
}
/*!
* \internal
* \brief Create a new asynchronous fencing operation from request XML
*
* \param[in] msg Fencing request XML (from IPC or CPG)
*
* \return Newly allocated fencing operation on success, otherwise NULL
*
* \note This asserts on memory errors, so a NULL return indicates an
* unparseable message.
* \note On success the new operation has been appended to the global command
* list.
*/
static async_command_t *
create_async_command(xmlNode *msg)
{
xmlNode *op = NULL;
async_command_t *cmd = NULL;
int rc = pcmk_rc_ok;
if (msg == NULL) {
return NULL;
}
// The first element anywhere in the document carrying a device action
op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
LOG_ERR);
if (op == NULL) {
return NULL;
}
cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
// All messages must include these
cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION);
cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP);
cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID);
if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
// cmd is not in cmd_list yet; free_async_command() still tries to remove it
free_async_command(cmd);
return NULL;
}
crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id));
crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay));
crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout));
// The effective timeout starts out as the requested default
cmd->timeout = cmd->default_timeout;
rc = pcmk__xe_get_flags(msg, PCMK__XA_ST_CALLOPT, &(cmd->options),
st_opt_none);
// Unparseable options are logged but do not fail the request
if (rc != pcmk_rc_ok) {
crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc));
}
cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC);
cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP);
cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME);
cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET);
cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID);
cmd->done_cb = st_child_done;
// Track in global command list
cmd_list = g_list_append(cmd_list, cmd);
return cmd;
}
/*!
 * \internal
 * \brief Get the number of actions a device may run concurrently
 *
 * \param[in] device  Fence device whose parameters are consulted
 *
 * \return Value of \c PCMK_STONITH_ACTION_LIMIT, or 1 if the parameter is
 *         unset, unparseable, or 0
 */
static int
get_action_limit(fenced_device_t *device)
{
    int limit = 1;
    const char *configured = g_hash_table_lookup(device->params,
                                                 PCMK_STONITH_ACTION_LIMIT);

    if (configured == NULL) {
        return 1;
    }
    if (pcmk__scan_min_int(configured, &limit, INT_MIN) != pcmk_rc_ok) {
        return 1;
    }
    return (limit == 0)? 1 : limit;
}
/*!
 * \internal
 * \brief Count the commands currently active on a device
 *
 * \param[in] device  Fence device to count commands for
 *
 * \return Number of entries in the global command list whose \c active_on
 *         is \p device (0 if \p device is NULL)
 */
static int
get_active_cmds(fenced_device_t *device)
{
    int active = 0;

    CRM_CHECK(device != NULL, return 0);

    for (const GList *iter = cmd_list; iter != NULL; iter = iter->next) {
        const async_command_t *cmd = iter->data;

        if (cmd->active_on == device) {
            active++;
        }
    }
    return active;
}
/*!
 * \internal
 * \brief Record that a command's action process has been started
 *
 * Moves the command's device from \c activating_on to \c active_on.
 *
 * \param[in]     pid        Process ID of the started action
 * \param[in,out] user_data  Command (<tt>async_command_t *</tt>)
 */
static void
fork_cb(int pid, void *user_data)
{
    async_command_t *cmd = user_data;

    /* In case of a retry, the move from activating_on to active_on has been
     * done already, so fall back to active_on.
     */
    fenced_device_t *device = (cmd->activating_on != NULL)?
                              cmd->activating_on : cmd->active_on;

    pcmk__assert(device != NULL);

    crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
              cmd->action, pid,
              ((cmd->target == NULL)? "" : " targeting "),
              pcmk__s(cmd->target, ""), device->id, cmd->timeout);

    cmd->activating_on = NULL;
    cmd->active_on = device;
}
/*!
 * \internal
 * \brief Timer callback that tries to load a device's agent metadata
 *
 * \param[in,out] data  Fence device (<tt>fenced_device_t *</tt>)
 *
 * \return \c G_SOURCE_CONTINUE if the lookup returned \c EAGAIN, otherwise
 *         \c G_SOURCE_REMOVE
 */
static int
get_agent_metadata_cb(gpointer data) {
fenced_device_t *device = data;
guint period_ms;
switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
case pcmk_rc_ok:
if (device->agent_metadata) {
read_action_metadata(device);
- stonith__device_parameter_flags(&(device->flags), device->id,
- device->agent_metadata);
+ device->default_host_arg =
+ stonith__default_host_arg(device->agent_metadata);
}
return G_SOURCE_REMOVE;
case EAGAIN:
// Back off: double the timer period until it reaches 160 * 1000
period_ms = pcmk__mainloop_timer_get_period(device->timer);
if (period_ms < 160 * 1000) {
mainloop_timer_set_period(device->timer, 2 * period_ms);
}
return G_SOURCE_CONTINUE;
default:
return G_SOURCE_REMOVE;
}
}
/*!
* \internal
* \brief Call a command's action callback for an internal (not library) result
*
* \param[in,out] cmd Command to report result for
* \param[in] exit_status Exit status to use for result
* \param[in] execution_status Execution status to use for result
* \param[in] exit_reason Exit reason to use for result
*/
static void
report_internal_result(async_command_t *cmd, int exit_status,
int execution_status, const char *exit_reason)
{
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
pcmk__set_result(&result, exit_status, execution_status, exit_reason);
// A PID of 0 is passed because no action process was involved
cmd->done_cb(0, &result, cmd);
pcmk__reset_result(&result);
}
/*!
 * \internal
 * \brief Start the next runnable pending command on a fence device
 *
 * At most one pending command is taken off the device's queue per call. If
 * more remain afterward, the device's trigger is set again.
 *
 * \param[in,out] device  Fence device to execute a command on
 *
 * \return \c FALSE if \p device is NULL, otherwise \c TRUE
 */
static gboolean
stonith_device_execute(fenced_device_t *device)
{
int exec_rc = 0;
const char *action_str = NULL;
- const char *host_arg = NULL;
async_command_t *cmd = NULL;
stonith_action_t *action = NULL;
int active_cmds = 0;
int action_limit = 0;
GList *gIter = NULL;
GList *gIterNext = NULL;
CRM_CHECK(device != NULL, return FALSE);
active_cmds = get_active_cmds(device);
action_limit = get_action_limit(device);
// A negative action limit means no cap on concurrent actions
if (action_limit > -1 && active_cmds >= action_limit) {
crm_trace("%s is over its action limit of %d (%u active action%s)",
device->id, action_limit, active_cmds,
pcmk__plural_s(active_cmds));
return TRUE;
}
// Pick the first pending command that is not waiting on a start delay
for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
async_command_t *pending_op = gIter->data;
gIterNext = gIter->next;
if (pending_op && pending_op->delay_id) {
crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
"waiting for start delay of %ds",
pending_op->action,
((pending_op->target == NULL)? "" : " targeting "),
pcmk__s(pending_op->target, ""),
device->id, pending_op->start_delay);
continue;
}
device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
g_list_free_1(gIter);
cmd = pending_op;
break;
}
if (cmd == NULL) {
crm_trace("No actions using %s are needed", device->id);
return TRUE;
}
// Watchdog agents are handled internally rather than by running an agent
if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
if (pcmk__is_fencing_action(cmd->action)) {
if (node_does_watchdog_fencing(fenced_get_local_node())) {
pcmk__panic("Watchdog self-fencing required");
goto done;
}
} else {
crm_info("Faking success for %s watchdog operation", cmd->action);
report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
goto done;
}
}
#if PCMK__ENABLE_CIBSECRETS
exec_rc = pcmk__substitute_secrets(device->id, device->params);
if (exec_rc != pcmk_rc_ok) {
if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) {
crm_info("Proceeding with stop operation for %s "
"despite being unable to load CIB secrets (%s)",
device->id, pcmk_rc_str(exec_rc));
} else {
crm_err("Considering %s unconfigured "
"because unable to load CIB secrets: %s",
device->id, pcmk_rc_str(exec_rc));
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
"Failed to get CIB secrets");
goto done;
}
}
#endif
action_str = cmd->action;
if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none)
- && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
+ && !pcmk_is_set(device->flags, fenced_df_supports_reboot)) {
crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
"because agent '%s' does not support reboot",
((cmd->target == NULL)? "" : " targeting "),
pcmk__s(cmd->target, ""), device->id, device->agent);
action_str = PCMK_ACTION_OFF;
}
- if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
- host_arg = "port";
-
- } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
- host_arg = "plug";
- }
-
action = stonith__action_create(device->agent, action_str, cmd->target,
cmd->timeout, device->params,
- device->aliases, host_arg);
+ device->aliases, device->default_host_arg);
/* for async exec, exec_rc is negative for early error exit
otherwise handling of success/errors is done via callbacks */
cmd->activating_on = device;
exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
fork_cb);
if (exec_rc < 0) {
// Early failure: report the result here, since no callback will
cmd->activating_on = NULL;
cmd->done_cb(0, stonith__action_result(action), cmd);
stonith__destroy_action(action);
}
done:
/* Device might get triggered to work by multiple fencing commands
* simultaneously. Trigger the device again to make sure any
* remaining concurrent commands get executed. */
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
return TRUE;
}
/*!
 * \internal
 * \brief Dispatch wrapper that runs stonith_device_execute() on a device
 *
 * \param[in,out] user_data  Fence device (<tt>fenced_device_t *</tt>)
 *
 * \return Return value of stonith_device_execute()
 */
static gboolean
stonith_device_dispatch(gpointer user_data)
{
return stonith_device_execute(user_data);
}
/*!
 * \internal
 * \brief Timer callback run when a command's start delay has elapsed
 *
 * Clears the command's delay timer ID and, if the command's device is still
 * registered, triggers that device so the command can be executed.
 *
 * \param[in,out] data  Command (<tt>async_command_t *</tt>)
 *
 * \return \c FALSE always
 */
static gboolean
start_delay_helper(gpointer data)
{
    async_command_t *cmd = data;
    fenced_device_t *device = NULL;

    cmd->delay_id = 0;

    device = cmd_device(cmd);
    if (device != NULL) {
        mainloop_set_trigger(device->work);
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Queue a fencing command on a device, applying any configured delay
 *
 * The command is bound to \p device, its timeout is recalculated for that
 * device, and it is appended to the device's pending operations. Unless the
 * requested delay is negative, the device's base and maximum delay parameters
 * are added to the command's start delay, and a timer is armed if the result
 * is positive.
 *
 * \param[in,out] cmd     Command to schedule
 * \param[in,out] device  Device that should execute \p cmd
 */
static void
schedule_stonith_command(async_command_t *cmd, fenced_device_t *device)
{
    int delay_max = 0;
    int delay_base = 0;
    int requested_delay = 0;

    CRM_CHECK(cmd != NULL, return);
    CRM_CHECK(device != NULL, return);

    // Read cmd only after it has been checked for NULL
    requested_delay = cmd->start_delay;

    free(cmd->device);
    cmd->device = pcmk__str_copy(device->id);
    cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);

    if (cmd->remote_op_id) {
        crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
                  "with op id %.8s and timeout %ds",
                  cmd->action,
                  (cmd->target == NULL)? "" : " targeting ",
                  pcmk__s(cmd->target, ""),
                  device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
    } else {
        crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
                  cmd->action,
                  (cmd->target == NULL)? "" : " targeting ",
                  pcmk__s(cmd->target, ""),
                  device->id, cmd->client, cmd->timeout);
    }

    device->pending_ops = g_list_append(device->pending_ops, cmd);
    mainloop_set_trigger(device->work);

    // Value -1 means disable any static/random fencing delays
    if (requested_delay < 0) {
        return;
    }

    delay_max = get_action_delay_max(device, cmd->action);
    delay_base = get_action_delay_base(device, cmd->action, cmd->target);
    if (delay_max == 0) {
        delay_max = delay_base;
    }
    if (delay_max < delay_base) {
        crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
                 PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
                 "(limiting to maximum delay)",
                 delay_base, delay_max, cmd->action, device->id);
        delay_base = delay_max;
    }
    if (delay_max > 0) {
        // coverity[dontcall] It doesn't matter here if rand() is predictable
        cmd->start_delay +=
            ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
            + delay_base;
    }

    if (cmd->start_delay > 0) {
        crm_notice("Delaying '%s' action%s%s using %s for %ds " QB_XS
                   " timeout=%ds requested_delay=%ds base=%ds max=%ds",
                   cmd->action,
                   (cmd->target == NULL)? "" : " targeting ",
                   pcmk__s(cmd->target, ""),
                   device->id, cmd->start_delay, cmd->timeout,
                   requested_delay, delay_base, delay_max);
        cmd->delay_id =
            pcmk__create_timer(cmd->start_delay * 1000, start_delay_helper, cmd);
    }
}
/*!
 * \internal
 * \brief Free a fence device
 *
 * Each operation still pending on the device is reported as failed with
 * \c PCMK_EXEC_NO_FENCE_DEVICE before the device's resources are released.
 *
 * \param[in,out] data  Fence device (<tt>fenced_device_t *</tt>)
 *
 * \note This is the value destructor given to the device table by
 *       fenced_init_device_table().
 */
static void
free_device(gpointer data)
{
GList *gIter = NULL;
fenced_device_t *device = data;
g_hash_table_destroy(device->params);
g_hash_table_destroy(device->aliases);
for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
async_command_t *cmd = gIter->data;
crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Device was removed before action could be executed");
}
// Only the list nodes are freed here, not the commands themselves
g_list_free(device->pending_ops);
g_list_free_full(device->targets, free);
if (device->timer) {
mainloop_timer_stop(device->timer);
mainloop_timer_del(device->timer);
}
mainloop_destroy_trigger(device->work);
pcmk__xml_free(device->agent_metadata);
free(device->namespace);
if (device->on_target_actions != NULL) {
g_string_free(device->on_target_actions, TRUE);
}
free(device->agent);
free(device->id);
free(device);
}
/*!
 * \internal
 * \brief Create the table of known fence devices if it doesn't exist yet
 *
 * Table values are freed with free_device(); keys have no destructor.
 */
void
fenced_init_device_table(void)
{
    if (device_table != NULL) {
        return;
    }
    device_table = pcmk__strkey_table(NULL, free_device);
}
/*!
 * \internal
 * \brief Destroy the table of known fence devices, if any
 *
 * The table pointer is reset to NULL afterward.
 */
void
fenced_free_device_table(void)
{
    if (device_table == NULL) {
        return;
    }
    g_hash_table_destroy(device_table);
    device_table = NULL;
}
/*!
 * \internal
 * \brief Parse a host map string into a table of name -> value aliases
 *
 * The map is scanned for NAME=VALUE (or NAME:VALUE) pairs terminated by a
 * semicolon, space, tab, or the end of the string. A backslash makes the
 * scanner skip the character that follows it, and all backslashes are removed
 * from the stored value.
 *
 * \param[in]     hostmap  Host map string to parse (may be NULL)
 * \param[in,out] targets  If not NULL, a copy of each parsed value is appended
 *                         to this list
 *
 * \return Newly created alias table (empty if \p hostmap is NULL or contains
 *         no usable mappings)
 */
static GHashTable *
build_port_aliases(const char *hostmap, GList ** targets)
{
char *name = NULL;
// last: start offset of the token being scanned; lpc: current offset
int last = 0, lpc = 0, max = 0, added = 0;
GHashTable *aliases = pcmk__strikey_table(free, free);
if (hostmap == NULL) {
return aliases;
}
max = strlen(hostmap);
// Iterate through the terminating null byte so the final pair is flushed
for (; lpc <= max; lpc++) {
switch (hostmap[lpc]) {
/* Skip escaped chars */
case '\\':
/* NOTE(review): if the backslash is the last character, lpc now indexes
 * the terminator, so the check after the switch ends the loop without
 * running the delimiter case and a pending name/value pair is dropped.
 * Confirm whether that is intended.
 */
lpc++;
break;
/* Assignment chars */
case '=':
case ':':
if (lpc > last) {
// Text since the last separator becomes the (replacement) name
free(name);
name = pcmk__assert_alloc(1, 1 + lpc - last);
memcpy(name, hostmap + last, lpc - last);
}
last = lpc + 1;
break;
/* Delimiter chars */
/* case ',': Potentially used to specify multiple ports */
case 0:
case ';':
case ' ':
case '\t':
if (name) {
char *value = NULL;
int k = 0;
value = pcmk__assert_alloc(1, 1 + lpc - last);
memcpy(value, hostmap + last, lpc - last);
// Compact the value in place, dropping every backslash
for (int i = 0; value[i] != '\0'; i++) {
if (value[i] != '\\') {
value[k++] = value[i];
}
}
value[k] = '\0';
crm_debug("Adding alias '%s'='%s'", name, value);
// The table takes ownership of both name and value
g_hash_table_replace(aliases, name, value);
if (targets) {
*targets = g_list_append(*targets, pcmk__str_copy(value));
}
value = NULL;
name = NULL;
added++;
} else if (lpc > last) {
// Text was found with no preceding NAME= part
crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
}
last = lpc + 1;
break;
}
if (hostmap[lpc] == 0) {
break;
}
}
if (added == 0) {
crm_info("No host mappings detected in '%s'", hostmap);
}
// A name that never received a value is discarded
free(name);
return aliases;
}
// Cache of fence agent metadata text, keyed by agent name (created on demand)
GHashTable *metadata_cache = NULL;
/*!
 * \internal
 * \brief Destroy the fence agent metadata cache, if one has been created
 */
void
free_metadata_cache(void)
{
    if (metadata_cache == NULL) {
        return;
    }
    g_hash_table_destroy(metadata_cache);
    metadata_cache = NULL;
}
/*!
 * \internal
 * \brief Create the fence agent metadata cache, unless it already exists
 */
static void
init_metadata_cache(void)
{
    if (metadata_cache != NULL) {
        return;
    }
    // Both keys and values are heap strings released with free()
    metadata_cache = pcmk__strkey_table(free, free);
}
/*!
 * \internal
 * \brief Get parsed metadata XML for a fence agent
 *
 * The raw metadata text is looked up in (and, on a miss, added to) the
 * metadata cache, then parsed into a new XML tree on every call.
 *
 * \param[in]  agent     Name of fence agent
 * \param[out] metadata  Where to store the parsed metadata (set to NULL for
 *                       the internal watchdog agent and on error)
 *
 * \return pcmk_rc_ok on success, EINVAL if \p metadata is NULL, or EAGAIN if
 *         the metadata could not be retrieved
 */
int
get_agent_metadata(const char *agent, xmlNode ** metadata)
{
char *buffer = NULL;
if (metadata == NULL) {
return EINVAL;
}
*metadata = NULL;
// Nothing is fetched for the internal watchdog agent; succeed with NULL
if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
return pcmk_rc_ok;
}
init_metadata_cache();
buffer = g_hash_table_lookup(metadata_cache, agent);
if (buffer == NULL) {
// Cache miss: ask for the metadata through a temporary API object
- stonith_t *st = stonith_api_new();
+ stonith_t *st = stonith__api_new();
int rc;
if (st == NULL) {
crm_warn("Could not get agent meta-data: "
"API memory allocation failed");
return EAGAIN;
}
rc = st->cmds->metadata(st, st_opt_sync_call, agent,
NULL, &buffer, 10);
- stonith_api_delete(st);
+ stonith__api_free(st);
if (rc || !buffer) {
crm_err("Could not retrieve metadata for fencing agent %s", agent);
return EAGAIN;
}
// The cache takes ownership of buffer
g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer);
}
*metadata = pcmk__xml_parse(buffer);
return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Set device flags based on the actions listed in its agent metadata
 *
 * Every action element in the device's metadata is examined. The list, status,
 * reboot, and on actions set the corresponding "supports" flag, and any named
 * action marked as on-target is added to the device's on_target_actions.
 *
 * \param[in,out] device  Device whose metadata should be read (nothing is
 *                        done if it has no metadata)
 */
static void
read_action_metadata(fenced_device_t *device)
{
xmlXPathObject *xpath = NULL;
int max = 0;
int lpc = 0;
if (device->agent_metadata == NULL) {
return;
}
// Find every action element anywhere in the metadata document
xpath = pcmk__xpath_search(device->agent_metadata->doc,
"//" PCMK_XE_ACTION);
max = pcmk__xpath_num_results(xpath);
if (max == 0) {
xmlXPathFreeObject(xpath);
return;
}
for (lpc = 0; lpc < max; lpc++) {
const char *action = NULL;
xmlNode *match = pcmk__xpath_result(xpath, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match == NULL) { continue; };
action = crm_element_value(match, PCMK_XA_NAME);
if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
- stonith__set_device_flags(device->flags, device->id,
- st_device_supports_list);
+ fenced_device_set_flags(device, fenced_df_supports_list);
+
} else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) {
- stonith__set_device_flags(device->flags, device->id,
- st_device_supports_status);
+ fenced_device_set_flags(device, fenced_df_supports_status);
+
} else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
- stonith__set_device_flags(device->flags, device->id,
- st_device_supports_reboot);
+ fenced_device_set_flags(device, fenced_df_supports_reboot);
+
} else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
/* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it
* joins.
*
* @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for
* PCMK_XA_AUTOMATIC.
*/
if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC)
|| pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) {
- device->automatic_unfencing = TRUE;
+
+ fenced_device_set_flags(device, fenced_df_auto_unfence);
}
- stonith__set_device_flags(device->flags, device->id,
- st_device_supports_on);
+ fenced_device_set_flags(device, fenced_df_supports_on);
}
// Independently of the flags above, remember actions marked on-target
if ((action != NULL)
&& pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) {
pcmk__add_word(&(device->on_target_actions), 64, action);
}
}
xmlXPathFreeObject(xpath);
}
/*!
 * \internal
 * \brief Determine how a device's eligible targets should be checked
 *
 * An explicitly configured host check parameter always wins. Otherwise the
 * type is a static list if a host list or host map is configured, a dynamic
 * list if the device supports the list action, status if it supports the
 * status action, and none as a last resort.
 *
 * \param[in] dev  Device to check
 *
 * \return Host check type for \p dev (not newly allocated)
 */
static const char *
target_list_type(fenced_device_t *dev)
{
const char *check_type = NULL;
check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK);
if (check_type == NULL) {
// Not configured explicitly, so pick a default from what is available
if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
check_type = PCMK_VALUE_STATIC_LIST;
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
check_type = PCMK_VALUE_STATIC_LIST;
- } else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
+ } else if (pcmk_is_set(dev->flags, fenced_df_supports_list)) {
check_type = PCMK_VALUE_DYNAMIC_LIST;
- } else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
+ } else if (pcmk_is_set(dev->flags, fenced_df_supports_status)) {
check_type = PCMK_VALUE_STATUS;
} else {
check_type = PCMK_VALUE_NONE;
}
}
return check_type;
}
/*!
 * \internal
 * \brief Create a fence device object from its XML representation
 *
 * \param[in] dev  XML describing the device (must have an agent attribute)
 *
 * \return Newly allocated device, or NULL if \p dev has no agent
 * \note The device is not added to the device table here.
 */
static fenced_device_t *
build_device_from_xml(const xmlNode *dev)
{
const char *value;
fenced_device_t *device = NULL;
char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT);
CRM_CHECK(agent != NULL, return device);
device = pcmk__assert_alloc(1, sizeof(fenced_device_t));
device->id = crm_element_value_copy(dev, PCMK_XA_ID);
device->agent = agent;
device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE);
device->params = xml2list(dev);
value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
if (value) {
device->targets = stonith__parse_targets(value);
}
// Values found in the host map are appended to the target list as well
value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
device->aliases = build_port_aliases(value, &(device->targets));
value = target_list_type(device);
if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)
&& (device->targets != NULL)) {
// device->targets is necessary only with PCMK_VALUE_STATIC_LIST
g_list_free_full(device->targets, free);
device->targets = NULL;
}
switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
case pcmk_rc_ok:
if (device->agent_metadata) {
read_action_metadata(device);
- stonith__device_parameter_flags(&(device->flags), device->id,
- device->agent_metadata);
+ device->default_host_arg =
+ stonith__default_host_arg(device->agent_metadata);
}
break;
case EAGAIN:
// Metadata is not available yet; ensure a repeating retry timer is running
if (device->timer == NULL) {
device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
TRUE, get_agent_metadata_cb, device);
}
if (!mainloop_timer_running(device->timer)) {
mainloop_timer_start(device->timer);
}
break;
default:
break;
}
// Unfencing may also be requested by the device XML rather than the metadata
value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES);
if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) {
- device->automatic_unfencing = TRUE;
+ fenced_device_set_flags(device, fenced_df_auto_unfence);
}
if (is_action_required(PCMK_ACTION_ON, device)) {
crm_info("Fencing device '%s' requires unfencing", device->id);
}
if (device->on_target_actions != NULL) {
crm_info("Fencing device '%s' requires actions (%s) to be executed "
"on target", device->id,
(const char *) device->on_target_actions->str);
}
device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
return device;
}
/*!
 * \internal
 * \brief Build an internally generated command and schedule it on a device
 *
 * \param[in]     origin              Origin to record in the command
 * \param[in,out] device              Device to schedule the command on
 * \param[in]     action              Action to record in the command
 * \param[in]     target              Target to record in the command
 * \param[in]     timeout             Timeout for the command (0 selects 60)
 * \param[in]     internal_user_data  Data stored in the command for the
 *                                    callback's use
 * \param[in]     done_cb             Callback stored in the command
 */
static void
schedule_internal_command(const char *origin, fenced_device_t *device,
                          const char *action, const char *target, int timeout,
                          void *internal_user_data,
                          void (*done_cb) (int pid,
                                           const pcmk__action_result_t *result,
                                           void *user_data))
{
    async_command_t *request = pcmk__assert_alloc(1, sizeof(async_command_t));

    request->id = -1;

    if (timeout != 0) {
        request->default_timeout = timeout;
    } else {
        request->default_timeout = 60;
    }
    request->timeout = request->default_timeout;

    request->action = pcmk__str_copy(action);
    request->target = pcmk__str_copy(target);
    request->device = pcmk__str_copy(device->id);
    request->origin = pcmk__str_copy(origin);

    // Internal commands name this daemon as the requesting client
    request->client = pcmk__str_copy(crm_system_name);
    request->client_name = pcmk__str_copy(crm_system_name);

    /* The command itself, not internal_user_data, is what gets passed to
     * done_cb as its user data
     */
    request->internal_user_data = internal_user_data;
    request->done_cb = done_cb;

    schedule_stonith_command(request, device);
}
// Fence agent status commands use custom exit status codes
enum fence_status_code {
fence_status_invalid = -1,
// Treated by status_search_cb() as "device can fence the target"
fence_status_active = 0,
// Treated by status_search_cb() as "device cannot fence the target"
fence_status_unknown = 1,
// Treated by status_search_cb() as "device can fence the target"
fence_status_inactive = 2,
};
/*!
 * \internal
 * \brief Handle the result of a status action run for a device search
 *
 * Records in the search whether the device can fence the searched-for host,
 * based on the exit status of the status action.
 *
 * \param[in]     pid        Ignored
 * \param[in]     result     Result of the status action
 * \param[in,out] user_data  Command that was executed (freed here)
 */
static void
status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
{
    async_command_t *cmd = user_data;
    struct device_search_s *search = cmd->internal_user_data;
    fenced_device_t *dev = cmd_device(cmd);
    gboolean can_fence = FALSE;

    // Everything needed from the command has been captured above
    free_async_command(cmd);

    if (dev == NULL) {
        search_devices_record_result(search, NULL, FALSE);
        return;
    }

    mainloop_set_trigger(dev->work);

    if (result->execution_status != PCMK_EXEC_DONE) {
        const char *reason = result->exit_reason;

        crm_warn("Assuming %s cannot fence %s "
                 "because status could not be executed: %s%s%s%s",
                 dev->id, search->host,
                 pcmk_exec_status_str(result->execution_status),
                 ((reason == NULL)? "" : " ("),
                 ((reason == NULL)? "" : reason),
                 ((reason == NULL)? "" : ")"));
        search_devices_record_result(search, dev->id, FALSE);
        return;
    }

    if (result->exit_status == fence_status_unknown) {
        crm_trace("%s reported it cannot fence %s", dev->id, search->host);

    } else if ((result->exit_status == fence_status_active)
               || (result->exit_status == fence_status_inactive)) {
        crm_trace("%s reported it can fence %s", dev->id, search->host);
        can_fence = TRUE;

    } else {
        crm_warn("Assuming %s cannot fence %s "
                 "(status returned unknown code %d)",
                 dev->id, search->host, result->exit_status);
    }

    search_devices_record_result(search, dev->id, can_fence);
}
static void
dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
fenced_device_t *dev = cmd_device(cmd);
gboolean can_fence = FALSE;
free_async_command(cmd);
/* Host/alias must be in the list output to be eligible to be fenced
*
* Will cause problems if down'd nodes aren't listed or (for virtual nodes)
* if the guest is still listed despite being moved to another machine
*/
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (pcmk__result_ok(result)) {
crm_info("Refreshing target list for %s", dev->id);
g_list_free_full(dev->targets, free);
dev->targets = stonith__parse_targets(result->action_stdout);
dev->targets_age = time(NULL);
} else if (dev->targets != NULL) {
if (result->execution_status == PCMK_EXEC_DONE) {
crm_info("Reusing most recent target list for %s "
"because list returned error code %d",
dev->id, result->exit_status);
} else {
crm_info("Reusing most recent target list for %s "
"because list could not be executed: %s%s%s%s",
dev->id, pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
}
} else { // We have never successfully executed list
if (result->execution_status == PCMK_EXEC_DONE) {
crm_warn("Assuming %s cannot fence %s "
"because list returned error code %d",
dev->id, search->host, result->exit_status);
} else {
crm_warn("Assuming %s cannot fence %s "
"because list could not be executed: %s%s%s%s",
dev->id, search->host,
pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
}
/* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't
* explicitly specify PCMK_VALUE_DYNAMIC_LIST
*/
if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
crm_notice("Switching to pcmk_host_check='status' for %s", dev->id);
pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK,
PCMK_VALUE_STATUS);
}
}
if (dev->targets) {
const char *alias = g_hash_table_lookup(dev->aliases, search->host);
if (!alias) {
alias = search->host;
}
if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
can_fence = TRUE;
}
}
search_devices_record_result(search, dev->id, can_fence);
}
/*!
 * \internal
 * \brief Check whether one parameter table has entries another does not match
 *
 * Keys beginning with "CRM_meta" and the feature set key are ignored. Values
 * are compared case-insensitively.
 *
 * \param[in] first   Table whose entries are checked
 * \param[in] second  Table to look the entries up in
 *
 * \return 1 if any considered key in \p first is missing from \p second or
 *         has a different value there, otherwise 0
 */
static int
device_params_diff(GHashTable *first, GHashTable *second)
{
    GHashTableIter iter;
    char *name = NULL;
    char *expected = NULL;

    g_hash_table_iter_init(&iter, first);
    while (g_hash_table_iter_next(&iter, (void **) &name,
                                  (void **) &expected)) {
        const char *actual = NULL;

        if ((strncmp(name, "CRM_meta", strlen("CRM_meta")) == 0)
            || (strcmp(name, PCMK_XA_CRM_FEATURE_SET) == 0)) {
            continue;
        }

        actual = g_hash_table_lookup(second, name);
        if ((actual != NULL)
            && pcmk__str_eq(actual, expected, pcmk__str_casei)) {
            continue;
        }

        crm_trace("Different value for %s: %s != %s", name, actual, expected);
        return 1;
    }
    return 0;
}
/*!
 * \internal
 * \brief Find an already registered device identical to a given one
 *
 * A registered device counts as identical if it has the same ID, the same
 * agent (compared case-insensitively), and parameters that match in both
 * directions according to device_params_diff().
 *
 * \param[in] device  Device to look for
 *
 * \return Matching entry from the device table, or NULL if there is none
 */
static fenced_device_t *
device_has_duplicate(const fenced_device_t *device)
{
    fenced_device_t *existing = g_hash_table_lookup(device_table, device->id);

    if (existing == NULL) {
        crm_trace("No match for %s", device->id);
        return NULL;
    }

    if (!pcmk__str_eq(existing->agent, device->agent, pcmk__str_casei)) {
        crm_trace("Different agent: %s != %s", existing->agent, device->agent);
        return NULL;
    }

    // Use pcmk__digest_operation() here?
    if (device_params_diff(device->params, existing->params) != 0) {
        return NULL;
    }
    if (device_params_diff(existing->params, device->params) != 0) {
        return NULL;
    }

    crm_trace("Match");
    return existing;
}
/*!
 * \internal
 * \brief Register a fence device described by XML in the device table
 *
 * \param[in] dev       XML describing the device
 * \param[in] from_cib  If true, mark the device as registered via the CIB,
 *                      otherwise as registered via the API
 *
 * \return pcmk_rc_ok on success, ENOMEM if no device could be built from
 *         \p dev, or ENODEV if a watchdog device is misconfigured
 */
int
fenced_device_register(const xmlNode *dev, bool from_cib)
{
const char *local_node_name = fenced_get_local_node();
fenced_device_t *dup = NULL;
fenced_device_t *device = build_device_from_xml(dev);
int rc = pcmk_rc_ok;
CRM_CHECK(device != NULL, return ENOMEM);
/* do we have a watchdog-device? */
if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)
|| pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
if (stonith_watchdog_timeout_ms <= 0) {
crm_err("Ignoring watchdog fence device without "
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set");
rc = ENODEV;
goto done;
}
// Reached via the watchdog ID, so the agent must be a watchdog agent too
if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
crm_err("Ignoring watchdog fence device with unknown agent '%s' "
"rather than '" STONITH_WATCHDOG_AGENT "'",
pcmk__s(device->agent, ""));
rc = ENODEV;
goto done;
}
// Reached via a watchdog agent, so the ID must be the watchdog ID too
if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
crm_err("Ignoring watchdog fence device named '%s' rather than "
"'" STONITH_WATCHDOG_ID "'",
pcmk__s(device->id, ""));
rc = ENODEV;
goto done;
}
if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
pcmk__str_none)) {
/* This has either an empty list or the targets configured for
* watchdog fencing
*/
g_list_free_full(stonith_watchdog_targets, free);
stonith_watchdog_targets = device->targets;
device->targets = NULL;
}
if (!node_does_watchdog_fencing(local_node_name)) {
crm_debug("Skip registration of watchdog fence device on node not "
"in host list");
/* NOTE(review): rc is still pcmk_rc_ok on this path, so the cleanup at
 * "done" does not free the newly built device, and targets is cleared
 * without being freed. Confirm that nothing is leaked here.
 */
device->targets = NULL;
stonith_device_remove(device->id, from_cib);
goto done;
}
// Proceed as with any other fencing device
g_list_free_full(device->targets, free);
device->targets = stonith__parse_targets(local_node_name);
pcmk__insert_dup(device->params, PCMK_STONITH_HOST_LIST,
local_node_name);
}
dup = device_has_duplicate(device);
if (dup != NULL) {
guint ndevices = g_hash_table_size(device_table);
crm_debug("Device '%s' already in device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
// Keep the existing table entry and discard the one just built
free_device(device);
device = dup;
- device->dirty = FALSE;
+ fenced_device_clear_flags(device, fenced_df_dirty);
} else {
guint ndevices = 0;
fenced_device_t *old = g_hash_table_lookup(device_table, device->id);
- if (from_cib && (old != NULL) && old->api_registered) {
+ if (from_cib && (old != NULL)
+ && pcmk_is_set(old->flags, fenced_df_api_registered)) {
/* If the CIB is writing over an entry that is shared with a stonith
* client, copy any pending ops that currently exist on the old
* entry to the new one. Otherwise the pending ops will be reported
* as failures.
*/
crm_info("Overwriting existing entry for %s from CIB", device->id);
device->pending_ops = old->pending_ops;
- device->api_registered = TRUE;
+ fenced_device_set_flags(device, fenced_df_api_registered);
old->pending_ops = NULL;
if (device->pending_ops != NULL) {
mainloop_set_trigger(device->work);
}
}
// Any old entry with this ID is freed by the table's value destructor
g_hash_table_replace(device_table, device->id, device);
ndevices = g_hash_table_size(device_table);
crm_notice("Added '%s' to device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
}
if (from_cib) {
- device->cib_registered = TRUE;
+ fenced_device_set_flags(device, fenced_df_cib_registered);
} else {
- device->api_registered = TRUE;
+ fenced_device_set_flags(device, fenced_df_api_registered);
}
done:
// On error the device was never added to the table, so free it here
if (rc != pcmk_rc_ok) {
free_device(device);
}
return rc;
}
/*!
 * \internal
 * \brief Unregister a fence device for either the CIB or the API
 *
 * The device is removed from the device table only once it is registered via
 * neither the CIB nor the API.
 *
 * \param[in] id        ID of device to unregister
 * \param[in] from_cib  If true, drop the CIB registration, otherwise drop the
 *                      API registration (and the verified flag)
 */
void
stonith_device_remove(const char *id, bool from_cib)
{
fenced_device_t *device = g_hash_table_lookup(device_table, id);
guint ndevices = 0;
if (device == NULL) {
ndevices = g_hash_table_size(device_table);
crm_info("Device '%s' not found (%u active device%s)", id, ndevices,
pcmk__plural_s(ndevices));
return;
}
if (from_cib) {
- device->cib_registered = FALSE;
+ fenced_device_clear_flags(device, fenced_df_cib_registered);
} else {
- device->verified = FALSE;
- device->api_registered = FALSE;
+ fenced_device_clear_flags(device,
+ fenced_df_api_registered|fenced_df_verified);
}
- if (!device->cib_registered && !device->api_registered) {
+ if (!pcmk_any_flags_set(device->flags,
+ fenced_df_api_registered
+ |fenced_df_cib_registered)) {
// Removing the table entry also frees the device
g_hash_table_remove(device_table, id);
ndevices = g_hash_table_size(device_table);
crm_info("Removed '%s' from device list (%u active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
} else {
// Exactly one is true at this point
+ const bool cib_registered = pcmk_is_set(device->flags,
+ fenced_df_cib_registered);
+
crm_trace("Not removing '%s' from device list (%u active) because "
"still registered via %s",
id, g_hash_table_size(device_table),
- (device->cib_registered? "CIB" : "API"));
+ (cib_registered? "CIB" : "API"));
}
}
/*!
 * \internal
 * \brief Count the fencing levels that have devices in a topology entry
 *
 * \param[in] tp  Topology table entry to examine
 *
 * \return Number of levels in \p tp whose device list is not NULL
 * \note This function is used only for log messages.
 */
static int
count_active_levels(const stonith_topology_t *tp)
{
    int nactive = 0;

    for (int level = 0; level < ST__LEVEL_COUNT; level++) {
        nactive += (tp->levels[level] != NULL)? 1 : 0;
    }
    return nactive;
}
/*!
 * \internal
 * \brief Free a topology table entry, including its per-level device lists
 *
 * \param[in,out] data  Entry to free (a \c stonith_topology_t)
 */
static void
free_topology_entry(gpointer data)
{
    stonith_topology_t *entry = data;

    for (int level = 0; level < ST__LEVEL_COUNT; level++) {
        GList *devices = entry->levels[level];

        if (devices == NULL) {
            continue;
        }
        g_list_free_full(devices, free);
    }

    free(entry->target);
    free(entry->target_value);
    free(entry->target_pattern);
    free(entry->target_attribute);
    free(entry);
}
/*!
 * \internal
 * \brief Destroy the fencing topology table, if it exists
 */
void
free_topology_list(void)
{
    if (topology == NULL) {
        return;
    }
    g_hash_table_destroy(topology);
    topology = NULL;
}
/*!
 * \internal
 * \brief Create the fencing topology table, unless it already exists
 */
void
init_topology_list(void)
{
    if (topology != NULL) {
        return;
    }
    // Values are freed with free_topology_entry(); no key destructor is set
    topology = pcmk__strkey_table(NULL, free_topology_entry);
}
char *
stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
{
if (mode == fenced_target_by_unknown) {
mode = unpack_level_kind(level);
}
switch (mode) {
case fenced_target_by_name:
return crm_element_value_copy(level, PCMK_XA_TARGET);
case fenced_target_by_pattern:
return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN);
case fenced_target_by_attribute:
return crm_strdup_printf("%s=%s",
crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE),
crm_element_value(level, PCMK_XA_TARGET_VALUE));
default:
return crm_strdup_printf("unknown-%s", pcmk__xe_id(level));
}
}
/*!
 * \internal
 * \brief Determine how a topology level identifies its target
 *
 * A target name takes precedence over a target pattern, which in turn takes
 * precedence over a target attribute/value pair (both must be present).
 *
 * \param[in] level  Topology level XML to examine
 *
 * \return How to identify target of \p level
 */
static enum fenced_target_by
unpack_level_kind(const xmlNode *level)
{
    const char *attribute = NULL;

    if (crm_element_value(level, PCMK_XA_TARGET) != NULL) {
        return fenced_target_by_name;
    }

    if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) {
        return fenced_target_by_pattern;
    }

    attribute = crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE);
    if ((attribute == NULL)
        || (crm_element_value(level, PCMK_XA_TARGET_VALUE) == NULL)) {
        return fenced_target_by_unknown;
    }
    return fenced_target_by_attribute;
}
/* Removed helper (every line below is a deletion): split a comma-separated
 * device string into a stonith_key_value_t list, adding one entry per
 * comma-delimited segment.
 */
-static stonith_key_value_t *
-parse_device_list(const char *devices)
-{
- int lpc = 0;
- int max = 0;
- int last = 0;
- stonith_key_value_t *output = NULL;
-
- if (devices == NULL) {
- return output;
- }
-
- max = strlen(devices);
- for (lpc = 0; lpc <= max; lpc++) {
- if (devices[lpc] == ',' || devices[lpc] == 0) {
- char *line = strndup(devices + last, lpc - last);
-
- output = stonith_key_value_add(output, NULL, line);
- free(line);
-
- last = lpc + 1;
- }
- }
-
- return output;
-}
-
/*!
* \internal
* \brief Unpack essential information from topology request XML
*
* \param[in] xml Request XML to search
* \param[out] mode If not NULL, where to store level kind
* \param[out] target If not NULL, where to store representation of target
* \param[out] id If not NULL, where to store level number
- * \param[out] desc If not NULL, where to store log-friendly level description
*
* \return Topology level XML from within \p xml, or NULL if not found
- * \note The caller is responsible for freeing \p *target and \p *desc if set.
+ * \note The caller is responsible for freeing \p *target if set.
*/
static xmlNode *
unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
- int *id, char **desc)
+ int *id)
{
// Defaults reported to the caller when no level element is found
enum fenced_target_by local_mode = fenced_target_by_unknown;
char *local_target = NULL;
int local_id = 0;
/* The level element can be the top element or lower. If top level, don't
* search by xpath, because it might give multiple hits if the XML is the
* CIB.
*/
if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) {
xml = pcmk__xpath_find_one(xml->doc, "//" PCMK_XE_FENCING_LEVEL,
LOG_WARNING);
}
- if (xml == NULL) {
- if (desc != NULL) {
- *desc = crm_strdup_printf("missing");
- }
- } else {
+ if (xml != NULL) {
local_mode = unpack_level_kind(xml);
local_target = stonith_level_key(xml, local_mode);
crm_element_value_int(xml, PCMK_XA_INDEX, &local_id);
- if (desc != NULL) {
- *desc = crm_strdup_printf("%s[%d]", local_target, local_id);
- }
}
if (mode != NULL) {
*mode = local_mode;
}
if (id != NULL) {
*id = local_id;
}
// Hand the key to the caller if it is wanted, otherwise discard it
if (target != NULL) {
*target = local_target;
} else {
free(local_target);
}
return xml;
}
/*!
* \internal
* \brief Register a fencing topology level for a target
*
* Given an XML request specifying the target name, level index, and device IDs
* for the level, this will create an entry for the target in the global topology
* table if one does not already exist, then append the specified device IDs to
* the entry's device list for the specified level.
*
* \param[in] msg XML request for STONITH level registration
- * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
- * \param[out] result Where to set result of registration
+ * \param[out] result Where to set result of registration (can be \c NULL)
*/
void
-fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
+fenced_register_level(xmlNode *msg, pcmk__action_result_t *result)
{
int id = 0;
xmlNode *level;
enum fenced_target_by mode;
char *target;
stonith_topology_t *tp;
- stonith_key_value_t *dIter = NULL;
- stonith_key_value_t *devices = NULL;
+ const char *value = NULL;
- CRM_CHECK((msg != NULL) && (result != NULL), return);
+ CRM_CHECK(msg != NULL, return);
- level = unpack_level_request(msg, &mode, &target, &id, desc);
+ level = unpack_level_request(msg, &mode, &target, &id);
if (level == NULL) {
- fenced_set_protocol_error(result);
+ set_bad_request_result(result);
return;
}
// Ensure an ID was given (even the client API adds an ID)
if (pcmk__str_empty(pcmk__xe_id(level))) {
crm_warn("Ignoring registration for topology level without ID");
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Topology level is invalid without ID");
return;
}
// Ensure a valid target was specified
if (mode == fenced_target_by_unknown) {
crm_warn("Ignoring registration for topology level '%s' "
"without valid target", pcmk__xe_id(level));
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid target for topology level '%s'",
pcmk__xe_id(level));
return;
}
// Ensure level ID is in allowed range
if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
crm_warn("Ignoring topology registration for %s with invalid level %d",
target, id);
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid level number '%s' for topology level '%s'",
pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
""),
pcmk__xe_id(level));
return;
}
/* Find or create topology table entry */
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t));
tp->kind = mode;
// The new entry takes ownership of target, which is also its table key
tp->target = target;
tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE);
tp->target_pattern = crm_element_value_copy(level,
PCMK_XA_TARGET_PATTERN);
tp->target_attribute = crm_element_value_copy(level,
PCMK_XA_TARGET_ATTRIBUTE);
g_hash_table_replace(topology, tp->target, tp);
crm_trace("Added %s (%d) to the topology (%d active entries)",
target, (int) mode, g_hash_table_size(topology));
} else {
// An entry already exists, so this copy of the key is not needed
free(target);
}
// Devices are appended to, not substituted for, any already at this level
if (tp->levels[id] != NULL) {
crm_info("Adding to the existing %s[%d] topology entry",
tp->target, id);
}
- devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES));
- for (dIter = devices; dIter; dIter = dIter->next) {
- const char *device = dIter->value;
+ value = crm_element_value(level, PCMK_XA_DEVICES);
+ if (value != NULL) {
+ /* Empty string and whitespace are not possible with schema validation
+ * enabled. Don't bother handling them specially here.
+ */
+ gchar **devices = g_strsplit(value, ",", 0);
- crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
- tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device));
+ for (char **dev = devices; (dev != NULL) && (*dev != NULL); dev++) {
+ crm_trace("Adding device '%s' for %s[%d]", *dev, tp->target, id);
+ tp->levels[id] = g_list_append(tp->levels[id],
+ pcmk__str_copy(*dev));
+ }
+ g_strfreev(devices);
}
- stonith_key_value_freeall(devices, 1, 1);
{
int nlevels = count_active_levels(tp);
crm_info("Target %s has %d active fencing level%s",
tp->target, nlevels, pcmk__plural_s(nlevels));
}
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
/*!
* \internal
* \brief Unregister a fencing topology level for a target
*
* Given an XML request specifying the target name and level index (or 0 for all
* levels), this will remove any corresponding entry for the target from the
* global topology table.
*
* \param[in] msg XML request for STONITH level registration
- * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
- * \param[out] result Where to set result of unregistration
+ * \param[out] result Where to set result of unregistration (can be \c NULL)
*/
void
-fenced_unregister_level(xmlNode *msg, char **desc,
- pcmk__action_result_t *result)
+fenced_unregister_level(xmlNode *msg, pcmk__action_result_t *result)
{
int id = -1;
stonith_topology_t *tp;
char *target;
xmlNode *level = NULL;
- CRM_CHECK(result != NULL, return);
-
- level = unpack_level_request(msg, NULL, &target, &id, desc);
+ level = unpack_level_request(msg, NULL, &target, &id);
if (level == NULL) {
- fenced_set_protocol_error(result);
+ set_bad_request_result(result);
return;
}
// Ensure level ID is in allowed range
if ((id < 0) || (id >= ST__LEVEL_COUNT)) {
crm_warn("Ignoring topology unregistration for %s with invalid level %d",
target, id);
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid level number '%s' for topology level %s",
pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
"<null>"),
// Client API doesn't add ID to unregistration XML
pcmk__s(pcmk__xe_id(level), ""));
return;
}
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
// Nothing is registered for this target; success is still reported below
guint nentries = g_hash_table_size(topology);
crm_info("No fencing topology found for %s (%d active %s)",
target, nentries,
pcmk__plural_alt(nentries, "entry", "entries"));
} else if (id == 0 && g_hash_table_remove(topology, target)) {
// Level 0 removes the target's whole entry; the table frees tp
guint nentries = g_hash_table_size(topology);
crm_info("Removed all fencing topology entries related to %s "
"(%d active %s remaining)", target, nentries,
pcmk__plural_alt(nentries, "entry", "entries"));
} else if (tp->levels[id] != NULL) {
// Drop only the requested level, keeping the target's table entry
guint nlevels;
g_list_free_full(tp->levels[id], free);
tp->levels[id] = NULL;
nlevels = count_active_levels(tp);
crm_info("Removed level %d from fencing topology for %s "
"(%d active level%s remaining)",
id, target, nlevels, pcmk__plural_s(nlevels));
}
free(target);
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
/*!
 * \internal
 * \brief Join the strings in a list into one newly allocated string
 *
 * \param[in] list                  List of C strings to join
 * \param[in] delim                 Separator placed between items (\c NULL is
 *                                  treated as an empty string)
 * \param[in] terminate_with_delim  If true and \p list is not empty, also
 *                                  append \p delim after the last item
 *
 * \return Newly allocated string (empty if \p list is empty)
 * \note The caller is responsible for freeing the return value.
 */
static char *
list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
{
    const guint nitems = g_list_length(list);
    size_t delim_len = 0;
    size_t alloc_size = 1;  // Room for the terminating null byte
    char *rv = NULL;
    char *pos = NULL;

    /* Normalize a NULL delimiter to "" so that it is sized as zero bytes and
     * never handed to a string function (passing NULL for "%s" is undefined
     * behavior and could write more bytes than were allocated).
     */
    if (delim == NULL) {
        delim = "";
    }
    delim_len = strlen(delim);

    for (GList *iter = list; iter != NULL; iter = iter->next) {
        alloc_size += strlen((const char *) iter->data);
    }
    if (nitems > 0) {
        alloc_size += (nitems - 1) * delim_len;
        if (terminate_with_delim) {
            alloc_size += delim_len;
        }
    }

    rv = pcmk__assert_alloc(alloc_size, sizeof(char));
    pos = rv;

    for (GList *iter = list; iter != NULL; iter = iter->next) {
        const char *value = (const char *) iter->data;
        const size_t value_len = strlen(value);

        // Every item except the first is preceded by the delimiter
        if (iter != list) {
            memcpy(pos, delim, delim_len);
            pos += delim_len;
        }
        memcpy(pos, value, value_len);
        pos += value_len;
    }

    if ((nitems > 0) && terminate_with_delim) {
        memcpy(pos, delim, delim_len);
        pos += delim_len;
    }
    *pos = '\0';
    return rv;
}
/*!
* \internal
* \brief Execute a fence agent action directly (and asynchronously)
*
* Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
* directly on a specified device. Only list, monitor, and status actions are
* expected to use this call, though it should work with any agent command.
*
* \param[in] msg Request XML specifying action
* \param[out] result Where to store result of action
*
* \note If the action is monitor, the device must be registered via the API
* (CIB registration is not sufficient), because monitor should not be
* possible unless the device is "started" (API registered).
*/
static void
execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
{
// The device ID and the action may be carried by different elements
xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_ST_DEVICE_ID,
LOG_ERR);
xmlNode *op = pcmk__xpath_find_one(msg->doc,
"//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
LOG_ERR);
const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION);
async_command_t *cmd = NULL;
fenced_device_t *device = NULL;
if ((id == NULL) || (action == NULL)) {
crm_info("Malformed API action request: device %s, action %s",
(id? id : "not specified"),
(action? action : "not specified"));
- fenced_set_protocol_error(result);
+ set_bad_request_result(result);
return;
}
if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
// Watchdog agent actions are implemented internally
if (stonith_watchdog_timeout_ms <= 0) {
pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Watchdog fence device not configured");
return;
} else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
// Output is the watchdog targets, each followed by a newline
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_result_output(result,
list_to_string(stonith_watchdog_targets,
"\n", TRUE),
NULL);
return;
} else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) {
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return;
}
// Any other watchdog action falls through to the device table lookup
}
device = g_hash_table_lookup(device_table, id);
if (device == NULL) {
crm_info("Ignoring API '%s' action request because device %s not found",
action, id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"'%s' not found", id);
return;
- } else if (!device->api_registered
+ } else if (!pcmk_is_set(device->flags, fenced_df_api_registered)
&& (strcmp(action, PCMK_ACTION_MONITOR) == 0)) {
// Monitors may run only on "started" (API-registered) devices
crm_info("Ignoring API '%s' action request because device %s not active",
action, id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"'%s' not active", id);
return;
}
cmd = create_async_command(msg);
if (cmd == NULL) {
crm_log_xml_warn(msg, "invalid");
- fenced_set_protocol_error(result);
+ set_bad_request_result(result);
return;
}
// The action has only been queued, so the result is reported as pending
schedule_stonith_command(cmd, device);
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
/*
* Record one device's answer to a device search, and finish the search once
* every expected answer has arrived.
*
* search:    Search being answered (its reply counter is always incremented)
* device:    ID of the device that answered (may be NULL)
* can_fence: Whether the device is able to fence the search's target
*
* A capable device is added (as a copied string) to search->capable, unless
* the search is restricted to devices with particular support flags and the
* device lacks them. When replies_received reaches replies_needed, the
* search's callback is invoked with the capable list and the search object is
* freed. The capable list itself is not freed here.
*
* NOTE(review): the early return taken for a device lacking the required
* support flags also skips the completion check below. If that device's reply
* is the last one expected, the callback would not be invoked by this call --
* confirm whether that is intended.
*/
static void
search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
{
search->replies_received++;
if (can_fence && device) {
- if (search->support_action_only != st_device_supports_none) {
+ if (search->support_action_only != fenced_df_none) {
fenced_device_t *dev = g_hash_table_lookup(device_table, device);
if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) {
return;
}
}
search->capable = g_list_append(search->capable,
pcmk__str_copy(device));
}
// All expected replies are in: report the results and release the search
if (search->replies_needed == search->replies_received) {
guint ndevices = g_list_length(search->capable);
crm_debug("Search found %d device%s that can perform '%s' targeting %s",
ndevices, pcmk__plural_s(ndevices),
(search->action? search->action : "unknown action"),
(search->host? search->host : "any node"));
search->callback(search->capable, search->user_data);
free(search->host);
free(search->action);
free(search);
}
}
/*!
 * \internal
 * \brief Decide whether the local node may run a fence action against a target
 *
 * An action found in the device's on-target action list may be executed only
 * when the local node is itself the target. Any other action may target the
 * local node only when self-fencing is allowed.
 *
 * \param[in] device      Fence device that would run the action
 * \param[in] action      Fence action to evaluate
 * \param[in] target      Name of the node to be fenced
 * \param[in] allow_self  Whether this operation permits self-fencing
 *
 * \return TRUE if the local node may execute \p action, otherwise FALSE
 */
static gboolean
localhost_is_eligible(const fenced_device_t *device, const char *action,
                      const char *target, gboolean allow_self)
{
    const gboolean targets_self = pcmk__str_eq(target, fenced_get_local_node(),
                                               pcmk__str_casei);
    gboolean on_target_action = FALSE;

    // The on-target list is searched for the action name as a substring
    if ((device != NULL) && (action != NULL)
        && (device->on_target_actions != NULL)) {

        const char *on_target = (const char *) device->on_target_actions->str;

        on_target_action = (strstr(on_target, action) != NULL);
    }

    if (on_target_action) {
        if (targets_self) {
            return TRUE;
        }
        crm_trace("Operation '%s' using %s can only be executed for local "
                  "host, not %s", action, device->id, target);
        return FALSE;
    }

    if (targets_self && !allow_self) {
        crm_trace("'%s' operation does not support self-fencing", action);
        return FALSE;
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Check local eligibility for an action or anything it may be remapped to
 *
 * \param[in] device      Fence device to check
 * \param[in] action      Fence action to check
 * \param[in] target      Node name of fence target
 * \param[in] allow_self  Whether self-fencing is allowed for this operation
 *
 * \return true if the local node may execute \p action itself or any action
 *         that \p action might be remapped to, otherwise false
 */
static bool
localhost_is_eligible_with_remap(const fenced_device_t *device,
                                 const char *action, const char *target,
                                 gboolean allow_self)
{
    // The action exactly as requested
    if (localhost_is_eligible(device, action, target, allow_self)) {
        return true;
    }

    // Only "reboot" has potential remaps
    if (!pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
        return false;
    }

    /* A "reboot" might be carried out as "off" followed by "on", so being
     * allowed to run either one is enough here; the disallowed actions get
     * reported along with the results. Self-fencing is never allowed for the
     * remapped "on", because the target is off at that point.
     */
    return localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self)
           || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE);
}
/*!
 * \internal
 * \brief Tell whether a device's cached target list is still usable
 *
 * \param[in] dev  Fencing device whose cache should be checked
 *
 * \return \c true if \p dev has a cached target list whose recorded age is
 *         less than 60 seconds old, otherwise \c false
 */
static inline bool
can_use_target_cache(const fenced_device_t *dev)
{
    if (dev->targets == NULL) {
        return false;   // Nothing has been cached
    }
    return time(NULL) < (dev->targets_age + 60);
}
/*
* Determine whether a device can fence a search's target, and record the
* answer in the search.
*
* The answer is recorded immediately via search_devices_record_result(),
* unless the device's host check requires running the agent ("list" without a
* usable target cache, or "status"). In that case an internal command is
* scheduled with the search and a search callback (dynamic_list_search_cb or
* status_search_cb), and this function returns without recording anything.
*
* NOTE(review): if search is NULL, the CRM_CHECK failure path still passes it
* to search_devices_record_result(), which dereferences it -- confirm that
* callers never pass a NULL search.
*/
static void
can_fence_host_with_device(fenced_device_t *dev,
struct device_search_s *search)
{
gboolean can = FALSE;
// check_type doubles as the explanation logged with the final verdict
const char *check_type = "Internal bug";
const char *target = NULL;
const char *alias = NULL;
const char *dev_id = "Unspecified device";
const char *action = (search == NULL)? NULL : search->action;
CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results);
if (dev->id != NULL) {
dev_id = dev->id;
}
target = search->host;
// Without a target, every device is reported as capable
if (target == NULL) {
can = TRUE;
check_type = "No target";
goto search_report_results;
}
/* Answer immediately if the device does not support the action
* or the local node is not allowed to perform it
*/
if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)
- && !pcmk_is_set(dev->flags, st_device_supports_on)) {
+ && !pcmk_is_set(dev->flags, fenced_df_supports_on)) {
check_type = "Agent does not support 'on'";
goto search_report_results;
} else if (!localhost_is_eligible_with_remap(dev, action, target,
search->allow_self)) {
check_type = "This node is not allowed to execute action";
goto search_report_results;
}
// Check eligibility as specified by pcmk_host_check
check_type = target_list_type(dev);
alias = g_hash_table_lookup(dev->aliases, target);
if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) {
can = TRUE;
} else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST,
pcmk__str_casei)) {
// Capable if the target is listed, or is aliased with a host map set
if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
can = TRUE;
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
&& g_hash_table_lookup(dev->aliases, target)) {
can = TRUE;
}
} else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST,
pcmk__str_casei)) {
// Without a usable cached target list, run the agent's list action
if (!can_use_target_cache(dev)) {
int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST,
search->per_device_timeout);
if (device_timeout > search->per_device_timeout) {
crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s "
"is larger than " PCMK_OPT_STONITH_TIMEOUT
" (%ds), timeout may occur",
device_timeout, dev_id, search->per_device_timeout);
}
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev_id, target, action);
schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL,
search->per_device_timeout, search, dynamic_list_search_cb);
/* we'll respond to this search request async in the cb */
return;
}
// The cached list is matched against the alias when one exists
if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
pcmk__str_casei)) {
can = TRUE;
}
} else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) {
int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);
if (device_timeout > search->per_device_timeout) {
crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is "
"larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), "
"timeout may occur",
device_timeout, dev_id, search->per_device_timeout);
}
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev_id, target, action);
schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target,
search->per_device_timeout, search, status_search_cb);
/* we'll respond to this search request async in the cb */
return;
} else {
crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
}
// Log the verdict with its reason, then count this device's reply
search_report_results:
crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"),
pcmk__s(target, "unspecified target"),
(alias == NULL)? "" : " (as '", pcmk__s(alias, ""),
(alias == NULL)? "" : "')", check_type);
search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
}
/*!
 * \internal
 * \brief Per-device iteration callback that checks one device for a search
 *
 * \param[in]     key        Ignored
 * \param[in,out] value      Fence device to check (fenced_device_t *)
 * \param[in,out] user_data  Search in progress (struct device_search_s *)
 */
static void
search_devices(gpointer key, gpointer value, gpointer user_data)
{
    can_fence_host_with_device((fenced_device_t *) value,
                               (struct device_search_s *) user_data);
}
// Default timeout for queries (presumably in seconds -- TODO confirm at its users)
#define DEFAULT_QUERY_TIMEOUT 20
/*!
 * \internal
 * \brief Start a search for registered devices able to run a fence action
 *
 * Every device in the device table is checked. If no devices are registered,
 * \p callback is invoked immediately with a NULL list; otherwise the search
 * object created here is handed to each per-device check.
 *
 * \param[in]     host                 Node to be fenced (NULL for any node)
 * \param[in]     action               Fence action that must be possible
 * \param[in]     timeout              Timeout to use for each device's check
 * \param[in]     allow_self           Whether self-fencing is allowed
 * \param[in,out] user_data            Passed through to \p callback
 * \param[in]     callback             Called with the list of capable devices
 * \param[in]     support_action_only  Device support flags to require
 */
static void
get_capable_devices(const char *host, const char *action, int timeout,
                    bool allow_self, void *user_data,
                    void (*callback) (GList * devices, void *user_data),
                    uint32_t support_action_only)
{
    struct device_search_s *request = NULL;
    const char *action_desc = NULL;
    const char *host_desc = NULL;
    const guint registered = g_hash_table_size(device_table);

    if (registered == 0) {
        // Nothing to search, so answer right away
        callback(NULL, user_data);
        return;
    }

    request = pcmk__assert_alloc(1, sizeof(struct device_search_s));
    request->host = pcmk__str_copy(host);
    request->action = pcmk__str_copy(action);
    request->per_device_timeout = timeout;
    request->allow_self = allow_self;
    request->callback = callback;
    request->user_data = user_data;
    request->support_action_only = support_action_only;

    /* We are guaranteed this many replies, even if a device is
     * unregistered while the search is in progress.
     */
    request->replies_needed = registered;

    action_desc = (request->action? request->action : "unknown action");
    host_desc = (request->host? request->host : "any node");
    crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
              registered, pcmk__plural_s(registered), action_desc, host_desc);

    fenced_foreach_device(search_devices, request);
}
/* State carried from a device query request to the callback that answers it
 * (stonith_query_capable_device_cb(), which also frees every member)
 */
struct st_query_data {
// Reply XML that the query results are packed into before sending
xmlNode *reply;
// If not NULL, name of the peer node to send the reply to
char *remote_peer;
// If not NULL, ID used to look up the client to reply to
char *client_id;
// Node being queried about (NULL requests device parameters instead)
char *target;
// Fence action being queried
char *action;
// Call options from the request (checked for st_opt_allow_self_fencing)
int call_options;
};
/*!
 * \internal
 * \brief Add action-specific attributes to query reply XML
 *
 * Adds, when applicable: the "required" marker, the action-specific timeout,
 * and the maximum and base delays configured for the action on the device.
 * The effective delay is described with a single trace message.
 *
 * \param[in,out] xml     XML to add attributes to
 * \param[in]     action  Fence action
 * \param[in]     device  Fence device
 * \param[in]     target  Fence target
 */
static void
add_action_specific_attributes(xmlNode *xml, const char *action,
                               const fenced_device_t *device,
                               const char *target)
{
    int action_specific_timeout;
    int delay_max;
    int delay_base;

    CRM_CHECK(xml && action && device, return);

    // PCMK__XA_ST_REQUIRED is currently used only for unfencing
    if (is_action_required(action, device)) {
        crm_trace("Action '%s' is required using %s", action, device->id);
        crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1);
    }

    // pcmk_<action>_timeout if configured
    action_specific_timeout = get_action_timeout(device, action, 0);
    if (action_specific_timeout) {
        crm_trace("Action '%s' has timeout %ds using %s",
                  action, action_specific_timeout, device->id);
        crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
                        action_specific_timeout);
    }

    delay_max = get_action_delay_max(device, action);
    if (delay_max > 0) {
        crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max);
    }

    delay_base = get_action_delay_base(device, action, target);
    if (delay_base > 0) {
        crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base);
    }

    /* Describe the effective delay exactly once. (Previously the "maximum
     * random delay" message was emitted twice when only a maximum was set, and
     * was also emitted ahead of the combined message when both were set.)
     */
    if ((delay_max > 0) && (delay_base > 0)) {
        crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen "
                  "maximum delay of %ds using %s",
                  action, delay_base, delay_max, device->id);

    } else if (delay_max > 0) {
        crm_trace("Action '%s' has maximum random delay %ds using %s",
                  action, delay_max, device->id);

    } else if (delay_base > 0) {
        crm_trace("Action '%s' has a static delay of %ds using %s",
                  action, delay_base, device->id);
    }
}
/*!
 * \internal
 * \brief Mark query reply XML as "disallowed" if the local node may not act
 *
 * \param[in,out] xml         XML to add the attribute to
 * \param[in]     action      Fence action
 * \param[in]     device      Fence device
 * \param[in]     target      Fence target
 * \param[in]     allow_self  Whether self-fencing is allowed
 */
static void
add_disallowed(xmlNode *xml, const char *action, const fenced_device_t *device,
               const char *target, gboolean allow_self)
{
    if (localhost_is_eligible(device, action, target, allow_self)) {
        return;     // Allowed, so there is nothing to flag
    }

    crm_trace("Action '%s' using %s is disallowed for local host",
              action, device->id);
    pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true);
}
/*!
 * \internal
 * \brief Append a per-action child element to query reply XML
 *
 * The child is identified by the action name and carries that action's
 * specific attributes plus, if applicable, the "disallowed" marker.
 *
 * \param[in,out] xml         XML to add the child element to
 * \param[in]     action      Fence action
 * \param[in]     device      Fence device
 * \param[in]     target      Fence target
 * \param[in]     allow_self  Whether self-fencing is allowed
 */
static void
add_action_reply(xmlNode *xml, const char *action,
                 const fenced_device_t *device, const char *target,
                 gboolean allow_self)
{
    xmlNode *action_xml = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION);

    crm_xml_add(action_xml, PCMK_XA_ID, action);
    add_action_specific_attributes(action_xml, action, device, target);
    add_disallowed(action_xml, action, device, target, allow_self);
}
/*!
 * \internal
 * \brief Deliver a reply either to a cluster peer or to a local IPC client
 *
 * \param[in]     reply         XML reply to send
 * \param[in]     call_options  Passed along for local replies (send
 *                              synchronously if st_opt_sync_call is set)
 * \param[in]     remote_peer   If not NULL, name of peer node to send reply to
 * \param[in,out] client        If \p remote_peer is NULL, IPC client to reply to
 */
static void
stonith_send_reply(const xmlNode *reply, int call_options,
                   const char *remote_peer, pcmk__client_t *client)
{
    CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
              return);

    if (remote_peer != NULL) {
        // A named peer takes precedence over any local client
        const pcmk__node_status_t *peer =
            pcmk__get_node(0, remote_peer, NULL,
                           pcmk__node_search_cluster_member);

        pcmk__cluster_send_message(peer, pcmk_ipc_fenced, reply);
        return;
    }

    do_local_reply(reply, client, call_options);
}
/*
* Device search callback that answers a device query.
*
* devices:   List of IDs of devices found capable (freed here, entries too)
* user_data: The struct st_query_data for the query (freed here, with all of
*            its members)
*
* Packs one child element per still-registered capable device into the
* query's reply XML, sends the reply to the requesting peer or client, and
* then releases the query. If the query came from a client that no longer
* exists and there is no remote peer, no reply is built or sent.
*/
static void
stonith_query_capable_device_cb(GList * devices, void *user_data)
{
struct st_query_data *query = user_data;
int available_devices = 0;
xmlNode *wrapper = NULL;
xmlNode *list = NULL;
GList *lpc = NULL;
pcmk__client_t *client = NULL;
if (query->client_id != NULL) {
client = pcmk__find_client_by_id(query->client_id);
if ((client == NULL) && (query->remote_peer == NULL)) {
crm_trace("Skipping reply to %s: no longer a client",
query->client_id);
goto done;
}
}
// Pack the results into XML
wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA);
// The list element is named after this function
list = pcmk__xe_create(wrapper, __func__);
crm_xml_add(list, PCMK__XA_ST_TARGET, query->target);
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
fenced_device_t *device = g_hash_table_lookup(device_table, lpc->data);
const char *action = query->action;
xmlNode *dev = NULL;
if (!device) {
/* It is possible the device got unregistered while
* determining who can fence the target */
continue;
}
available_devices++;
dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID);
crm_xml_add(dev, PCMK_XA_ID, device->id);
crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace);
crm_xml_add(dev, PCMK_XA_AGENT, device->agent);
// Has had successful monitor, list, or status on this node
- crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified);
+ crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED,
+ pcmk_is_set(device->flags, fenced_df_verified));
crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags);
/* If the originating fencer wants to reboot the node, and we have a
* capable device that doesn't support "reboot", remap to "off" instead.
*/
- if (!pcmk_is_set(device->flags, st_device_supports_reboot)
+ if (!pcmk_is_set(device->flags, fenced_df_supports_reboot)
&& pcmk__str_eq(query->action, PCMK_ACTION_REBOOT,
pcmk__str_none)) {
crm_trace("%s doesn't support reboot, using values for off instead",
device->id);
action = PCMK_ACTION_OFF;
}
/* Add action-specific values if available */
add_action_specific_attributes(dev, action, device, query->target);
if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* A "reboot" *might* get remapped to "off" then "on", so after
* sending the "reboot"-specific values in the main element, we add
* sub-elements for "off" and "on" values.
*
* We short-circuited earlier if "reboot", "off" and "on" are all
* disallowed for the local host. However if only one or two are
* disallowed, we send back the results and mark which ones are
* disallowed. If "reboot" is disallowed, this might cause problems
* with older fencer versions, which won't check for it. Older
* versions will ignore "off" and "on", so they are not a problem.
*/
add_disallowed(dev, action, device, query->target,
pcmk_is_set(query->call_options,
st_opt_allow_self_fencing));
add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
pcmk_is_set(query->call_options,
st_opt_allow_self_fencing));
// Self-fencing is never allowed for the remapped "on"
add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE);
}
/* A query without a target wants device parameters */
if (query->target == NULL) {
xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES);
g_hash_table_foreach(device->params, hash2field, attrs);
}
}
crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices);
if (query->target) {
crm_debug("Found %d matching device%s for target '%s'",
available_devices, pcmk__plural_s(available_devices),
query->target);
} else {
crm_debug("%d device%s installed",
available_devices, pcmk__plural_s(available_devices));
}
crm_log_xml_trace(list, "query-result");
stonith_send_reply(query->reply, query->call_options, query->remote_peer,
client);
// Release the query and the device list whether or not a reply was sent
done:
pcmk__xml_free(query->reply);
free(query->remote_peer);
free(query->client_id);
free(query->target);
free(query->action);
free(query);
g_list_free_full(devices, free);
}
/*!
* \internal
* \brief Log the result of an asynchronous command
*
* \param[in] cmd Command the result is for
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] next Alternate device that will be tried if command failed
* \param[in] op_merged Whether this command was merged with an earlier one
*/
static void
log_async_result(const async_command_t *cmd,
const pcmk__action_result_t *result,
int pid, const char *next, bool op_merged)
{
int log_level = LOG_ERR;
int output_log_level = LOG_NEVER;
guint devices_remaining = g_list_length(cmd->next_device_iter);
GString *msg = g_string_sized_new(80); // Reasonable starting size
// Choose log levels appropriately if we have a result
if (pcmk__result_ok(result)) {
log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
pcmk__str_none)) {
output_log_level = LOG_DEBUG;
}
next = NULL;
} else {
log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
pcmk__str_none)) {
output_log_level = LOG_WARNING;
}
}
// Build the log message piece by piece
pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
if (pid != 0) {
g_string_append_printf(msg, "[%d] ", pid);
}
if (cmd->target != NULL) {
pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
}
if (cmd->device != NULL) {
pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
}
// Add exit status or execution status as appropriate
if (result->execution_status == PCMK_EXEC_DONE) {
g_string_append_printf(msg, "returned %d", result->exit_status);
} else {
pcmk__g_strcat(msg, "could not be executed: ",
pcmk_exec_status_str(result->execution_status), NULL);
}
// Add exit reason and next device if appropriate
if (result->exit_reason != NULL) {
pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
}
if (next != NULL) {
pcmk__g_strcat(msg, ", retrying with ", next, NULL);
}
if (devices_remaining > 0) {
g_string_append_printf(msg, " (%u device%s remaining)",
(unsigned int) devices_remaining,
pcmk__plural_s(devices_remaining));
}
g_string_append_printf(msg, " " QB_XS " %scall %d from %s",
(op_merged? "merged " : ""), cmd->id,
cmd->client_name);
// Log the result
do_crm_log(log_level, "%s", msg->str);
g_string_free(msg, TRUE);
// Log the output (which may have multiple lines), if appropriate
if (output_log_level != LOG_NEVER) {
char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
crm_log_output(output_log_level, prefix, result->action_stdout);
free(prefix);
}
}
/*!
 * \internal
 * \brief Log a completed asynchronous command and reply to its requester
 *
 * If the command was a fencing action whose originator is also its target,
 * the result is broadcast as a notification instead of being sent back only
 * to the originator.
 *
 * \param[in] cmd     Command that completed
 * \param[in] result  Result of command
 * \param[in] pid     Process ID of command, if available
 * \param[in] merged  If true, command was merged with another, not executed
 */
static void
send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
                 int pid, bool merged)
{
    pcmk__client_t *requester = NULL;
    xmlNode *reply_xml = NULL;

    CRM_CHECK((cmd != NULL) && (result != NULL), return);

    log_async_result(cmd, result, pid, NULL, merged);

    if (cmd->client != NULL) {
        requester = pcmk__find_client_by_id(cmd->client);
        if ((requester == NULL) && (cmd->origin == NULL)) {
            crm_trace("Skipping reply to %s: no longer a client", cmd->client);
            return;
        }
    }

    reply_xml = construct_async_reply(cmd, result);
    if (merged) {
        pcmk__xe_set_bool_attr(reply_xml, PCMK__XA_ST_OP_MERGED, true);
    }

    if (!pcmk__is_fencing_action(cmd->action)
        || !pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {

        // Reply only to the originator
        stonith_send_reply(reply_xml, cmd->options, cmd->origin, requester);

    } else {
        /* The target was also the originator, so broadcast the result on its
         * behalf (since it will be unable to).
         */
        crm_trace("Broadcast '%s' result for %s (target was also originator)",
                  cmd->action, cmd->target);
        crm_xml_add(reply_xml, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
        crm_xml_add(reply_xml, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
        pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, reply_xml);
    }

    crm_log_xml_trace(reply_xml, "Reply");
    pcmk__xml_free(reply_xml);
}
/*!
 * \internal
 * \brief Drop a command from its device's list of pending operations
 *
 * \param[in,out] cmd  Command to cancel (the command itself is not freed)
 */
static void
cancel_stonith_command(async_command_t * cmd)
{
    fenced_device_t *dev = cmd_device(cmd);

    if (dev == NULL) {
        return;     // No device to remove the command from
    }

    crm_trace("Cancel scheduled '%s' action using %s",
              cmd->action, dev->id);
    dev->pending_ops = g_list_remove(dev->pending_ops, cmd);
}
/*!
 * \internal
 * \brief Answer and cancel pending duplicates of an operation that just finished
 *
 * Look through the tracked commands for any that would do the same thing as
 * one that just succeeded. Instead of running the same operation again, each
 * such command is answered with this operation's result, cancelled, and freed.
 *
 * \param[in,out] cmd     Fencing operation that just succeeded
 * \param[in]     result  Result of \p cmd
 * \param[in]     pid     If nonzero, process ID of agent invocation (for logs)
 *
 * \note Duplicate merging will do the right thing for either type of remapped
 *       reboot. If the executing fencer remapped an unsupported reboot to off,
 *       then cmd->action will be "reboot" and will be merged with any other
 *       reboot requests. If the originating fencer remapped a topology reboot
 *       to off then on, we will get here once with cmd->action "off" and once
 *       with "on", and they will be merged separately with similar requests.
 */
static void
reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
                    int pid)
{
    GList *cursor = cmd_list;

    while (cursor != NULL) {
        GList *entry = cursor;
        async_command_t *candidate = entry->data;
        bool is_duplicate = false;

        // Advance first, because the current entry might get deleted
        cursor = cursor->next;

        if (candidate == cmd) {
            continue;
        }

        /* A pending operation is a duplicate if it comes from a different
         * client connection but has the same target, the same fencing action,
         * and the same device scheduled to execute it.
         */
        is_duplicate =
            !pcmk__str_eq(cmd->client, candidate->client, pcmk__str_casei)
            && pcmk__str_eq(cmd->target, candidate->target, pcmk__str_casei)
            && pcmk__str_eq(cmd->action, candidate->action, pcmk__str_none)
            && pcmk__str_eq(cmd->device, candidate->device, pcmk__str_casei);

        if (!is_duplicate) {
            continue;
        }

        crm_notice("Merging fencing action '%s'%s%s originating from "
                   "client %s with identical fencing request from client %s",
                   candidate->action,
                   (candidate->target == NULL)? "" : " targeting ",
                   pcmk__s(candidate->target, ""), candidate->client_name,
                   cmd->client_name);

        // Stop tracking the duplicate, send its result, and cancel it
        cmd_list = g_list_remove_link(cmd_list, entry);
        send_async_reply(candidate, result, pid, true);
        cancel_stonith_command(candidate);

        free_async_command(candidate);
        g_list_free_1(entry);
    }
}
/*!
 * \internal
 * \brief Find the next device (if any) that is required for an operation
 *
 * Devices that are not required for the action are skipped. When a required
 * device is found, the command's device iterator is left pointing just past
 * it; when none is found, the iterator is left untouched.
 *
 * \param[in,out] cmd  Fencing operation that just succeeded
 *
 * \return Next device required for action if any, otherwise NULL
 */
static fenced_device_t *
next_required_device(async_command_t *cmd)
{
    GList *remaining = cmd->next_device_iter;

    while (remaining != NULL) {
        fenced_device_t *candidate = g_hash_table_lookup(device_table,
                                                         remaining->data);

        remaining = remaining->next;

        /* This is only called for successful actions, so it's OK to skip
         * non-required devices.
         */
        if (is_action_required(cmd->action, candidate)) {
            cmd->next_device_iter = remaining;
            return candidate;
        }
    }
    return NULL;
}
/*
* Handle completion of an executed fencing command.
*
* pid:       Process ID of the completed command (used when logging/replying)
* result:    Result of the command
* user_data: The async_command_t that completed
*
* Marks the device as verified after a successful list, monitor, or status
* action, triggers the device's work source, and then either schedules the
* command on another device or sends the final reply and frees the command.
* (Presumably registered as the agent child's completion callback -- confirm
* at the registration site.)
*/
static void
st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
fenced_device_t *device = NULL;
fenced_device_t *next_device = NULL;
CRM_CHECK(cmd != NULL, return);
device = cmd_device(cmd);
cmd->active_on = NULL;
/* The device is ready to do something else now */
if (device) {
- if (!device->verified && pcmk__result_ok(result)
+ if (!pcmk_is_set(device->flags, fenced_df_verified)
+ && pcmk__result_ok(result)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST,
PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS,
NULL)) {
- device->verified = TRUE;
+ fenced_device_set_flags(device, fenced_df_verified);
}
mainloop_set_trigger(device->work);
}
// After a success, continue only with further devices that are required
if (pcmk__result_ok(result)) {
next_device = next_required_device(cmd);
} else if ((cmd->next_device_iter != NULL)
&& !is_action_required(cmd->action, device)) {
/* if this device didn't work out, see if there are any others we can try.
* if the failed device was 'required', we can't pick another device. */
next_device = g_hash_table_lookup(device_table,
cmd->next_device_iter->data);
cmd->next_device_iter = cmd->next_device_iter->next;
}
if (next_device == NULL) {
// Nothing more to try: this is the final result
send_async_reply(cmd, result, pid, false);
if (pcmk__result_ok(result)) {
reply_to_duplicates(cmd, result, pid);
}
free_async_command(cmd);
} else { // This operation requires more fencing
log_async_result(cmd, result, pid, next_device->id, false);
schedule_stonith_command(cmd, next_device);
}
}
/*!
 * \internal
 * \brief Device search callback that starts (or fails) a local fencing command
 *
 * If the first capable device is still registered, the command is scheduled
 * on it and takes ownership of \p devices. Otherwise a "no fence device"
 * result is sent for the command, and both the command and \p devices are
 * freed.
 *
 * \param[in,out] devices    List of IDs of devices capable of the fencing
 * \param[in,out] user_data  Fencing command (async_command_t *)
 */
static void
stonith_fence_get_devices_cb(GList * devices, void *user_data)
{
    async_command_t *cmd = user_data;
    fenced_device_t *first = NULL;
    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
    const guint n_found = g_list_length(devices);

    crm_info("Found %d matching device%s for target '%s'",
             n_found, pcmk__plural_s(n_found), cmd->target);

    if (devices != NULL) {
        first = g_hash_table_lookup(device_table, devices->data);
    }

    if (first != NULL) {
        /* Device found. Schedule a fencing command for it.
         *
         * Assign devices to device_list so that it will be freed with cmd.
         */
        cmd->device_list = devices;
        cmd->next_device_iter = devices->next;
        schedule_stonith_command(cmd, first);
        return;
    }

    // No device found
    pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
                        "No device configured for target '%s'",
                        cmd->target);
    send_async_reply(cmd, &result, 0, false);
    pcmk__reset_result(&result);
    free_async_command(cmd);
    g_list_free_full(devices, free);
}
/*!
* \internal
* \brief Execute a fence action via the local node
*
* \param[in] msg Fencing request
* \param[out] result Where to store result of fence action
*/
static void
fence_locally(xmlNode *msg, pcmk__action_result_t *result)
{
const char *device_id = NULL;
fenced_device_t *device = NULL;
async_command_t *cmd = NULL;
xmlNode *dev = NULL;
CRM_CHECK((msg != NULL) && (result != NULL), return);
// The element carrying the target attribute may also name a device
dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_TARGET "]",
LOG_ERR);
cmd = create_async_command(msg);
if (cmd == NULL) {
crm_log_xml_warn(msg, "invalid");
- fenced_set_protocol_error(result);
+ set_bad_request_result(result);
return;
}
device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (device_id != NULL) {
// A specific device was requested, so use it or fail
device = g_hash_table_lookup(device_table, device_id);
if (device == NULL) {
crm_err("Requested device '%s' is not available", device_id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Requested device '%s' not found", device_id);
/* NOTE(review): cmd is neither scheduled nor freed on this path, which
* looks like a leak unless create_async_command() keeps it tracked
* elsewhere -- confirm.
*/
return;
}
schedule_stonith_command(cmd, device);
} else {
// No device requested: search for any device able to fence the target
const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET);
if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
// The target was given as a numeric node ID; use its name if known
int nodeid = 0;
pcmk__node_status_t *node = NULL;
pcmk__scan_min_int(host, &nodeid, 0);
node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_any
|pcmk__node_search_cluster_cib);
if (node != NULL) {
host = node->name;
}
}
/* If we get to here, then self-fencing is implicitly allowed */
get_capable_devices(host, cmd->action, cmd->default_timeout,
TRUE, cmd, stonith_fence_get_devices_cb,
fenced_support_flag(cmd->action));
}
// The command was scheduled or a device search was started
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
/*!
 * \internal
 * \brief Create the XML reply for a fencing operation from its request
 *
 * \param[in] request  Request that reply is for (may be NULL, in which case
 *                     the reply carries only the result)
 * \param[in] data     If not NULL, copied into the reply as call data
 * \param[in] result   Full result of fencing operation
 *
 * \return Newly created XML reply
 * \note The caller is responsible for freeing the result.
 * \note This has some overlap with construct_async_reply(), but that copies
 *       values from an async_command_t, whereas this one copies them from the
 *       request.
 */
xmlNode *
fenced_construct_reply(const xmlNode *request, xmlNode *data,
                       const pcmk__action_result_t *result)
{
    // Attributes to copy from request to reply
    const char *names[] = {
        PCMK__XA_ST_OP,
        PCMK__XA_ST_CALLID,
        PCMK__XA_ST_CLIENTID,
        PCMK__XA_ST_CLIENTNAME,
        PCMK__XA_ST_REMOTE_OP,
        PCMK__XA_ST_CALLOPT,
    };
    xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);

    crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
    crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
    stonith__xe_set_result(reply, result);

    if (request == NULL) {
        /* Most likely, this is the result of a stonith operation that was
         * initiated before we came up. Unfortunately that means we lack enough
         * information to provide clients with a full result.
         *
         * @TODO Maybe synchronize this information at start-up?
         */
        crm_warn("Missing request information for client notifications for "
                 "operation with result '%s' (initiated before we came up?)",
                 pcmk_exec_status_str(result->execution_status));
        return reply;
    }

    for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
        crm_xml_add(reply, names[lpc],
                    crm_element_value(request, names[lpc]));
    }

    if (data != NULL) {
        pcmk__xml_copy(pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA), data);
    }
    return reply;
}
/*!
 * \internal
 * \brief Create the XML reply for an asynchronous fencing command
 *
 * \param[in] cmd     Fencing command that reply is for
 * \param[in] result  Command result
 *
 * \return Newly created XML reply
 */
static xmlNode *
construct_async_reply(const async_command_t *cmd,
                      const pcmk__action_result_t *result)
{
    /* String attributes, in the order they are set. The origin attribute is
     * deliberately listed twice, matching the order of assignments: first this
     * function's name, then the command's origin.
     */
    const struct {
        const char *name;
        const char *value;
    } attrs[] = {
        { PCMK__XA_ST_ORIGIN, __func__ },
        { PCMK__XA_T, PCMK__VALUE_STONITH_NG },
        { PCMK__XA_ST_OP, cmd->op },
        { PCMK__XA_ST_DEVICE_ID, cmd->device },
        { PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id },
        { PCMK__XA_ST_CLIENTID, cmd->client },
        { PCMK__XA_ST_CLIENTNAME, cmd->client_name },
        { PCMK__XA_ST_TARGET, cmd->target },
        { PCMK__XA_ST_DEVICE_ACTION, cmd->op },
        { PCMK__XA_ST_ORIGIN, cmd->origin },
    };
    xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);

    for (int lpc = 0; lpc < PCMK__NELEM(attrs); lpc++) {
        crm_xml_add(reply, attrs[lpc].name, attrs[lpc].value);
    }

    crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id);
    crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options);
    stonith__xe_set_result(reply, result);
    return reply;
}
/*!
 * \internal
 * \brief Check whether a peer is named and has the cluster process flag set
 *
 * \param[in] peer  Node status entry to check (may be NULL)
 *
 * \return true if \p peer is non-NULL, has a name, and has the flag returned
 *         by crm_get_cluster_proc() set in its processes; otherwise false
 */
bool
fencing_peer_active(pcmk__node_status_t *peer)
{
    if ((peer == NULL) || (peer->name == NULL)) {
        return false;
    }
    return pcmk_is_set(peer->processes, crm_get_cluster_proc());
}
/*!
 * \internal
 * \brief Stamp a fencing operation with the current time as completion time
 *
 * \param[in,out] op  Operation whose \c completed (seconds) and
 *                    \c completed_nsec (nanoseconds) fields are set
 */
void
set_fencing_completed(remote_fencing_op_t *op)
{
    struct timespec now;

    qb_util_timespec_from_epoch_get(&now);

    op->completed = now.tv_sec;
    op->completed_nsec = now.tv_nsec;
}
/*!
 * \internal
 * \brief Find another node to do the fencing when the target is the local node
 *
 * \param[in] target  Node that must be fenced
 *
 * \return Name of an active peer other than \p target if \p target is the
 *         local node and such a peer exists, otherwise NULL
 */
static const char *
check_alternate_host(const char *target)
{
    GHashTableIter iter;
    pcmk__node_status_t *peer = NULL;

    // Only self-fencing requests need to be handed to someone else
    if (!pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) {
        return NULL;
    }

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (void **) &peer)) {
        if (!fencing_peer_active(peer)
            || pcmk__str_eq(peer->name, target, pcmk__str_casei)) {
            continue;
        }
        crm_notice("Forwarding self-fencing request to %s",
                   peer->name);
        return peer->name;
    }

    crm_warn("Will handle own fencing because no peer can");
    return NULL;
}
/*!
 * \internal
 * \brief Delete the relay operation named by a request, if any
 *
 * If \p request carries a relay operation ID (PCMK__XA_ST_REMOTE_OP_RELAY) and
 * the target found in the request is the local node, the relay operation is
 * looked up in \c stonith_remote_op_list. If found, it is first dropped from
 * the duplicates list of every other operation in the table and then removed
 * from the table itself.
 *
 * \param[in] request  Request XML to inspect
 */
static void
remove_relay_op(xmlNode * request)
{
    xmlNode *dev = pcmk__xpath_find_one(request->doc,
                                        "//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
                                        LOG_TRACE);
    const char *relay_op_id = NULL;
    const char *op_id = NULL;
    const char *client_name = NULL;
    const char *target = NULL;
    remote_fencing_op_t *relay_op = NULL;
    remote_fencing_op_t *list_op = NULL;
    GHashTableIter iter;

    if (dev != NULL) {
        target = crm_element_value(dev, PCMK__XA_ST_TARGET);
    }

    relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY);
    op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
    client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);

    /* Delete RELAY operation. */
    if ((relay_op_id == NULL) || (target == NULL)
        || !pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) {
        return;
    }

    relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
    if (relay_op == NULL) {
        return;
    }

    /* If the operation to be deleted is registered as a duplicate of another
     * operation, delete that registration.
     *
     * The entry must be removed from the list that actually holds it
     * (list_op->duplicates), not from relay_op's own duplicates list;
     * otherwise list_op keeps pointing at an operation that is about to be
     * removed from the table. g_list_remove() removes only the first matching
     * element and leaves the list unchanged if there is none.
     */
    g_hash_table_iter_init(&iter, stonith_remote_op_list);
    while (g_hash_table_iter_next(&iter, NULL, (void **) &list_op)) {
        if (list_op != relay_op) {
            list_op->duplicates = g_list_remove(list_op->duplicates, relay_op);
        }
    }

    crm_debug("Deleting relay op %s ('%s'%s%s for %s), "
              "replaced by op %s ('%s'%s%s for %s)",
              relay_op->id, relay_op->action,
              (relay_op->target == NULL)? "" : " targeting ",
              pcmk__s(relay_op->target, ""),
              relay_op->client_name, op_id, relay_op->action,
              (target == NULL)? "" : " targeting ", pcmk__s(target, ""),
              client_name);

    g_hash_table_remove(stonith_remote_op_list, relay_op_id);
}
/*!
 * \internal
 * \brief Check whether an API request came from a privileged sender
 *
 * Fencing configuration commands are restricted to privileged IPC users
 * (i.e. root or hacluster); all other users should go through the CIB so that
 * ACLs are applied. A request without an IPC client is a peer request and is
 * always allowed.
 *
 * \param[in] c   IPC client that sent request (or NULL if sent by CPG peer)
 * \param[in] op  Requested API operation (for logging only)
 *
 * \return true if sender is peer or privileged client, otherwise false
 */
static inline bool
is_privileged(const pcmk__client_t *c, const char *op)
{
    const bool allowed = (c == NULL)
                         || pcmk_is_set(c->flags, pcmk__client_privileged);

    if (!allowed) {
        crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
                 pcmk__s(op, ""), pcmk__client_name(c));
    }
    return allowed;
}
// CRM_OP_REGISTER
/*!
 * \internal
 * \brief Answer a client registration request with the client's ID
 *
 * \param[in,out] request  Request to handle (must come from an IPC client)
 *
 * \return Newly created reply element
 */
static xmlNode *
handle_register_request(pcmk__request_t *request)
{
    xmlNode *answer = NULL;

    pcmk__assert(request->ipc_client != NULL);

    answer = pcmk__xe_create(NULL, "reply");
    crm_xml_add(answer, PCMK__XA_ST_OP, CRM_OP_REGISTER);
    crm_xml_add(answer, PCMK__XA_ST_CLIENTID, request->ipc_client->id);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    pcmk__set_request_flags(request, pcmk__request_reuse_options);
    return answer;
}
// STONITH_OP_EXEC
/*!
 * \internal
 * \brief Run the agent action described by a request
 *
 * \param[in,out] request  Request to handle; its result is filled in
 *
 * \return Reply built from the request's result, or NULL if the result's
 *         execution status is still PCMK_EXEC_PENDING
 */
static xmlNode *
handle_agent_request(pcmk__request_t *request)
{
    execute_agent_action(request->xml, &request->result);

    if (request->result.execution_status != PCMK_EXEC_PENDING) {
        return fenced_construct_reply(request->xml, NULL, &request->result);
    }
    return NULL;
}
// STONITH_OP_TIMEOUT_UPDATE
/*!
 * \internal
 * \brief Pass a timeout update from a request to the async timeout handler
 *
 * \param[in,out] request  Request carrying client ID, call ID and new timeout
 *
 * \return Always NULL (no reply is created here)
 */
static xmlNode *
handle_update_timeout_request(pcmk__request_t *request)
{
    int new_timeout = 0;
    const char *client = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID);
    const char *call = crm_element_value(request->xml, PCMK__XA_ST_CALLID);

    crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &new_timeout);
    do_stonith_async_timeout_update(client, call, new_timeout);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
// STONITH_OP_QUERY
/*!
 * \internal
 * \brief Handle a request asking which devices can fence a target
 *
 * Records the operation when the request came from a peer, removes any relay
 * operation named by the request, and then hands a prepared reply (stored in a
 * newly allocated struct st_query_data) to get_capable_devices() together with
 * stonith_query_capable_device_cb.
 *
 * \param[in,out] request  Request to handle; its result is set to OK/DONE
 *
 * \return Always NULL (no reply is returned directly from this function)
 */
static xmlNode *
handle_query_request(pcmk__request_t *request)
{
int timeout = 0;
xmlNode *dev = NULL;
const char *action = NULL;
const char *target = NULL;
const char *client_id = crm_element_value(request->xml,
PCMK__XA_ST_CLIENTID);
struct st_query_data *query = NULL;
if (request->peer != NULL) {
// Record it for the future notification
create_remote_stonith_op(client_id, request->xml, TRUE);
}
/* Delete the DC node RELAY operation. */
remove_relay_op(request->xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
// First element anywhere in the document that carries a device action
dev = pcmk__xpath_find_one(request->xml->doc,
"//*[@" PCMK__XA_ST_DEVICE_ACTION "]",
LOG_NEVER);
if (dev != NULL) {
const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
return NULL; // No query or reply necessary
}
target = crm_element_value(dev, PCMK__XA_ST_TARGET);
action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
}
crm_log_xml_trace(request->xml, "Query");
// The query data gets its own copies of every string it stores
// NOTE(review): query is not freed here; presumably the callback owns it
query = pcmk__assert_alloc(1, sizeof(struct st_query_data));
query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
query->remote_peer = pcmk__str_copy(request->peer);
query->client_id = pcmk__str_copy(client_id);
query->target = pcmk__str_copy(target);
query->action = pcmk__str_copy(action);
query->call_options = request->call_options;
crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout);
get_capable_devices(target, action, timeout,
pcmk_is_set(query->call_options,
st_opt_allow_self_fencing),
- query, stonith_query_capable_device_cb, st_device_supports_none);
+ query, stonith_query_capable_device_cb, fenced_df_none);
return NULL;
}
// STONITH_OP_NOTIFY
/*!
 * \internal
 * \brief Enable and/or disable notification callbacks for an IPC client
 *
 * The request may name a notification type to activate
 * (PCMK__XA_ST_NOTIFY_ACTIVATE) and one to deactivate
 * (PCMK__XA_ST_NOTIFY_DEACTIVATE); each is parsed into a flag that is set or
 * cleared on the client.
 *
 * \param[in,out] request  Request to handle (must come from an IPC client)
 *
 * \return Newly created IPC acknowledgement with status CRM_EX_OK
 */
static xmlNode *
handle_notify_request(pcmk__request_t *request)
{
const char *flag_name = NULL;
pcmk__assert(request->ipc_client != NULL);
flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE);
if (flag_name != NULL) {
crm_debug("Enabling %s callbacks for client %s",
flag_name, pcmk__request_origin(request));
- pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name));
+ pcmk__set_client_flags(request->ipc_client,
+ fenced_parse_notify_flag(flag_name));
}
// Activation is processed before deactivation
flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE);
if (flag_name != NULL) {
crm_debug("Disabling %s callbacks for client %s",
flag_name, pcmk__request_origin(request));
pcmk__clear_client_flags(request->ipc_client,
- get_stonith_flag(flag_name));
+ fenced_parse_notify_flag(flag_name));
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_request_flags(request, pcmk__request_reuse_options);
return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL,
CRM_EX_OK);
}
// STONITH_OP_RELAY
/*!
 * \internal
 * \brief Handle a fencing request that was forwarded to this node
 *
 * Logs the forwarded request and initiates a remote fencing operation for it
 * without an IPC client.
 *
 * \param[in,out] request  Request to handle; its result is filled in
 *
 * \return NULL with a pending result if the operation was initiated, otherwise
 *         a reply built from the error result
 */
static xmlNode *
handle_relay_request(pcmk__request_t *request)
{
// First element anywhere in the document that carries a target attribute
// NOTE(review): dev is used below without a NULL check -- confirm that
// crm_element_value() tolerates a NULL element
xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
"//*[@" PCMK__XA_ST_TARGET "]",
LOG_TRACE);
crm_notice("Received forwarded fencing request from "
"%s %s to fence (%s) peer %s",
pcmk__request_origin_type(request),
pcmk__request_origin(request),
crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION),
crm_element_value(dev, PCMK__XA_ST_TARGET));
if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
- fenced_set_protocol_error(&request->result);
+ set_bad_request_result(&request->result);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// The operation was initiated, so there is nothing to reply with yet
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
return NULL;
}
// STONITH_OP_FENCE
/*!
 * \internal
 * \brief Handle a request to fence a node
 *
 * Three cases are distinguished, in this order:
 * - the request came from a peer: fence locally;
 * - the request has st_opt_manual_ack set: treat it as a manual confirmation;
 * - otherwise: either relay the request to an alternate host (when one is
 *   found for a client request) or initiate a remote fencing operation.
 *
 * \param[in,out] request  Request to handle; its result is filled in
 *
 * \return NULL if the result's execution status is PCMK_EXEC_PENDING,
 *         otherwise a reply built from the request's result
 */
static xmlNode *
handle_fence_request(pcmk__request_t *request)
{
if (request->peer != NULL) {
fence_locally(request->xml, &request->result);
} else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
// Map the confirmation's return code onto a request result
switch (fenced_handle_manual_confirmation(request->ipc_client,
request->xml)) {
case pcmk_rc_ok:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
break;
case EINPROGRESS:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
break;
default:
- fenced_set_protocol_error(&request->result);
+ set_bad_request_result(&request->result);
break;
}
} else {
const char *alternate_host = NULL;
// First element anywhere in the document that carries a target attribute
xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
"//*[@" PCMK__XA_ST_TARGET "]",
LOG_TRACE);
const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET);
const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (request->ipc_client != NULL) {
int tolerance = 0;
crm_notice("Client %s wants to fence (%s) %s using %s",
pcmk__request_origin(request), action,
target, (device? device : "any device"));
crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance);
// If the tolerance check passes, report success without fencing again
if (stonith_check_fence_tolerance(tolerance, target, action)) {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
return fenced_construct_reply(request->xml, NULL,
&request->result);
}
// alternate_host is only ever set on this client path
alternate_host = check_alternate_host(target);
} else {
crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
request->peer, action, target,
(device == NULL)? "(any)" : device);
}
if (alternate_host != NULL) {
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
pcmk__node_status_t *node =
pcmk__get_node(0, alternate_host, NULL,
pcmk__node_search_cluster_member);
// NOTE(review): id is compared against 0 here but passed as a string
// below; this looks like a NULL-pointer test -- confirm
if (request->ipc_client->id == 0) {
client_id = crm_element_value(request->xml,
PCMK__XA_ST_CLIENTID);
} else {
client_id = request->ipc_client->id;
}
/* Create a duplicate fencing operation to relay with the client ID.
* When a query response is received, this operation should be
* deleted to avoid keeping the duplicate around.
*/
op = create_remote_stonith_op(client_id, request->xml, FALSE);
// Rewrite the request in place into a relay request before sending it
crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY);
crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID,
request->ipc_client->id);
crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id);
// @TODO On failure, fail request immediately, or maybe panic
pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
} else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
FALSE) == NULL) {
- fenced_set_protocol_error(&request->result);
+ set_bad_request_result(&request->result);
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
}
}
// A pending result means no reply is built here
if (request->result.execution_status == PCMK_EXEC_PENDING) {
return NULL;
}
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_FENCE_HISTORY
/*!
 * \internal
 * \brief Handle a fencing history request
 *
 * \param[in,out] request  Request to handle; its result is set to OK/DONE
 *
 * \return Reply containing the history data, or NULL if the request has
 *         st_opt_discard_reply set
 */
static xmlNode *
handle_history_request(pcmk__request_t *request)
{
    xmlNode *history = NULL;
    xmlNode *answer = NULL;
    bool discard = false;

    stonith_fence_history(request->xml, &history, request->peer,
                          request->call_options);
    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

    /* When the local node broadcasts its history, it sets
     * st_opt_discard_reply and doesn't need a reply.
     */
    discard = pcmk_is_set(request->call_options, st_opt_discard_reply);
    if (!discard) {
        answer = fenced_construct_reply(request->xml, history,
                                        &request->result);
    }

    pcmk__xml_free(history);
    return answer;
}
// STONITH_OP_DEVICE_ADD
/*!
 * \internal
 * \brief Handle a request to register a fencing device
 *
 * Only privileged senders (see is_privileged()) may register a device this
 * way. A configuration notification is sent in either case.
 *
 * \param[in,out] request  Request to handle; its result is filled in
 *
 * \return Reply built from the request's result
 */
static xmlNode *
handle_device_add_request(pcmk__request_t *request)
{
    const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
    xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
                                        "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR);
    const char *device_id = NULL;

    if (!is_privileged(request->ipc_client, op)) {
        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
                         PCMK_EXEC_INVALID,
                         "Unprivileged users must register device via CIB");

    } else {
        // Registration returns a standard code; results use the legacy one
        const int legacy_rc = pcmk_rc2legacy(fenced_device_register(dev,
                                                                    false));

        if (legacy_rc == pcmk_ok) {
            pcmk__set_result(&request->result, CRM_EX_OK,
                             stonith__legacy2status(legacy_rc), NULL);
        } else {
            pcmk__set_result(&request->result, CRM_EX_ERROR,
                             stonith__legacy2status(legacy_rc),
                             pcmk_strerror(legacy_rc));
        }
    }

    if (dev != NULL) {
        device_id = pcmk__xe_id(dev);
    }
    fenced_send_config_notification(op, &request->result, device_id);
    return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_DEVICE_DEL
/*!
 * \internal
 * \brief Handle a request to remove a fencing device
 *
 * Only privileged senders (see is_privileged()) may remove a device this way.
 * A configuration notification is sent in either case.
 *
 * \param[in,out] request  Request to handle; its result is filled in
 *
 * \return Reply built from the request's result
 */
static xmlNode *
handle_device_delete_request(pcmk__request_t *request)
{
    xmlNode *dev = pcmk__xpath_find_one(request->xml->doc,
                                        "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR);
    const char *id = crm_element_value(dev, PCMK_XA_ID);
    const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);

    if (!is_privileged(request->ipc_client, op)) {
        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
                         PCMK_EXEC_INVALID,
                         "Unprivileged users must delete device via CIB");
    } else {
        stonith_device_remove(id, false);
        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    }

    fenced_send_config_notification(op, &request->result, id);
    return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_ADD
/*!
 * \internal
 * \brief Handle a request to register a fencing topology level
 *
 * Privileged senders (see is_privileged()) get the level registered; for
 * anyone else the request is only unpacked and the result is set to
 * CRM_EX_INSUFFICIENT_PRIV.
 *
 * \param[in,out] request  Request to handle; its result is filled in
 *
 * \return Reply built from the request's result
 */
static xmlNode *
handle_level_add_request(pcmk__request_t *request)
{
- char *desc = NULL;
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
if (is_privileged(request->ipc_client, op)) {
- fenced_register_level(request->xml, &desc, &request->result);
+ fenced_register_level(request->xml, &request->result);
} else {
// All output arguments are NULL, so nothing unpacked is kept here
- unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
+ unpack_level_request(request->xml, NULL, NULL, NULL);
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must add level via CIB");
}
- fenced_send_config_notification(op, &request->result, desc);
- free(desc);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_DEL
/*!
 * \internal
 * \brief Handle a request to unregister a fencing topology level
 *
 * Privileged senders (see is_privileged()) get the level unregistered; for
 * anyone else the request is only unpacked and the result is set to
 * CRM_EX_INSUFFICIENT_PRIV.
 *
 * \param[in,out] request  Request to handle; its result is filled in
 *
 * \return Reply built from the request's result
 */
static xmlNode *
handle_level_delete_request(pcmk__request_t *request)
{
- char *desc = NULL;
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
if (is_privileged(request->ipc_client, op)) {
- fenced_unregister_level(request->xml, &desc, &request->result);
+ fenced_unregister_level(request->xml, &request->result);
} else {
// All output arguments are NULL, so nothing unpacked is kept here
- unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
+ unpack_level_request(request->xml, NULL, NULL, NULL);
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must delete level via CIB");
}
- fenced_send_config_notification(op, &request->result, desc);
- free(desc);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// CRM_OP_RM_NODE_CACHE
/*!
 * \internal
 * \brief Handle a request to forget a cluster node
 *
 * \param[in,out] request  Request carrying the node's ID and name; its result
 *                         is set to OK/DONE
 *
 * \return Always NULL (no reply is created here)
 */
static xmlNode *
handle_cache_request(pcmk__request_t *request)
{
    const char *uname = NULL;
    int id = 0;

    crm_element_value_int(request->xml, PCMK_XA_ID, &id);
    uname = crm_element_value(request->xml, PCMK_XA_UNAME);

    pcmk__cluster_forget_cluster_node(id, uname);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
/*!
 * \internal
 * \brief Reject a request whose operation has no registered handler
 *
 * \param[in,out] request  Request to reject; its result is set to a protocol
 *                         error with an explanatory reason
 *
 * \return Reply built from the request's result
 */
static xmlNode *
handle_unknown_request(pcmk__request_t *request)
{
    const char *sender_type = pcmk__request_origin_type(request);
    const char *sender = pcmk__request_origin(request);

    crm_err("Unknown IPC request %s from %s %s",
            request->op, sender_type, sender);

    pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
                        "Unknown IPC request type '%s' (bug?)", request->op);

    return fenced_construct_reply(request->xml, NULL, &request->result);
}
/*!
 * \internal
 * \brief Build the table mapping request operations to handler functions
 *
 * The result is stored in \c fenced_handlers.
 */
static void
fenced_register_handlers(void)
{
    pcmk__server_command_t dispatch[] = {
        // Client session management
        { CRM_OP_REGISTER, handle_register_request },
        { STONITH_OP_NOTIFY, handle_notify_request },

        // Fencing actions and queries
        { STONITH_OP_EXEC, handle_agent_request },
        { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
        { STONITH_OP_QUERY, handle_query_request },
        { STONITH_OP_RELAY, handle_relay_request },
        { STONITH_OP_FENCE, handle_fence_request },
        { STONITH_OP_FENCE_HISTORY, handle_history_request },

        // Device and topology configuration
        { STONITH_OP_DEVICE_ADD, handle_device_add_request },
        { STONITH_OP_DEVICE_DEL, handle_device_delete_request },
        { STONITH_OP_LEVEL_ADD, handle_level_add_request },
        { STONITH_OP_LEVEL_DEL, handle_level_delete_request },

        // Node cache maintenance
        { CRM_OP_RM_NODE_CACHE, handle_cache_request },

        // Final entry has no operation name and uses the unknown-request handler
        { NULL, handle_unknown_request },
    };

    fenced_handlers = pcmk__register_handlers(dispatch);
}
/*!
 * \internal
 * \brief Destroy the request handler table, if it exists
 */
void
fenced_unregister_handlers(void)
{
    if (fenced_handlers == NULL) {
        return;
    }
    g_hash_table_destroy(fenced_handlers);
    fenced_handlers = NULL;
}
/*!
 * \internal
 * \brief Dispatch a request to its handler and send any resulting reply
 *
 * The handler table is created on first use. Any reply returned by the
 * handler is sent and then freed here.
 *
 * \param[in,out] request  Request to process
 */
static void
handle_request(pcmk__request_t *request)
{
    xmlNode *answer = NULL;
    const char *why = NULL;

    if (fenced_handlers == NULL) {
        fenced_register_handlers();
    }

    answer = pcmk__process_request(request, fenced_handlers);

    if (answer != NULL) {
        const bool reuse_ipc_options =
            pcmk_is_set(request->flags, pcmk__request_reuse_options)
            && (request->ipc_client != NULL);

        if (!reuse_ipc_options) {
            stonith_send_reply(answer, request->call_options,
                               request->peer, request->ipc_client);
        } else {
            /* Certain IPC-only commands must reuse the call options from the
             * original request rather than the ones set by stonith_send_reply()
             * -> do_local_reply().
             */
            pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, answer,
                               request->ipc_flags);
            request->ipc_client->request_id = 0;
        }
        pcmk__xml_free(answer);
    }

    // The exit reason, when there is one, is logged in parentheses
    why = request->result.exit_reason;
    crm_debug("Processed %s request from %s %s: %s%s%s%s",
              request->op, pcmk__request_origin_type(request),
              pcmk__request_origin(request),
              pcmk_exec_status_str(request->result.execution_status),
              ((why != NULL)? " (" : ""),
              ((why != NULL)? why : ""),
              ((why != NULL)? ")" : ""));
}
/*!
 * \internal
 * \brief Process a reply message according to its operation
 *
 * Query replies and notify/fence replies are passed to their processing
 * functions; anything else is logged as an error and ignored.
 *
 * \param[in]     client       If not NULL, IPC client that sent the reply
 * \param[in,out] request      Reply XML
 * \param[in]     remote_peer  Name of the sending peer, used when \p client
 *                             is NULL
 */
static void
handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
{
    // Copy, because request might be freed before we want to log this
    char *op = crm_element_value_copy(request, PCMK__XA_ST_OP);
    const char *sender_type = (client != NULL)? "client" : "peer";
    const char *sender = NULL;
    bool recognized = true;

    if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
        process_remote_stonith_query(request);

    } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE,
                                NULL)) {
        fenced_process_fencing_reply(request);

    } else {
        recognized = false;
    }

    sender = (client != NULL)? pcmk__client_name(client) : remote_peer;

    if (recognized) {
        crm_debug("Processed %s reply from %s %s",
                  op, sender_type, sender);
    } else {
        // Nothing was processed on this path, so request is still usable
        crm_err("Ignoring unknown %s reply from %s %s",
                pcmk__s(op, "untyped"), sender_type, sender);
        crm_log_xml_warn(request, "UnknownOp");
    }
    free(op);
}
/*!
 * \internal
 * \brief Process a message received from an IPC client or a CPG peer
 *
 * A message containing a reply element is treated as a reply; anything else
 * is wrapped in a request object and dispatched to a handler.
 *
 * \param[in,out] client       If not NULL, IPC client that sent message
 * \param[in]     id           If from IPC client, IPC message ID
 * \param[in]     flags        Message flags
 * \param[in,out] message      Message XML
 * \param[in]     remote_peer  If not NULL, CPG peer that sent message
 */
void
stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
                xmlNode *message, const char *remote_peer)
{
    uint32_t call_options = st_opt_none;
    bool is_reply = false;
    bool is_sync = false;
    int rc = pcmk_rc_ok;

    CRM_CHECK(message != NULL, return);

    is_reply = (pcmk__xpath_find_one(message->doc, "//" PCMK__XE_ST_REPLY,
                                     LOG_NEVER) != NULL);

    rc = pcmk__xe_get_flags(message, PCMK__XA_ST_CALLOPT, &call_options,
                            st_opt_none);
    if (rc != pcmk_rc_ok) {
        crm_warn("Couldn't parse options from message: %s", pcmk_rc_str(rc));
    }
    is_sync = pcmk_is_set(call_options, st_opt_sync_call);

    crm_debug("Processing %ssynchronous %s %s %u from %s %s",
              (is_sync? "" : "a"),
              crm_element_value(message, PCMK__XA_ST_OP),
              (is_reply? "reply" : "request"), id,
              ((client == NULL)? "peer" : "client"),
              ((client == NULL)? remote_peer : pcmk__client_name(client)));

    if (is_sync) {
        // A synchronous call from a client must match its outstanding request
        pcmk__assert((client == NULL) || (client->request_id == id));
    }

    if (is_reply) {
        handle_reply(client, message, remote_peer);
        return;
    }

    {
        pcmk__request_t request = {
            .ipc_client     = client,
            .ipc_id         = id,
            .ipc_flags      = flags,
            .peer           = remote_peer,
            .xml            = message,
            .call_options   = call_options,
            .result         = PCMK__UNKNOWN_RESULT,
        };

        request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP);
        CRM_CHECK(request.op != NULL, return);

        if (is_sync) {
            pcmk__set_request_flags(&request, pcmk__request_sync);
        }

        handle_request(&request);
        pcmk__reset_request(&request);
    }
}
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 9807a7f38c..412fbd3e8d 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -1,579 +1,579 @@
/*
* Copyright 2009-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <libxml/tree.h> // xmlNode
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <pacemaker-fenced.h>
#define MAX_STONITH_HISTORY 500
/*!
 * \internal
 * \brief Broadcast a fencing-history message to all nodes, to trigger a
 *        cleanup or a history synchronisation
 *
 * \param[in] history   Optional history to attach to the message
 * \param[in] callopts  Call options for the message (cleanup is requested via
 *                      a flag here)
 * \param[in] target    If not NULL, fence target the message is limited to
 */
static void
stonith_send_broadcast_history(xmlNode *history,
                               int callopts,
                               const char *target)
{
    xmlNode *message = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
    xmlNode *call_data_wrapper = NULL;
    xmlNode *payload = NULL;

    // Message header
    crm_xml_add(message, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
    crm_xml_add(message, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
    crm_xml_add(message, PCMK__XA_ST_OP, STONITH_OP_FENCE_HISTORY);
    crm_xml_add_int(message, PCMK__XA_ST_CALLOPT, callopts);

    // Payload: an element named after this function, under the call data
    call_data_wrapper = pcmk__xe_create(message, PCMK__XE_ST_CALLDATA);
    payload = pcmk__xe_create(call_data_wrapper, __func__);
    pcmk__xml_copy(payload, history);
    if (target != NULL) {
        crm_xml_add(payload, PCMK__XA_ST_TARGET, target);
    }

    pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, message);
    pcmk__xml_free(message);
}
/*!
 * \internal
 * \brief Decide whether a history entry should be removed during cleanup
 *
 * Written as a predicate for g_hash_table_foreach_remove().
 *
 * \param[in] key        Ignored
 * \param[in] value      \c remote_fencing_op_t to check
 * \param[in] user_data  If not NULL, only entries for this target qualify
 *
 * \return TRUE if the operation is failed or done and matches the target
 *         filter (if any), otherwise FALSE
 */
static gboolean
stonith_remove_history_entry (gpointer key,
                              gpointer value,
                              gpointer user_data)
{
    const remote_fencing_op_t *entry = value;
    const char *wanted_target = (const char *) user_data;

    if ((entry->state != st_failed) && (entry->state != st_done)) {
        return FALSE; /* don't clean pending operations */
    }
    if (wanted_target == NULL) {
        return TRUE;
    }
    return (strcmp(entry->target, wanted_target) == 0)? TRUE : FALSE;
}
/*!
 * \internal
 * \brief Either broadcast a history cleanup or clean up the local history
 *
 * \param[in] target     If not NULL, limit cleanup to this fence target
 * \param[in] broadcast  If true, send a cleanup broadcast instead of cleaning
 *                       up locally
 */
static void
stonith_fence_history_cleanup(const char *target,
                              gboolean broadcast)
{
    if (broadcast) {
        stonith_send_broadcast_history(NULL,
                                       st_opt_cleanup | st_opt_discard_reply,
                                       target);
        /* we'll do the local clean when we receive back our own broadcast */
        return;
    }

    if (stonith_remote_op_list == NULL) {
        return;
    }

    g_hash_table_foreach_remove(stonith_remote_op_list,
                                stonith_remove_history_entry,
                                (gpointer) target);
    fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
}
/* Keeping the length of the fence history within bounds
* ======================================================
*
* If things are really running wild, a lot of fencing attempts
* might fill up the hash map, eventually using up a lot
* of memory and creating huge history-sync messages.
* Before the history was synced across nodes, at least
* the reboot of a cluster node helped keep the
* history within bounds, even though not in a reliable
* manner.
*
* stonith_remote_op_list isn't sorted by timestamp,
* thus it would be kind of expensive to delete e.g.
* the oldest entry whenever it grows past MAX_STONITH_HISTORY
* entries.
* It is more efficient to purge MAX_STONITH_HISTORY/2
* entries whenever the list grows beyond MAX_STONITH_HISTORY
* (sort by age + purge the MAX_STONITH_HISTORY/2 oldest).
* Doing that on a per-node basis might raise the
* probability of large syncs occurring.
* Things like introducing a broadcast to purge
* MAX_STONITH_HISTORY/2 entries, or not syncing above a certain
* threshold, come to mind ...
* The simplest thing, though, is to purge the full history
* throughout the cluster once MAX_STONITH_HISTORY is reached.
* On the other hand, this leads to purging the history in
* situations where it would probably be handy to have it.
*/
/*!
 * \internal
 * \brief Order two remote fencing operations by status, then completion time
 *
 * Pending operations sort before completed ones, and two pending operations
 * compare equal. Among completed operations, the one completed more recently
 * sorts first (seconds are compared first, then nanoseconds).
 *
 * \param[in] a  First \c remote_fencing_op_t to compare
 * \param[in] b  Second \c remote_fencing_op_t to compare
 *
 * \return Standard comparison result (a negative integer if \p a is lesser,
 *         0 if the values are equal, and a positive integer if \p a is greater)
 */
static gint
cmp_op_by_completion(gconstpointer a, gconstpointer b)
{
    const remote_fencing_op_t *lhs = a;
    const remote_fencing_op_t *rhs = b;
    const bool lhs_pending = stonith__op_state_pending(lhs->state);
    const bool rhs_pending = stonith__op_state_pending(rhs->state);

    if (lhs_pending || rhs_pending) {
        // Rank pending as 0 and completed as 1, then compare the ranks
        return (lhs_pending? 0 : 1) - (rhs_pending? 0 : 1);
    }

    if (lhs->completed != rhs->completed) {
        return (lhs->completed > rhs->completed)? -1 : 1;
    }

    if (lhs->completed_nsec != rhs->completed_nsec) {
        return (lhs->completed_nsec > rhs->completed_nsec)? -1 : 1;
    }

    return 0;
}
/*!
 * \internal
 * \brief Drop an operation from \c stonith_remote_op_list unless it is pending
 *
 * \param[in] data       \c remote_fencing_op_t to remove
 * \param[in] user_data  Ignored
 */
static void
remove_completed_remote_op(gpointer data, gpointer user_data)
{
    const remote_fencing_op_t *candidate = data;

    if (stonith__op_state_pending(candidate->state)) {
        return;
    }
    g_hash_table_remove(stonith_remote_op_list, candidate->id);
}
/*!
 * \internal
 * \brief Trim the local history once it exceeds MAX_STONITH_HISTORY entries
 *
 * The operations are sorted with cmp_op_by_completion(), and every
 * non-pending operation from position MAX_STONITH_HISTORY / 2 onward is
 * removed from \c stonith_remote_op_list.
 */
void
stonith_fence_history_trim(void)
{
    GList *sorted = NULL;

    if ((stonith_remote_op_list == NULL)
        || (g_hash_table_size(stonith_remote_op_list)
            <= MAX_STONITH_HISTORY)) {
        return;
    }

    sorted = g_hash_table_get_values(stonith_remote_op_list);

    crm_trace("More than %d entries in fencing history, purging oldest "
              "completed operations", MAX_STONITH_HISTORY);

    sorted = g_list_sort(sorted, cmp_op_by_completion);

    // Always keep pending ops regardless of number of entries
    g_list_foreach(g_list_nth(sorted, MAX_STONITH_HISTORY / 2),
                   remove_completed_remote_op, NULL);

    // No need for a notification after purging old data
    g_list_free(sorted);
}
/*!
* \internal
* \brief Convert xml fence-history to a hash-table like stonith_remote_op_list
*
* \param[in] history Fence-history in xml
*
* \return Fence-history as hash-table
*/
static GHashTable *
stonith_xml_history_to_list(const xmlNode *history)
{
xmlNode *xml_op = NULL;
GHashTable *rv = NULL;
init_stonith_remote_op_hash_table(&rv);
CRM_LOG_ASSERT(rv != NULL);
for (xml_op = pcmk__xe_first_child(history, NULL, NULL, NULL);
xml_op != NULL; xml_op = pcmk__xe_next(xml_op, NULL)) {
remote_fencing_op_t *op = NULL;
char *id = crm_element_value_copy(xml_op, PCMK__XA_ST_REMOTE_OP);
int state;
int exit_status = CRM_EX_OK;
int execution_status = PCMK_EXEC_DONE;
long long completed;
long long completed_nsec = 0L;
if (!id) {
crm_warn("Malformed fencing history received from peer");
continue;
}
crm_trace("Attaching op %s to hashtable", id);
op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t));
op->id = id;
op->target = crm_element_value_copy(xml_op, PCMK__XA_ST_TARGET);
op->action = crm_element_value_copy(xml_op, PCMK__XA_ST_DEVICE_ACTION);
op->originator = crm_element_value_copy(xml_op, PCMK__XA_ST_ORIGIN);
op->delegate = crm_element_value_copy(xml_op, PCMK__XA_ST_DELEGATE);
op->client_name = crm_element_value_copy(xml_op,
PCMK__XA_ST_CLIENTNAME);
crm_element_value_ll(xml_op, PCMK__XA_ST_DATE, &completed);
op->completed = (time_t) completed;
crm_element_value_ll(xml_op, PCMK__XA_ST_DATE_NSEC, &completed_nsec);
op->completed_nsec = completed_nsec;
crm_element_value_int(xml_op, PCMK__XA_ST_STATE, &state);
op->state = (enum op_state) state;
/* @COMPAT We can't use stonith__xe_get_result() here because
* fencers <2.1.3 didn't include results, leading it to assume an error
* status. Instead, set an unknown status in that case.
*/
if ((crm_element_value_int(xml_op, PCMK__XA_RC_CODE, &exit_status) < 0)
|| (crm_element_value_int(xml_op, PCMK__XA_OP_STATUS,
&execution_status) < 0)) {
exit_status = CRM_EX_INDETERMINATE;
execution_status = PCMK_EXEC_UNKNOWN;
}
pcmk__set_result(&op->result, exit_status, execution_status,
crm_element_value(xml_op, PCMK_XA_EXIT_REASON));
pcmk__set_result_output(&op->result,
crm_element_value_copy(xml_op,
PCMK__XA_ST_OUTPUT),
NULL);
g_hash_table_replace(rv, id, op);
CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
}
return rv;
}
/*!
* \internal
* \brief Craft xml difference between local fence-history and a history
* coming from remote, and merge the remote history into the local
*
* \param[in,out] remote_history Fence-history as hash-table (may be NULL)
* \param[in] add_id If crafting the answer for an API
* history-request there is no need for the id
* \param[in] target Optionally limit to certain fence-target
*
* \return The fence-history as xml
*/
static xmlNode *
stonith_local_history_diff_and_merge(GHashTable *remote_history,
gboolean add_id, const char *target)
{
/* Overview:
 *  1. Walk the local operation table, reconciling each entry with the entry
 *     of the same ID in remote_history (if any), and serialize the entries
 *     that should be reported into a new history XML element.
 *  2. Move whatever is left in remote_history (entries we did not know
 *     about) into the local table.
 *  3. If anything changed locally, trim the history and notify clients.
 */
xmlNode *history = NULL;
GHashTableIter iter;
remote_fencing_op_t *op = NULL;
// Set when the local table was modified (entry replaced or entries added)
gboolean updated = FALSE;
// Number of entries serialized into the returned XML
int cnt = 0;
if (stonith_remote_op_list) {
char *id = NULL;
history = pcmk__xe_create(NULL, PCMK__XE_ST_HISTORY);
g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) {
xmlNode *entry = NULL;
if (remote_history) {
remote_fencing_op_t *remote_op =
g_hash_table_lookup(remote_history, op->id);
if (remote_op) {
if (stonith__op_state_pending(op->state)
&& !stonith__op_state_pending(remote_op->state)) {
/* Case 1: our copy is still pending but the peer's copy is
 * not, so the peer's copy replaces ours in the local table.
 */
crm_debug("Updating outdated pending operation %.8s "
"(state=%s) according to the one (state=%s) from "
"remote peer history",
- op->id, stonith_op_state_str(op->state),
- stonith_op_state_str(remote_op->state));
+ op->id, stonith__op_state_text(op->state),
+ stonith__op_state_text(remote_op->state));
/* Take remote_op out of remote_history without invoking that
 * table's destroy functions, then swap the ID strings: the
 * local table's current key string (id) goes to remote_op,
 * which becomes the new value for that key, and the outgoing
 * local op takes the peer's ID string instead.
 * NOTE(review): presumably the replaced local op is then freed
 * by the local table's value-destroy function -- confirm.
 */
g_hash_table_steal(remote_history, op->id);
op->id = remote_op->id;
remote_op->id = id;
g_hash_table_iter_replace(&iter, remote_op);
updated = TRUE;
continue; /* skip outdated entries */
} else if (!stonith__op_state_pending(op->state)
&& stonith__op_state_pending(remote_op->state)) {
/* Case 2: the peer's copy is the outdated one. Drop it from
 * remote_history and fall through so that our copy is added
 * to the XML returned to the caller.
 */
crm_debug("Broadcasting operation %.8s (state=%s) to "
"update the outdated pending one "
"(state=%s) in remote peer history",
- op->id, stonith_op_state_str(op->state),
- stonith_op_state_str(remote_op->state));
+ op->id, stonith__op_state_text(op->state),
+ stonith__op_state_text(remote_op->state));
g_hash_table_remove(remote_history, op->id);
} else {
/* Case 3: both copies are pending or both are not, so the
 * peer has nothing to learn from us for this entry.
 */
g_hash_table_remove(remote_history, op->id);
continue; /* skip entries broadcasted already */
}
}
}
// Apply the optional target filter (pcmk__str_null_matches is passed, so
// presumably a NULL target accepts every entry -- confirm)
if (!pcmk__str_eq(target, op->target, pcmk__str_null_matches)) {
continue;
}
cnt++;
crm_trace("Attaching op %s", op->id);
// Serialize this operation as a child of the history element
entry = pcmk__xe_create(history, STONITH_OP_EXEC);
if (add_id) {
crm_xml_add(entry, PCMK__XA_ST_REMOTE_OP, op->id);
}
crm_xml_add(entry, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(entry, PCMK__XA_ST_DEVICE_ACTION, op->action);
crm_xml_add(entry, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(entry, PCMK__XA_ST_DELEGATE, op->delegate);
crm_xml_add(entry, PCMK__XA_ST_CLIENTNAME, op->client_name);
crm_xml_add_ll(entry, PCMK__XA_ST_DATE, op->completed);
crm_xml_add_ll(entry, PCMK__XA_ST_DATE_NSEC,
op->completed_nsec);
crm_xml_add_int(entry, PCMK__XA_ST_STATE, op->state);
stonith__xe_set_result(entry, &op->result);
}
}
if (remote_history) {
/* Whatever is still in remote_history was unknown locally (matching
 * entries were removed or stolen above), so adopt it. The local table is
 * created here if it does not exist yet.
 */
init_stonith_remote_op_hash_table(&stonith_remote_op_list);
// Any remaining remote entry counts as a local update
updated |= g_hash_table_size(remote_history);
g_hash_table_iter_init(&iter, remote_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
/* A pending operation that names the local node as originator but is
 * known only from a peer's history is marked failed, and that result
 * is broadcast.
 */
if (stonith__op_state_pending(op->state) &&
pcmk__str_eq(op->originator, fenced_get_local_node(),
pcmk__str_casei)) {
crm_warn("Failing pending operation %.8s originated by us but "
"known only from peer history", op->id);
op->state = st_failed;
set_fencing_completed(op);
/* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
* from setting a delegate
*/
pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
"Initiated by earlier fencer "
"process and presumed failed");
fenced_broadcast_op_result(op, false);
}
// Transfer the entry to the local table (keyed by the op's own ID
// string) without running remote_history's destroy functions
g_hash_table_iter_steal(&iter);
g_hash_table_replace(stonith_remote_op_list, op->id, op);
/* we could trim the history here but if we bail
* out after trim we might miss more recent entries
* of those that might still be in the list
* if we don't bail out trimming once is more
* efficient and memory overhead is minimal as
* we are just moving pointers from one hash to
* another
*/
}
g_hash_table_destroy(remote_history); /* remove what is left */
}
if (updated) {
// The local history changed: trim it once and tell clients about it
stonith_fence_history_trim();
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
}
// Return NULL rather than an empty history element when nothing was added
if (cnt == 0) {
pcmk__xml_free(history);
return NULL;
} else {
return history;
}
}
/*!
* \internal
* \brief Craft xml from the local fence-history
*
* \param[in] add_id If crafting the answer for an API
* history-request there is no need for the id
* \param[in] target Optionally limit to certain fence-target
*
* \return The fence-history as xml
*/
static xmlNode *
stonith_local_history(gboolean add_id, const char *target)
{
    // With no peer history to reconcile, the diff-and-merge helper simply
    // serializes the local history (optionally filtered by target)
    GHashTable *no_remote_history = NULL;

    return stonith_local_history_diff_and_merge(no_remote_history, add_id,
                                                target);
}
/*!
* \internal
* \brief Handle fence-history messages (from API or coming in as broadcasts)
*
* \param[in,out] msg Request XML
* \param[out] output Where to set local history, if requested
* \param[in] remote_peer If broadcast, peer that sent it
* \param[in] options Call options from the request
*/
void
stonith_fence_history(xmlNode *msg, xmlNode **output,
const char *remote_peer, int options)
{
/* Three mutually exclusive modes, selected by the call options:
 *  - st_opt_cleanup:   remove history entries (optionally for one target)
 *  - st_opt_broadcast: take part in a cluster-wide history synchronization
 *  - otherwise:        plain query, local history is returned via *output
 */
const char *target = NULL;
// First element anywhere in the request's document that has a target attribute
xmlNode *dev = pcmk__xpath_find_one(msg->doc,
"//*[@" PCMK__XA_ST_TARGET "]",
LOG_NEVER);
// History XML created for broadcasting; freed at the end of this function
xmlNode *out_history = NULL;
if (dev) {
target = crm_element_value(dev, PCMK__XA_ST_TARGET);
if (target && (options & st_opt_cs_nodeid)) {
/* The target was given as a numeric node ID: look the node up in the
 * node caches and use its name instead when it is found.
 */
int nodeid;
pcmk__node_status_t *node = NULL;
pcmk__scan_min_int(target, &nodeid, 0);
node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_any
|pcmk__node_search_cluster_cib);
if (node != NULL) {
target = node->name;
}
}
}
if (options & st_opt_cleanup) {
const char *call_id = crm_element_value(msg, PCMK__XA_ST_CALLID);
crm_trace("Cleaning up operations on %s in %p", target,
stonith_remote_op_list);
// The second argument is true only when the request carries a call ID
stonith_fence_history_cleanup(target, (call_id != NULL));
} else if (options & st_opt_broadcast) {
/* there is no clear sign atm for when a history sync
is done so send a notification for anything
that smells like history-sync
*/
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, NULL,
NULL);
if (crm_element_value(msg, PCMK__XA_ST_CALLID) != NULL) {
/* this is coming from the stonith-API
*
* craft a broadcast with node's history
* so that every node can merge and broadcast
* what it has on top
*/
out_history = stonith_local_history(TRUE, NULL);
crm_trace("Broadcasting history to peers");
stonith_send_broadcast_history(out_history,
st_opt_broadcast | st_opt_discard_reply,
NULL);
} else if (remote_peer &&
!pcmk__str_eq(remote_peer, fenced_get_local_node(),
pcmk__str_casei)) {
// Broadcast that came from a different node
xmlNode *history = pcmk__xpath_find_one(msg->doc,
"//" PCMK__XE_ST_HISTORY,
LOG_NEVER);
/* either a broadcast created directly upon stonith-API request
* or a diff as response to such a thing
*
* in both cases it may have a history or not
* if we have differential data
* merge in what we've received and stop
* otherwise broadcast what we have on top
* marking as differential and merge in afterwards
*/
if (!history
|| !pcmk__xe_attr_is_true(history, PCMK__XA_ST_DIFFERENTIAL)) {
GHashTable *received_history = NULL;
if (history != NULL) {
received_history = stonith_xml_history_to_list(history);
}
// received_history is handed over to the merge, which destroys it when
// it is non-NULL
out_history =
stonith_local_history_diff_and_merge(received_history, TRUE, NULL);
if (out_history) {
crm_trace("Broadcasting history-diff to peers");
pcmk__xe_set_bool_attr(out_history,
PCMK__XA_ST_DIFFERENTIAL, true);
stonith_send_broadcast_history(out_history,
st_opt_broadcast | st_opt_discard_reply,
NULL);
} else {
crm_trace("History-diff is empty - skip broadcast");
}
}
} else {
// No call ID, and either no peer name or the peer is the local node
crm_trace("Skipping history-query-broadcast (%s%s)"
" we sent ourselves",
remote_peer?"remote-peer=":"local-ipc",
remote_peer?remote_peer:"");
}
} else {
/* plain history request */
crm_trace("Looking for operations on %s in %p", target,
stonith_remote_op_list);
// Only this branch writes to *output
*output = stonith_local_history(FALSE, target);
}
// out_history is still NULL on every path that did not build a broadcast
pcmk__xml_free(out_history);
}
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 42760b8d27..870c1dc75c 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1,2622 +1,2633 @@
/*
* Copyright 2009-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <regex.h>
#include <libxml/tree.h> // xmlNode
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/util.h>
#include <pacemaker-fenced.h>
#define TIMEOUT_MULTIPLY_FACTOR 1.2
/* When one fencer queries its peers for devices able to handle a fencing
* request, each peer will reply with a list of such devices available to it.
* Each reply will be parsed into a peer_device_info_t, with each device's
* information kept in a device_properties_t.
*/
/* Properties of one fence device as reported in a peer's query reply. The
 * array members hold one slot per operation phase (st_phase_max slots).
 */
typedef struct device_properties_s {
/* Whether access to this device has been verified */
gboolean verified;
/* The remaining members are indexed by the operation's "phase" */
/* Whether this device has been executed in each phase */
gboolean executed[st_phase_max];
/* Whether this device is disallowed from executing in each phase */
gboolean disallowed[st_phase_max];
/* Action-specific timeout for each phase */
int custom_action_timeout[st_phase_max];
/* Action-specific maximum random delay for each phase */
int delay_max[st_phase_max];
/* Action-specific base delay for each phase */
int delay_base[st_phase_max];
/* Group of enum st_device_flags */
uint32_t device_support_flags;
} device_properties_t;
/* One peer's reply to a device query. Instances are freed with
 * free_remote_query(), which destroys the devices table and frees host.
 */
typedef struct {
/* Name of peer that sent this result */
char *host;
/* Only try peers for non-topology based operations once */
gboolean tried;
/* Number of entries in the devices table */
int ndevices;
/* Devices available to this host that are capable of fencing the target */
GHashTable *devices;
} peer_device_info_t;
/* Table of fencing operations known to this fencer, mapping an operation ID
 * string to its remote_fencing_op_t. It is created on demand by
 * init_stonith_remote_op_hash_table() and destroyed by
 * free_stonith_remote_op_list().
 */
GHashTable *stonith_remote_op_list = NULL;
// Defined outside this file
extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
// Forward declarations of static functions that are used before they are defined
static void request_peer_fencing(remote_fencing_op_t *op,
peer_device_info_t *peer);
static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
const peer_device_info_t *chosen_peer);
/*!
 * \internal
 * \brief Compare two C strings (usable as a GLib comparison callback)
 *
 * \param[in] a  First string
 * \param[in] b  Second string
 *
 * \return Result of strcmp() on the two strings
 */
static gint
sort_strings(gconstpointer a, gconstpointer b)
{
    const char *lhs = a;
    const char *rhs = b;

    return strcmp(lhs, rhs);
}
/*!
 * \internal
 * \brief Free a peer's query result (usable as a GLib destroy callback)
 *
 * \param[in,out] data  peer_device_info_t to free (may be NULL)
 */
static void
free_remote_query(gpointer data)
{
    peer_device_info_t *peer = data;

    if (peer == NULL) {
        return;
    }
    g_hash_table_destroy(peer->devices);
    free(peer->host);
    free(peer);
}
/*!
 * \internal
 * \brief Destroy the table of known fencing operations, if it exists
 */
void
free_stonith_remote_op_list(void)
{
    if (stonith_remote_op_list == NULL) {
        return;
    }
    g_hash_table_destroy(stonith_remote_op_list);

    // Reset so that the table can be created again later
    stonith_remote_op_list = NULL;
}
/* User data passed to count_peer_device() while iterating over a peer's
 * devices table
 */
struct peer_count_data {
// Operation whose current phase selects which "executed" slot is checked
const remote_fencing_op_t *op;
// If set, only devices marked as verified are counted
gboolean verified_only;
// Support flag a device must have to be counted; the "none" flag value
// disables this filter
uint32_t support_action_only;
// Running total of devices that passed all filters
int count;
};
/*!
* \internal
* \brief Increment a counter if a device has not been executed yet
*
* \param[in] key Device ID (ignored)
* \param[in] value Device properties
* \param[in,out] user_data Peer count data
*/
static void
count_peer_device(gpointer key, gpointer value, gpointer user_data)
{
device_properties_t *props = (device_properties_t*)value;
struct peer_count_data *data = user_data;
/* Count the device only if all of the following hold:
 *  - it has not been executed yet in the operation's current phase
 *  - the caller did not ask for verified devices only, or it is verified
 *  - the caller passed the "none" support flag, or the device's support
 *    flags include the requested flag
 */
if (!props->executed[data->op->phase]
&& (!data->verified_only || props->verified)
- && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) {
+ && ((data->support_action_only == fenced_df_none)
+ || pcmk_is_set(props->device_support_flags,
+ data->support_action_only))) {
++(data->count);
}
}
/*!
* \internal
* \brief Check the number of available devices in a peer's query results
*
* \param[in] op Operation that results are for
* \param[in] peer Peer to count
* \param[in] verified_only Whether to count only verified devices
* \param[in] support_on_action_only Whether to count only devices that support action
*
* \return Number of devices available to peer that were not already executed
*/
static int
count_peer_devices(const remote_fencing_op_t *op,
                   const peer_device_info_t *peer, gboolean verified_only,
                   uint32_t support_on_action_only)
{
    // Filters and running total shared with the per-device callback
    struct peer_count_data tally = {
        .op = op,
        .verified_only = verified_only,
        .support_action_only = support_on_action_only,
        .count = 0,
    };

    // Without a peer there is nothing to count
    if (peer == NULL) {
        return 0;
    }
    g_hash_table_foreach(peer->devices, count_peer_device, &tally);
    return tally.count;
}
/*!
* \internal
* \brief Search for a device in a query result
*
* \param[in] op Operation that result is for
* \param[in] peer Query result for a peer
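* \param[in] device Device ID to search for
* \param[in] support_action_only Support flag the device must have set (pass
*                                the "none" flag to skip this check)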
*
* \return Device properties if found, NULL otherwise
*/
static device_properties_t *
find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
const char *device, uint32_t support_action_only)
{
// Look the device up by ID among the devices this peer reported
device_properties_t *props = g_hash_table_lookup(peer->devices, device);
/* The device is returned only if it is known to the peer, has the requested
 * support flag (unless the "none" flag was passed), and has been neither
 * executed nor disallowed in the operation's current phase.
 */
- if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) {
+ if (props == NULL) {
return NULL;
}
- return (props && !props->executed[op->phase]
- && !props->disallowed[op->phase])? props : NULL;
+ if ((support_action_only != fenced_df_none)
+ && !pcmk_is_set(props->device_support_flags, support_action_only)) {
+ return NULL;
+ }
+ if (props->executed[op->phase] || props->disallowed[op->phase]) {
+ return NULL;
+ }
+ return props;
}
/*!
* \internal
* \brief Find a device in a peer's device list and mark it as executed
*
* \param[in] op Operation that peer result is for
* \param[in,out] peer Peer with results to search
* \param[in] device ID of device to mark as done
* \param[in] verified_devices_only Only consider verified devices
*
* \return TRUE if device was found and marked, FALSE otherwise
*/
static gboolean
grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
const char *device, gboolean verified_devices_only)
{
// Only a device that supports the operation's action, and is still eligible
// in the current phase, can be grabbed
device_properties_t *props = find_peer_device(op, peer, device,
fenced_support_flag(op->action));
if ((props == NULL) || (verified_devices_only && !props->verified)) {
return FALSE;
}
// The count is taken before the device is marked, so it still includes
// this device
crm_trace("Removing %s from %s (%d remaining)",
- device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
+ device, peer->host,
+ count_peer_devices(op, peer, FALSE, fenced_df_none));
// Mark as executed for the current phase only
props->executed[op->phase] = TRUE;
return TRUE;
}
/*!
 * \internal
 * \brief Cancel any timers that are still armed for a fencing operation
 *
 * Each timer ID is reset to 0 after its source is removed, so calling this
 * again is harmless.
 *
 * \param[in,out] op  Operation whose timers should be cancelled
 */
static void
clear_remote_op_timers(remote_fencing_op_t * op)
{
    // Device query timer
    if (op->query_timer != 0) {
        g_source_remove(op->query_timer);
        op->query_timer = 0;
    }

    // Timer covering the operation as a whole
    if (op->op_timer_total != 0) {
        g_source_remove(op->op_timer_total);
        op->op_timer_total = 0;
    }

    // Timer covering a single attempt
    if (op->op_timer_one != 0) {
        g_source_remove(op->op_timer_one);
        op->op_timer_one = 0;
    }
}
/*!
 * \internal
 * \brief Free a fencing operation and everything it owns
 *
 * \param[in,out] data  remote_fencing_op_t to free
 */
static void
free_remote_op(gpointer data)
{
    remote_fencing_op_t *op = data;

    crm_log_xml_debug(op->request, "Destroying");

    // Make sure no timer can fire for an operation that no longer exists
    clear_remote_op_timers(op);

    // Strings owned by the operation
    free(op->id);
    free(op->action);
    free(op->delegate);
    free(op->target);
    free(op->client_id);
    free(op->client_name);
    free(op->originator);

    // Query replies, original request, and device lists
    g_list_free_full(op->query_results, free_remote_query);
    pcmk__xml_free(op->request);
    g_list_free_full(op->devices_list, free);
    g_list_free_full(op->automatic_list, free);

    // Only the list links are released here, not the operations they refer to
    g_list_free(op->duplicates);

    pcmk__reset_result(&op->result);
    free(op);
}
/*!
 * \internal
 * \brief Create a table of fencing operations unless one already exists
 *
 * \param[in,out] table  Where the table pointer is stored; left untouched if
 *                       it already points to a table
 */
void
init_stonith_remote_op_hash_table(GHashTable **table)
{
    if (*table != NULL) {
        return;
    }

    // Values are released with free_remote_op(); no key destructor is given
    *table = pcmk__strkey_table(NULL, free_remote_op);
}
/*!
* \internal
* \brief Return an operation's originally requested action (before any remap)
*
* \param[in] op Operation to check
*
* \return Operation's original action
*/
static const char *
op_requested_action(const remote_fencing_op_t *op)
{
    // Any phase past the requested one means the action was remapped, and
    // reboot is the action reported in that case
    if (op->phase > st_phase_requested) {
        return PCMK_ACTION_REBOOT;
    }
    return op->action;
}
/*!
* \internal
* \brief Remap a "reboot" operation to the "off" phase
*
* \param[in,out] op Operation to remap
*/
static void
op_phase_off(remote_fencing_op_t *op)
{
    crm_info("Remapping multiple-device reboot targeting %s to 'off' "
             QB_XS " id=%.8s", op->target, op->id);

    op->phase = st_phase_off;

    /* The action buffer was sized for "reboot"; "off" and "on" are shorter,
     * so the existing allocation can be overwritten in place for each phase.
     */
    strcpy(op->action, PCMK_ACTION_OFF);
}
/*!
* \internal
* \brief Advance a remapped reboot operation to the "on" phase
*
* \param[in,out] op Operation to remap
*/
static void
op_phase_on(remote_fencing_op_t *op)
{
    crm_info("Remapped 'off' targeting %s complete, "
             "remapping to 'on' for %s " QB_XS " id=%.8s",
             op->target, op->client_name, op->id);

    op->phase = st_phase_on;

    // "on" is shorter than "reboot", so the action buffer is reused in place
    strcpy(op->action, PCMK_ACTION_ON);

    /* Skip devices with automatic unfencing, because the cluster will handle it
     * when the node rejoins.
     */
    for (GList *iter = op->automatic_list; iter != NULL; iter = iter->next) {
        GList *match = g_list_find_custom(op->devices_list, iter->data,
                                          sort_strings);

        if (match != NULL) {
            /* The device list owns its strings (they are copied when the list
             * is set and released with free() when it is destroyed), so free
             * the string together with its link instead of leaking it.
             */
            free(match->data);
            op->devices_list = g_list_delete_link(op->devices_list, match);
        }
    }
    g_list_free_full(op->automatic_list, free);
    op->automatic_list = NULL;

    /* Rewind device list pointer */
    op->devices = op->devices_list;
}
/*!
* \internal
* \brief Reset a remapped reboot operation
*
* \param[in,out] op Operation to reset
*/
static void
undo_op_remap(remote_fencing_op_t *op)
{
    // Nothing to undo unless the operation has moved past its initial phase
    if (!(op->phase > 0)) {
        return;
    }

    crm_info("Undoing remap of reboot targeting %s for %s "
             QB_XS " id=%.8s", op->target, op->client_name, op->id);

    // Restore both the phase and the action name in the reused buffer
    op->phase = st_phase_requested;
    strcpy(op->action, PCMK_ACTION_REBOOT);
}
/*!
* \internal
* \brief Create notification data XML for a fencing operation result
*
* \param[in,out] parent Parent XML element for newly created element
* \param[in] op Fencer operation that completed
*
* \return Newly created XML to add as notification data
* \note The caller is responsible for freeing the result.
*/
static xmlNode *
fencing_result2xml(xmlNode *parent, const remote_fencing_op_t *op)
{
    xmlNode *xml = pcmk__xe_create(parent, PCMK__XE_ST_NOTIFY_FENCE);

    // Outcome of the operation
    crm_xml_add_int(xml, PCMK_XA_STATE, op->state);

    // What was done, to whom, and by whom
    crm_xml_add(xml, PCMK__XA_ST_TARGET, op->target);
    crm_xml_add(xml, PCMK__XA_ST_DEVICE_ACTION, op->action);
    crm_xml_add(xml, PCMK__XA_ST_DELEGATE, op->delegate);

    // Identity of the operation and of its requester
    crm_xml_add(xml, PCMK__XA_ST_REMOTE_OP, op->id);
    crm_xml_add(xml, PCMK__XA_ST_ORIGIN, op->originator);
    crm_xml_add(xml, PCMK__XA_ST_CLIENTID, op->client_id);
    crm_xml_add(xml, PCMK__XA_ST_CLIENTNAME, op->client_name);

    return xml;
}
/*!
* \internal
* \brief Broadcast a fence result notification to all CPG peers
*
* \param[in] op Fencer operation that completed
* \param[in] op_merged Whether this operation is a duplicate of another
*/
void
fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
{
    // Incremented for every broadcast and included in the message
    static int count = 0;

    xmlNode *calldata = NULL;
    xmlNode *result_xml = NULL;
    xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);

    count++;
    crm_trace("Broadcasting result to peers");

    // Message header
    crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
    crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
    crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
    crm_xml_add_int(bcast, PCMK_XA_COUNT, count);

    // The merged attribute is added only when set
    if (op_merged) {
        pcmk__xe_set_bool_attr(bcast, PCMK__XA_ST_OP_MERGED, true);
    }

    // Payload: operation details plus its result, wrapped in a call-data element
    calldata = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA);
    result_xml = fencing_result2xml(calldata, op);
    stonith__xe_set_result(result_xml, &op->result);

    pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, bcast);
    pcmk__xml_free(bcast);
}
/*!
* \internal
* \brief Reply to a local request originator and notify all subscribed clients
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data Top-level XML to add notification to
*/
static void
handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
{
    xmlNode *reply = NULL;
    xmlNode *fence_xml = NULL;
    pcmk__client_t *requester = NULL;

    // Reply and notifications are sent at most once per operation
    if (op->notify_sent == TRUE) {
        return;
    }

    // Describe the finished operation in the caller's XML
    crm_xml_add_int(data, PCMK_XA_STATE, op->state);
    crm_xml_add(data, PCMK__XA_ST_TARGET, op->target);
    crm_xml_add(data, PCMK__XA_ST_OP, op->action);

    reply = fenced_construct_reply(op->request, data, &op->result);
    crm_xml_add(reply, PCMK__XA_ST_DELEGATE, op->delegate);

    // Answer the local client that requested the fencing, if it still exists
    requester = pcmk__find_client_by_id(op->client_id);
    if (requester != NULL) {
        do_local_reply(reply, requester, op->call_options);
    } else {
        crm_trace("Skipping reply to %s: no longer a client", op->client_id);
    }

    // Tell all local clients that the fencing operation happened ...
    fence_xml = fencing_result2xml(NULL, op);
    fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, &op->result,
                             fence_xml);
    pcmk__xml_free(fence_xml);

    // ... and that the fencing history changed
    fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);

    // Remember that this operation's reply and notifications went out
    op->notify_sent = TRUE;
    pcmk__xml_free(reply);
}
/*!
* \internal
* \brief Finalize all duplicates of a given fencer operation
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data Top-level XML to add notification to
*/
static void
finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
{
for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *other = iter->data;
if (other->state == st_duplicate) {
/* Still waiting on the original: give the duplicate the original's
 * state and result, then finalize it as a duplicate (dup=true).
 */
other->state = op->state;
crm_debug("Performing duplicate notification for %s@%s: %s "
QB_XS " id=%.8s",
other->client_name, other->originator,
pcmk_exec_status_str(op->result.execution_status),
other->id);
pcmk__copy_result(&op->result, &other->result);
finalize_op(other, data, true);
} else {
// Possible if (for example) it timed out already
crm_err("Skipping duplicate notification for %s@%s "
QB_XS " state=%s id=%.8s",
other->client_name, other->originator,
- stonith_op_state_str(other->state), other->id);
+ stonith__op_state_text(other->state), other->id);
}
}
}
/*!
 * \internal
 * \brief Get the name of the delegate from a fencing reply
 *
 * \param[in] xml  Reply XML
 *
 * \return Newly allocated copy of the delegate attribute of the first element
 *         in the XML's document that has one, otherwise a copy of the reply's
 *         source attribute
 */
static char *
delegate_from_xml(xmlNode *xml)
{
    xmlNode *match = pcmk__xpath_find_one(xml->doc,
                                          "//*[@" PCMK__XA_ST_DELEGATE "]",
                                          LOG_NEVER);

    if (match != NULL) {
        return crm_element_value_copy(match, PCMK__XA_ST_DELEGATE);
    }

    // No explicit delegate anywhere in the document: use the sender instead
    return crm_element_value_copy(xml, PCMK__XA_SRC);
}
/*!
* \internal
* \brief Finalize a peer fencing operation
*
* Clean up after a fencing operation completes. This function has two code
* paths: the executioner uses it to broadcast the result to CPG peers, and then
* each peer (including the executioner) uses it to process that broadcast and
* notify its IPC clients of the result.
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data If not NULL, XML reply of last delegated operation
* \param[in] dup Whether this operation is a duplicate of another
* (in which case, do not broadcast the result)
*
* \note The operation result should be set before calling this function.
*/
static void
finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
{
    int level = LOG_ERR;
    const char *subt = NULL;
    xmlNode *local_data = NULL; // Set only if we create the data XML ourselves
    bool op_merged = false;

    CRM_CHECK((op != NULL), return);

    // This is a no-op if timers have already been cleared
    clear_remote_op_timers(op);

    /* An operation is "merged" if it is a duplicate of another or if the reply
     * says so. Determine this before anything is logged, so that the "arrived
     * too late" message below can report it too. A data object created later
     * in this function is empty and so cannot change the answer.
     */
    op_merged = dup
                || ((data != NULL)
                    && (crm_element_value(data,
                                          PCMK__XA_ST_OP_MERGED) != NULL));

    if (op->notify_sent) {
        // Most likely, this is a timed-out action that eventually completed
        crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
                   "Result arrived too late " QB_XS " id=%.8s",
                   op->action, (op->target? " targeting " : ""),
                   (op->target? op->target : ""),
                   (op->delegate? op->delegate : "unknown node"),
                   op->client_name, op->originator,
                   (op_merged? " (merged)" : ""),
                   op->id);
        return;
    }

    set_fencing_completed(op);
    undo_op_remap(op);

    if (data == NULL) {
        data = pcmk__xe_create(NULL, "remote-op");
        local_data = data;

    } else if (op->delegate == NULL) {
        // Take the delegate from the reply, except for results that must not
        // get one
        switch (op->result.execution_status) {
            case PCMK_EXEC_NO_FENCE_DEVICE:
                break;

            case PCMK_EXEC_INVALID:
                if (op->result.exit_status != CRM_EX_EXPIRED) {
                    op->delegate = delegate_from_xml(data);
                }
                break;

            default:
                op->delegate = delegate_from_xml(data);
                break;
        }
    }

    /* Tell everyone the operation is done, we will continue
     * with doing the local notifications once we receive
     * the broadcast back. */
    subt = crm_element_value(data, PCMK__XA_SUBT);
    if (!dup && !pcmk__str_eq(subt, PCMK__VALUE_BROADCAST, pcmk__str_none)) {
        /* Defer notification until the bcast message arrives */
        fenced_broadcast_op_result(op, op_merged);
        pcmk__xml_free(local_data);
        return;
    }

    // Log at error level only for a failed, non-duplicate operation that
    // originated on the local node
    if (pcmk__result_ok(&op->result) || dup
        || !pcmk__str_eq(op->originator, fenced_get_local_node(),
                         pcmk__str_casei)) {
        level = LOG_NOTICE;
    }
    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
               QB_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
               (op->target? op->target : ""),
               (op->delegate? op->delegate : "unknown node"),
               op->client_name, op->originator,
               (op_merged? " (merged)" : ""),
               crm_exit_str(op->result.exit_status),
               pcmk_exec_status_str(op->result.execution_status),
               ((op->result.exit_reason == NULL)? "" : ": "),
               ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
               op->id);

    handle_local_reply_and_notify(op, data);

    // Duplicates of a duplicate are not followed
    if (!dup) {
        finalize_op_duplicates(op, data);
    }

    /* Free non-essential parts of the record
     * Keep the record around so we can query the history
     */
    if (op->query_results) {
        g_list_free_full(op->query_results, free_remote_query);
        op->query_results = NULL;
    }
    if (op->request) {
        pcmk__xml_free(op->request);
        op->request = NULL;
    }

    pcmk__xml_free(local_data);
}
/*!
* \internal
* \brief Finalize a watchdog fencer op after the waiting time expires
*
* \param[in,out] userdata Fencer operation that completed
*
* \return G_SOURCE_REMOVE (which tells glib not to restart timer)
*/
static gboolean
remote_op_watchdog_done(gpointer userdata)
{
    remote_fencing_op_t *op = userdata;

    // This timer has fired, so forget its ID before anything tries to remove it
    op->op_timer_one = 0;

    crm_notice("Self-fencing (%s) by %s for %s assumed complete "
               QB_XS " id=%.8s",
               op->action, op->target, op->client_name, op->id);

    // Record success and finalize without any reply XML
    op->state = st_done;
    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    finalize_op(op, NULL, false);

    return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Handle expiration of the timer for a single peer fencing attempt
 *
 * \param[in,out] userdata  Fencer operation whose attempt timed out
 *
 * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 */
static gboolean
remote_op_timeout_one(gpointer userdata)
{
    remote_fencing_op_t *op = userdata;

    op->op_timer_one = 0;

    crm_notice("Peer's '%s' action targeting %s for client %s timed out " QB_XS
               " id=%.8s", op->action, op->target, op->client_name, op->id);
    pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
                     "Peer did not return fence result within timeout");

    // The requested delay has been applied for the first device, so drop it
    // for any further attempt
    if (op->client_delay > 0) {
        op->client_delay = 0;
        crm_trace("Try another device for '%s' action targeting %s "
                  "for client %s without delay " QB_XS " id=%.8s",
                  op->action, op->target, op->client_name, op->id);
    }

    // Try another device, if appropriate
    request_peer_fencing(op, NULL);
    return G_SOURCE_REMOVE;
}
/*!
* \internal
* \brief Finalize a remote fencer operation that timed out
*
* \param[in,out] op Fencer operation that timed out
* \param[in] reason Readable description of what step timed out
*/
static void
finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
{
    crm_debug("Action '%s' targeting %s for client %s timed out "
              QB_XS " id=%.8s",
              op->action, op->target, op->client_name, op->id);

    if (op->phase != st_phase_on) {
        // Ordinary timeout: the operation failed for the given reason
        op->state = st_failed;
        pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);

    } else {
        /* A remapped reboot timed out while turning the target back on. The
         * "off" phase already completed successfully, so stop trying further
         * devices and report success.
         */
        op->state = st_done;
        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    }

    finalize_op(op, NULL, false);
}
/*!
* \internal
* \brief Finalize a remote fencer operation that timed out
*
* \param[in,out] userdata Fencer operation that timed out
*
* \return G_SOURCE_REMOVE (which tells glib not to restart timer)
*/
static gboolean
remote_op_timeout(gpointer userdata)
{
    remote_fencing_op_t *op = userdata;

    // The total timer has fired; clear its ID so that it is not removed later
    op->op_timer_total = 0;

    if (op->state != st_done) {
        finalize_timed_out_op(op, "Fencing did not complete within a "
                                  "total timeout based on the "
                                  "configured timeout and retries for "
                                  "any devices attempted");
    } else {
        // Nothing to finalize if the operation finished in the meantime
        crm_debug("Action '%s' targeting %s for client %s already completed "
                  QB_XS " id=%.8s",
                  op->action, op->target, op->client_name, op->id);
    }
    return G_SOURCE_REMOVE;
}
/* Timer callback for the device query timer of a fencing operation.
 *
 * If the operation is already done or executing, only a debug message is
 * logged. Otherwise fencing is requested if at least one query result has
 * arrived, or the operation is finalized as timed out if none has. The return
 * value G_SOURCE_REMOVE tells glib not to restart the timer.
 */
static gboolean
remote_op_query_timeout(gpointer data)
{
remote_fencing_op_t *op = data;
// The timer has fired, so its ID must not be removed again later
op->query_timer = 0;
if (op->state == st_done) {
crm_debug("Operation %.8s targeting %s already completed",
op->id, op->target);
} else if (op->state == st_exec) {
crm_debug("Operation %.8s targeting %s already in progress",
op->id, op->target);
} else if (op->query_results) {
// Query succeeded, so attempt the actual fencing
crm_debug("Query %.8s targeting %s complete (state=%s)",
- op->id, op->target, stonith_op_state_str(op->state));
+ op->id, op->target, stonith__op_state_text(op->state));
request_peer_fencing(op, NULL);
} else {
// No peer replied with devices before the timer expired
crm_debug("Query %.8s targeting %s timed out (state=%s)",
- op->id, op->target, stonith_op_state_str(op->state));
+ op->id, op->target, stonith__op_state_text(op->state));
finalize_timed_out_op(op, "No capable peers replied to device query "
"within timeout");
}
return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Check whether a topology entry has no fencing levels configured
 *
 * \param[in] tp  Topology entry to check (may be NULL)
 *
 * \return TRUE if \p tp is NULL or all of its levels are NULL, otherwise FALSE
 */
static gboolean
topology_is_empty(stonith_topology_t *tp)
{
    if (tp == NULL) {
        return TRUE;
    }

    // A single configured level is enough to make the topology non-empty
    for (int lvl = 0; lvl < ST__LEVEL_COUNT; lvl++) {
        if (tp->levels[lvl] != NULL) {
            return FALSE;
        }
    }
    return TRUE;
}
/*!
* \internal
* \brief Add a device to an operation's automatic unfencing list
*
* \param[in,out] op Operation to modify
* \param[in] device Device ID to add
*/
static void
add_required_device(remote_fencing_op_t *op, const char *device)
{
    // Each device ID appears in the list at most once
    if (g_list_find_custom(op->automatic_list, device,
                           sort_strings) != NULL) {
        return;
    }

    // The list keeps its own copy of the ID
    op->automatic_list = g_list_prepend(op->automatic_list,
                                        pcmk__str_copy(device));
}
/*!
* \internal
* \brief Remove a device from the automatic unfencing list
*
* \param[in,out] op Operation to modify
* \param[in] device Device ID to remove
*/
static void
remove_required_device(remote_fencing_op_t *op, const char *device)
{
    GList *match = g_list_find_custom(op->automatic_list, device,
                                      sort_strings);

    if (match != NULL) {
        /* The list owns a copy of each device ID (made when the device was
         * added, and released with free() when the list is destroyed), so the
         * string has to be freed along with its link or it is leaked.
         *
         * NOTE(review): assumes no caller passes the list's own string as
         * \p device and keeps using it afterwards -- confirm against callers.
         */
        free(match->data);
        op->automatic_list = g_list_delete_link(op->automatic_list, match);
    }
}
/* deep copy the device list */
static void
set_op_device_list(remote_fencing_op_t * op, GList *devices)
{
    // Discard the previous list together with the strings it owns
    g_list_free_full(op->devices_list, free);
    op->devices_list = NULL;

    // Copy each device ID so the operation owns its list independently
    for (GList *item = devices; item != NULL; item = item->next) {
        const char *device_id = item->data;

        op->devices_list = g_list_append(op->devices_list,
                                         pcmk__str_copy(device_id));
    }

    // Start from the first device of the new list
    op->devices = op->devices_list;
}
/*!
* \internal
* \brief Check whether a node matches a topology target
*
* \param[in] tp Topology table entry to check
* \param[in] node Name of node to check
*
* \return TRUE if node matches topology target
*/
static gboolean
topology_matches(const stonith_topology_t *tp, const char *node)
{
    regex_t name_regex;

    CRM_CHECK(node && tp && tp->target, return FALSE);

    switch (tp->kind) {
        case fenced_target_by_name:
            // Plain, case-insensitive comparison of node names
            crm_trace("Testing %s against %s", node, tp->target);
            return pcmk__str_eq(tp->target, node, pcmk__str_casei);

        case fenced_target_by_attribute:
            /* tp->target is a NAME=VALUE pair of a permanent attribute
             * applied to targeted nodes. The check relies on the locally
             * cached copy of the CIB, so the topology cannot be used if
             * fencing is needed before the initial CIB is received or after
             * a malformed CIB is received.
             */
            if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
                crm_notice("Matched %s with %s by attribute", node, tp->target);
                return TRUE;
            }
            break;

        case fenced_target_by_pattern:
            /* tp->target (and tp->target_pattern) is a regular expression
             * that node names are matched against.
             */
            if (regcomp(&name_regex, tp->target_pattern,
                        REG_EXTENDED|REG_NOSUB) == 0) {
                int status = regexec(&name_regex, node, 0, NULL, 0);

                regfree(&name_regex);
                if (status == 0) {
                    crm_notice("Matched %s with %s by name", node, tp->target);
                    return TRUE;
                }
            } else {
                crm_info("Bad regex '%s' for fencing level", tp->target);
            }
            break;

        default:
            break;
    }

    crm_trace("No match for %s with %s", node, tp->target);
    return FALSE;
}
/*!
 * \internal
 * \brief Find the fencing topology entry that applies to a host
 *
 * The topology table is first looked up with \p host as the key. If that
 * finds nothing, every entry is tested with topology_matches().
 *
 * \param[in] host  Name of host to look up (may be NULL)
 *
 * \return Applicable topology entry, or NULL if \p host is NULL or no entry
 *         matches
 */
stonith_topology_t *
find_topology_for_host(const char *host)
{
    GHashTableIter tIter;
    stonith_topology_t *tp = NULL;

    if (host == NULL) {
        /* Callers may pass an operation's target unchecked, and an operation
         * can lack a target. A NULL key must not reach the table lookup.
         */
        return NULL;
    }

    tp = g_hash_table_lookup(topology, host);
    if (tp != NULL) {
        // g_hash_table_size() returns guint, hence %u
        crm_trace("Found %s for %s in %u entries",
                  tp->target, host, g_hash_table_size(topology));
        return tp;
    }

    g_hash_table_iter_init(&tIter, topology);
    while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) &tp)) {
        if (topology_matches(tp, host)) {
            crm_trace("Found %s for %s in %u entries",
                      tp->target, host, g_hash_table_size(topology));
            return tp;
        }
    }

    crm_trace("No matches for %s in %u topology entries",
              host, g_hash_table_size(topology));
    return NULL;
}
/*!
 * \internal
 * \brief Move a fencing operation on to its target's next topology level
 *
 * \param[in,out] op        Remote fencing operation to modify
 * \param[in]     empty_ok  If true, an operation without a target (i.e.
 *                          queries) or a target without a topology yields
 *                          pcmk_rc_ok instead of ENODEV
 *
 * \return Standard Pacemaker return value
 */
static int
advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
{
    stonith_topology_t *tp = NULL;

    if (op->target != NULL) {
        tp = find_topology_for_host(op->target);
    }

    if (topology_is_empty(tp)) {
        if (empty_ok) {
            return pcmk_rc_ok;
        }
        return ENODEV;
    }
    pcmk__assert(tp->levels != NULL);

    stonith__set_call_options(op->call_options, op->id, st_opt_topology);

    /* A new level is starting, so undo any remapping left over from the
     * previous one
     */
    undo_op_remap(op);

    // Step to the next level index that has a device list
    for (op->level++;
         (op->level < ST__LEVEL_COUNT) && (tp->levels[op->level] == NULL);
         op->level++) {
    }

    if (op->level >= ST__LEVEL_COUNT) {
        // Ran off the end of the level table
        crm_info("All %sfencing options targeting %s for client %s@%s failed "
                 QB_XS " id=%.8s",
                 (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
                 op->target, op->client_name, op->originator, op->id);
        return ENODEV;
    }

    crm_trace("Attempting fencing level %d targeting %s (%d devices) "
              "for client %s@%s (id=%.8s)",
              op->level, op->target, g_list_length(tp->levels[op->level]),
              op->client_name, op->originator, op->id);
    set_op_device_list(op, tp->levels[op->level]);

    // The requested delay has been applied for the first fencing level
    if ((op->level > 1) && (op->client_delay > 0)) {
        op->client_delay = 0;
    }

    if ((g_list_next(op->devices_list) != NULL)
        && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
        /* A reboot was requested for a level with more than one device.
         * Rather than rebooting the devices one after another, turn them all
         * off and then all on again. (Think about switched power outlets for
         * redundant power supplies.)
         */
        op_phase_off(op);
    }
    return pcmk_rc_ok;
}
/*!
* \internal
* \brief If fencing operation is a duplicate, merge it into the other one
*
* Every other operation in stonith_remote_op_list is compared against \p op.
* When one qualifies, \p op is appended to that operation's list of duplicates,
* the timeout is reported for \p op, and \p op's state becomes st_duplicate.
*
* \param[in,out] op Fencing operation to check
*/
static void
merge_duplicates(remote_fencing_op_t *op)
{
GHashTableIter iter;
remote_fencing_op_t *other = NULL;
time_t now = time(NULL);
g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
const char *other_action = op_requested_action(other);
pcmk__node_status_t *node = NULL;
if (!strcmp(op->id, other->id)) {
continue; // Don't compare against self
}
// Skip operations whose state is past st_exec
if (other->state > st_exec) {
crm_trace("%.8s not duplicate of %.8s: not in progress",
op->id, other->id);
continue;
}
// Targets are compared case-insensitively
if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
op->id, other->id, op->target, other->target);
continue;
}
// Compare against the action the other operation originally requested
if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
op->id, other->id, op->action, other_action);
continue;
}
// Requests from the same client name are never merged
if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: same client %s",
op->id, other->id, op->client_name);
continue;
}
// Never merge into an operation whose originator is its own target
if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: self-fencing for %s",
op->id, other->id, other->target);
continue;
}
node = pcmk__get_node(0, other->originator, NULL,
pcmk__node_search_cluster_member);
if (!fencing_peer_active(node)) {
crm_notice("Failing action '%s' targeting %s originating from "
"client %s@%s: Originator is dead " QB_XS " id=%.8s",
other->action, other->target, other->client_name,
other->originator, other->id);
crm_trace("%.8s not duplicate of %.8s: originator dead",
op->id, other->id);
// Side effect: the other operation itself is marked as failed
other->state = st_failed;
continue;
}
// Skip operations that have outlived their own total timeout
if ((other->total_timeout > 0)
&& (now > (other->total_timeout + other->created))) {
crm_trace("%.8s not duplicate of %.8s: old (%lld vs. %lld + %ds)",
op->id, other->id, (long long)now, (long long)other->created,
other->total_timeout);
continue;
}
/* There is another in-flight request to fence the same host
* Piggyback on that instead. If it fails, so do we.
*/
other->duplicates = g_list_append(other->duplicates, op);
if (other->total_timeout == 0) {
// The other operation has no timeout yet, so estimate one from op
other->total_timeout = op->total_timeout =
TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
crm_trace("Best guess as to timeout used for %.8s: %ds",
other->id, other->total_timeout);
}
crm_notice("Merging fencing action '%s' targeting %s originating from "
"client %s with identical request from %s@%s "
QB_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
op->action, op->target, op->client_name,
other->client_name, other->originator,
op->id, other->id, other->total_timeout);
report_timeout_period(op, other->total_timeout);
op->state = st_duplicate;
/* NOTE(review): the scan continues after a merge, so op could be appended
* to more than one operation's duplicates list -- confirm this is intended
*/
}
}
/*!
 * \internal
 * \brief Count peer cache entries that fencing_peer_active() accepts
 *
 * \return Number of such entries in pcmk__peer_cache
 */
static uint32_t
fencing_active_peers(void)
{
    GHashTableIter peer_iter;
    pcmk__node_status_t *peer = NULL;
    uint32_t n_active = 0;

    g_hash_table_iter_init(&peer_iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&peer_iter, NULL, (void **) &peer)) {
        if (!fencing_peer_active(peer)) {
            continue;
        }
        n_active++;
    }
    return n_active;
}
/*!
 * \internal
 * \brief Process a manual confirmation of a pending fence action
 *
 * An operation is created for the request, marked as done with "a human" as
 * its delegate, and finalized immediately.
 *
 * \param[in]     client  IPC client that sent confirmation
 * \param[in,out] msg     Request XML with manual confirmation
 *
 * \return Standard Pacemaker return code (EPROTO if the request has no target
 *         or no operation could be created, otherwise EINPROGRESS)
 */
int
fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
{
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = pcmk__xpath_find_one(msg->doc,
                                        "//*[@" PCMK__XA_ST_TARGET "]",
                                        LOG_ERR);

    CRM_CHECK(dev != NULL, return EPROTO);

    crm_notice("Received manual confirmation that %s has been fenced",
               pcmk__s(crm_element_value(dev, PCMK__XA_ST_TARGET),
                       "unknown target"));
    op = initiate_remote_stonith_op(client, msg, TRUE);
    if (op == NULL) {
        return EPROTO;
    }
    op->state = st_done;

    /* Operation creation may already have copied a delegate from the request,
     * so replace the string rather than overwrite the pointer and leak it
     */
    pcmk__str_update(&(op->delegate), "a human");

    // For the fencer's purposes, the fencing operation is done
    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    finalize_op(op, msg, false);

    /* For the requester's purposes, the operation is still pending. The
     * actual result will be sent asynchronously via the operation's done_cb().
     */
    return EINPROGRESS;
}
/*!
* \internal
* \brief Create a new remote stonith operation
*
* The new operation is registered in stonith_remote_op_list under its ID,
* populated from the request, and checked against in-flight operations for
* duplication before being returned.
*
* \param[in] client ID of local stonith client that initiated the operation
* \param[in] request The request from the client that started the operation
* \param[in] peer TRUE if this operation is owned by another stonith peer
* (an operation owned by one peer is stored on all peers,
* but only the owner executes it; all nodes get the results
* once the owner finishes execution)
*
* \return Already-recorded operation with the request's ID if \p peer is TRUE
*         and one exists, otherwise a newly allocated operation (NULL if a
*         peer request carries a target but no operation ID)
*/
void *
create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
{
remote_fencing_op_t *op = NULL;
// First element anywhere in the request document with a target attribute
xmlNode *dev = pcmk__xpath_find_one(request->doc,
"//*[@" PCMK__XA_ST_TARGET "]",
LOG_NEVER);
int rc = pcmk_rc_ok;
const char *operation = NULL;
init_stonith_remote_op_hash_table(&stonith_remote_op_list);
/* If this operation is owned by another node, check to make
* sure we haven't already created this operation. */
if (peer && dev) {
const char *op_id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
CRM_CHECK(op_id != NULL, return NULL);
op = g_hash_table_lookup(stonith_remote_op_list, op_id);
if (op) {
crm_debug("Reusing existing remote fencing op %.8s for %s",
op_id, ((client == NULL)? "unknown client" : client));
return op;
}
}
op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t));
crm_element_value_int(request, PCMK__XA_ST_TIMEOUT, &(op->base_timeout));
// Value -1 means disable any static/random fencing delays
crm_element_value_int(request, PCMK__XA_ST_DELAY, &(op->client_delay));
// Peer-owned operations keep the owner's ID; local ones get a fresh UUID
if (peer && dev) {
op->id = crm_element_value_copy(dev, PCMK__XA_ST_REMOTE_OP);
} else {
op->id = crm_generate_uuid();
}
// The table is keyed by the operation's own ID string
g_hash_table_replace(stonith_remote_op_list, op->id, op);
op->state = st_query;
op->replies_expected = fencing_active_peers();
op->action = crm_element_value_copy(dev, PCMK__XA_ST_DEVICE_ACTION);
/* The node initiating the stonith operation. If an operation is relayed,
* this is the last node the operation lands on. When in standalone mode,
* origin is the ID of the client that originated the operation.
*
* Or may be the name of the function that created the operation.
*/
op->originator = crm_element_value_copy(dev, PCMK__XA_ST_ORIGIN);
if (op->originator == NULL) {
/* Local or relayed request */
op->originator = pcmk__str_copy(fenced_get_local_node());
}
// Delegate may not be set
op->delegate = crm_element_value_copy(dev, PCMK__XA_ST_DELEGATE);
op->created = time(NULL);
CRM_LOG_ASSERT(client != NULL);
op->client_id = pcmk__str_copy(client);
/* For a RELAY operation, set fenced on the client. */
operation = crm_element_value(request, PCMK__XA_ST_OP);
if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
} else {
op->client_name = crm_element_value_copy(request,
PCMK__XA_ST_CLIENTNAME);
}
op->target = crm_element_value_copy(dev, PCMK__XA_ST_TARGET);
// @TODO Figure out how to avoid copying XML here
op->request = pcmk__xml_copy(NULL, request);
rc = pcmk__xe_get_flags(request, PCMK__XA_ST_CALLOPT, &(op->call_options),
0U);
if (rc != pcmk_rc_ok) {
crm_warn("Couldn't parse options from request %s: %s",
op->id, pcmk_rc_str(rc));
}
crm_element_value_int(request, PCMK__XA_ST_CALLID, &(op->client_callid));
crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
"base timeout %ds, %u %s expected)",
(peer && dev)? "Recorded" : "Generated", op->id, op->action,
op->target, op->client_name, op->base_timeout,
op->replies_expected,
pcmk__plural_alt(op->replies_expected, "reply", "replies"));
// With st_opt_cs_nodeid, the target is a numeric node ID to be resolved
if (op->call_options & st_opt_cs_nodeid) {
int nodeid;
pcmk__node_status_t *node = NULL;
pcmk__scan_min_int(op->target, &nodeid, 0);
node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_any
|pcmk__node_search_cluster_cib);
/* Ensure the conversion only happens once */
stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
if ((node != NULL) && (node->name != NULL)) {
pcmk__str_update(&(op->target), node->name);
} else {
crm_warn("Could not expand nodeid '%s' into a host name", op->target);
}
}
/* check to see if this is a duplicate operation of another in-flight operation */
merge_duplicates(op);
if (op->state != st_duplicate) {
/* kick history readers */
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
}
/* safe to trim as long as that doesn't touch pending ops */
stonith_fence_history_trim();
return op;
}
/*!
* \internal
* \brief Create a peer fencing operation from a request, and initiate it
*
* Unless the request is a manual confirmation, the operation is advanced to
* its first topology level (if any) and a query is sent to the cluster, with a
* timer armed to bound the wait for replies.
*
* \param[in] client IPC client that made request (NULL to get from request)
* \param[in] request Request XML
* \param[in] manual_ack Whether this is a manual action confirmation
*
* \return Newly created operation on success, otherwise NULL
*/
remote_fencing_op_t *
initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
gboolean manual_ack)
{
int query_timeout = 0;
xmlNode *query = NULL;
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
const char *relay_op_id = NULL;
const char *operation = NULL;
if (client) {
client_id = client->id;
} else {
client_id = crm_element_value(request, PCMK__XA_ST_CLIENTID);
}
CRM_LOG_ASSERT(client_id != NULL);
op = create_remote_stonith_op(client_id, request, FALSE);
// This node owns the operation it is initiating
op->owner = TRUE;
if (manual_ack) {
// The caller finalizes manual confirmations itself; nothing to query
return op;
}
CRM_CHECK(op->action, return NULL);
if (advance_topology_level(op, true) != pcmk_rc_ok) {
op->state = st_failed;
}
switch (op->state) {
case st_failed:
// advance_topology_level() exhausted levels
pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"All topology levels failed");
crm_warn("Could not request peer fencing (%s) targeting %s "
QB_XS " id=%.8s", op->action, op->target, op->id);
finalize_op(op, NULL, false);
return op;
case st_duplicate:
// No query is sent for an operation in the st_duplicate state
crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
QB_XS " id=%.8s", op->action, op->target, op->id);
return op;
default:
crm_notice("Requesting peer fencing (%s) targeting %s "
QB_XS " id=%.8s state=%s base_timeout=%ds",
op->action, op->target, op->id,
- stonith_op_state_str(op->state), op->base_timeout);
+ stonith__op_state_text(op->state), op->base_timeout);
}
// Build the query that is sent to the cluster below
query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
NULL, op->call_options);
crm_xml_add(query, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(query, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(query, PCMK__XA_ST_DEVICE_ACTION, op_requested_action(op));
crm_xml_add(query, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(query, PCMK__XA_ST_CLIENTID, op->client_id);
crm_xml_add(query, PCMK__XA_ST_CLIENTNAME, op->client_name);
crm_xml_add_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout);
/* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */
operation = crm_element_value(request, PCMK__XA_ST_OP);
if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
if (relay_op_id) {
crm_xml_add(query, PCMK__XA_ST_REMOTE_OP_RELAY, relay_op_id);
}
}
pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, query);
pcmk__xml_free(query);
// The query timer value is in milliseconds; query_timeout is in seconds
query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
op->query_timer = pcmk__create_timer((1000 * query_timeout), remote_op_query_timeout, op);
return op;
}
/* Bit flags restricting which peers find_best_peer() will consider */
enum find_best_peer_options {
    /*! Skip checking the target peer for capable fencing devices */
    FIND_PEER_SKIP_TARGET   = (1 << 0),

    /*! Only check the target peer for capable fencing devices */
    FIND_PEER_TARGET_ONLY   = (1 << 1),

    /*! Skip peers and devices that are not verified */
    FIND_PEER_VERIFIED_ONLY = (1 << 2),
};
/*!
 * \internal
 * \brief Check whether an operation with a given device is watchdog fencing
 *
 * \param[in] op      Fencing operation to check
 * \param[in] device  Device ID being considered (NULL counts as a match)
 *
 * \return true if a watchdog timeout is configured, \p device is not an
 *         explicit mismatch for the watchdog device, the operation's action
 *         is a fencing action, and the target does watchdog fencing
 */
static bool
is_watchdog_fencing(const remote_fencing_op_t *op, const char *device)
{
    if (stonith_watchdog_timeout_ms <= 0) {
        return false;
    }

    // Only an explicit mismatch is considered not a watchdog fencing.
    if (!pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
        return false;
    }

    if (!pcmk__is_fencing_action(op->action)) {
        return false;
    }

    return node_does_watchdog_fencing(op->target);
}
/*!
 * \internal
 * \brief Pick the first query result able to execute an operation
 *
 * \param[in]     device   Device ID required (may be NULL without topology)
 * \param[in,out] op       Fencing operation whose query results are searched
 * \param[in]     options  Group of enum find_best_peer_options flags
 *
 * \return First suitable peer in query-result order, or NULL if none
 */
static peer_device_info_t *
find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
{
    GList *result = NULL;
    gboolean verified_only = FALSE;

    if (options & FIND_PEER_VERIFIED_ONLY) {
        verified_only = TRUE;
    }

    // A topology search always needs a specific device
    if ((device == NULL) && pcmk_is_set(op->call_options, st_opt_topology)) {
        return NULL;
    }

    for (result = op->query_results; result != NULL; result = result->next) {
        peer_device_info_t *peer = result->data;
        bool peer_is_target = false;

        crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
                  peer->host, op->target, peer->ndevices,
                  pcmk__plural_s(peer->ndevices), peer->tried, options);

        peer_is_target = pcmk__str_eq(peer->host, op->target, pcmk__str_casei);

        if (peer_is_target && (options & FIND_PEER_SKIP_TARGET)) {
            continue;
        }
        if (!peer_is_target && (options & FIND_PEER_TARGET_ONLY)) {
            continue;
        }

        if (pcmk_is_set(op->call_options, st_opt_topology)) {
            // With topology, the peer must supply the specific device
            if (grab_peer_device(op, peer, device, verified_only)) {
                return peer;
            }
            continue;
        }

        if (!peer->tried
            && count_peer_devices(op, peer, verified_only,
                                  fenced_support_flag(op->action))) {
            /* No topology: Use the current best peer */
            crm_trace("Simple fencing");
            return peer;
        }
    }
    return NULL;
}
/*!
* \internal
* \brief Choose a peer to execute the current step of a fencing operation
*
* Preference order: a verified peer other than the target, then an unverified
* peer other than the target, then the target itself. If nothing is found and
* a topology is in use, the next topology level is tried.
*
* \param[in,out] op Fencing operation needing a peer
*
* \return Chosen peer, or NULL if none was found or replies are still awaited
*/
static peer_device_info_t *
stonith_choose_peer(remote_fencing_op_t * op)
{
const char *device = NULL;
peer_device_info_t *peer = NULL;
uint32_t active = fencing_active_peers();
do {
// With a device list, only the device at the cursor is considered
if (op->devices) {
device = op->devices->data;
crm_trace("Checking for someone to fence (%s) %s using %s",
op->action, op->target, device);
} else {
crm_trace("Checking for someone to fence (%s) %s",
op->action, op->target);
}
/* Best choice is a peer other than the target with verified access */
peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
if (peer) {
crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
return peer;
}
// While the query timer is set and replies are outstanding, keep waiting
if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
return NULL;
}
/* If no other peer has verified access, next best is unverified access */
peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
if (peer) {
crm_trace("Found best unverified peer %s", peer->host);
return peer;
}
/* If no other peer can do it, last option is self-fencing
* (which is never allowed for the "on" phase of a remapped reboot)
*/
if (op->phase != st_phase_on) {
peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
if (peer) {
crm_trace("%s will fence itself", peer->host);
return peer;
}
}
/* Try the next fencing level if there is one (unless we're in the "on"
* phase of a remapped "reboot", because we ignore errors in that case)
*/
} while ((op->phase != st_phase_on)
&& pcmk_is_set(op->call_options, st_opt_topology)
&& (advance_topology_level(op, false) == pcmk_rc_ok));
/* With a simple watchdog fencing configuration without a topology,
* "device" is NULL here. Consider it should be done with watchdog fencing.
*/
if (is_watchdog_fencing(op, device)) {
crm_info("Couldn't contact watchdog-fencing target-node (%s)",
op->target);
/* check_watchdog_fencing_and_wait will log additional info */
} else {
crm_notice("Couldn't find anyone to fence (%s) %s using %s",
op->action, op->target, (device? device : "any device"));
}
return NULL;
}
/*!
 * \internal
 * \brief Raise a fencing timeout to the watchdog timeout if necessary
 *
 * \param[in] specified_timeout  Timeout that was requested or configured
 * \param[in] action_specific    Whether the timeout came from a per-action
 *                               device setting (selects the warning wording)
 * \param[in] op                 Fencing operation the timeout applies to
 * \param[in] device             Device ID the timeout applies to, if any
 *
 * \return \p specified_timeout unless this is watchdog fencing, in which case
 *         the larger of it and the watchdog timeout, capped at INT_MAX
 */
static int
valid_fencing_timeout(int specified_timeout, bool action_specific,
                      const remote_fencing_op_t *op, const char *device)
{
    int timeout = specified_timeout;

    if (!is_watchdog_fencing(op, device)) {
        return timeout;
    }

    timeout = (int) QB_MIN(QB_MAX(specified_timeout,
                                  pcmk__timeout_ms2s(stonith_watchdog_timeout_ms)),
                           INT_MAX);

    if (timeout <= specified_timeout) {
        return timeout;     // Already long enough; nothing to warn about
    }

    if (action_specific) {
        crm_warn("pcmk_%s_timeout %ds for %s is too short (must be >= "
                 PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds "
                 "instead",
                 op->action, specified_timeout, device? device : "watchdog",
                 timeout, timeout);
    } else {
        crm_warn("Fencing timeout %ds is too short (must be >= "
                 PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds "
                 "instead",
                 specified_timeout, timeout, timeout);
    }
    return timeout;
}
/*!
 * \internal
 * \brief Get the timeout to use for one device in a fencing operation
 *
 * \param[in] op          Fencing operation being executed
 * \param[in] peer        Peer that reported the device (may be NULL)
 * \param[in] device      Device ID (may be NULL)
 * \param[in] with_delay  Whether to add the device's fencing delay
 *
 * \return Validated base timeout if the peer, device, or device properties
 *         are unavailable; otherwise the device's custom timeout for the
 *         current phase if it has one (else the base timeout), plus the
 *         delay when requested and not disabled
 */
static int
get_device_timeout(const remote_fencing_op_t *op,
                   const peer_device_info_t *peer, const char *device,
                   bool with_delay)
{
    device_properties_t *props = NULL;
    int timeout = valid_fencing_timeout(op->base_timeout, false, op, device);

    if ((peer == NULL) || (device == NULL)) {
        return timeout;
    }

    props = g_hash_table_lookup(peer->devices, device);
    if (props == NULL) {
        return timeout;
    }

    if (props->custom_action_timeout[op->phase]) {
        timeout = valid_fencing_timeout(props->custom_action_timeout[op->phase],
                                        true, op, device);
    }

    // op->client_delay < 0 means disable any static/random fencing delays
    if (!with_delay || (op->client_delay < 0)) {
        return timeout;
    }

    // delay_base is eventually limited by delay_max
    timeout += (props->delay_max[op->phase] > 0 ?
                props->delay_max[op->phase] : props->delay_base[op->phase]);
    return timeout;
}
/* User data handed to add_device_timeout() while summing a peer's timeouts */
struct timeout_data {
// Operation whose phase and timeouts are consulted
const remote_fencing_op_t *op;
// Peer whose devices are being iterated
const peer_device_info_t *peer;
// Running sum of the device timeouts added so far
int total_timeout;
};
/*!
 * \internal
 * \brief Add a device's timeout to a running total, unless the device has
 *        already been executed or is disallowed for the current phase
 *
 * \param[in]     key        GHashTable key (device ID)
 * \param[in]     value      GHashTable value (device properties)
 * \param[in,out] user_data  Timeout data
 */
static void
add_device_timeout(gpointer key, gpointer value, gpointer user_data)
{
    struct timeout_data *data = user_data;
    device_properties_t *props = value;
    const char *device_id = key;

    if (props->executed[data->op->phase]
        || props->disallowed[data->op->phase]) {
        return;     // This device contributes nothing in this phase
    }

    data->total_timeout += get_device_timeout(data->op, data->peer,
                                              device_id, true);
}
static int
get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
{
struct timeout_data timeout;
timeout.op = op;
timeout.peer = peer;
timeout.total_timeout = 0;
g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
}
/*!
 * \internal
 * \brief Estimate the total time a fencing operation may need
 *
 * With a topology in use, the estimate sums the timeout of every device at
 * every level, plus (for "on" actions) any automatic unfencing devices that
 * no level mentions. Without one, it is the chosen peer's timeout if a peer
 * was given, or the validated base timeout otherwise. A positive
 * client-requested delay is added on top.
 *
 * \param[in] op           Fencing operation to estimate
 * \param[in] chosen_peer  Peer selected to execute the operation, if any
 *                         (consulted only when no topology applies)
 *
 * \return Total timeout, capped at INT_MAX
 */
static int
get_op_total_timeout(const remote_fencing_op_t *op,
                     const peer_device_info_t *chosen_peer)
{
    long long total_timeout = 0;
    stonith_topology_t *tp = find_topology_for_host(op->target);

    if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
        int i;
        GList *device_list = NULL;
        GList *iter = NULL;
        GList *auto_list = NULL;

        if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)
            && (op->automatic_list != NULL)) {
            // Shallow working copy; entries are struck off as levels cover them
            auto_list = g_list_copy(op->automatic_list);
        }

        /* Yep, this looks scary, nested loops all over the place.
         * Here is what is going on.
         * Loop1: Iterate through fencing levels.
         * Loop2: If a fencing level has devices, loop through each device
         * Loop3: For each device in a fencing level, see what peer owns it
         *        and what that peer has reported the timeout is for the device.
         */
        for (i = 0; i < ST__LEVEL_COUNT; i++) {
            if (!tp->levels[i]) {
                continue;
            }
            for (device_list = tp->levels[i]; device_list;
                 device_list = device_list->next) {
                bool found = false;

                for (iter = op->query_results; iter != NULL;
                     iter = iter->next) {
                    const peer_device_info_t *peer = iter->data;

                    if (auto_list) {
                        GList *match = g_list_find_custom(auto_list,
                                                          device_list->data,
                                                          sort_strings);

                        if (match) {
                            auto_list = g_list_remove(auto_list, match->data);
                        }
                    }

                    if (find_peer_device(op, peer, device_list->data,
                                         fenced_support_flag(op->action))) {
                        total_timeout += get_device_timeout(op, peer,
                                                            device_list->data,
                                                            true);
                        found = true;
                        break;
                    }
                } /* End Loop3: match device with peer that owns device, find device's timeout period */

                /* For the watchdog device, add its timeout to the budget even
                 * if no peer replied for it
                 */
                if (!found && is_watchdog_fencing(op, device_list->data)) {
                    total_timeout += pcmk__timeout_ms2s(stonith_watchdog_timeout_ms);
                }
            } /* End Loop2: iterate through devices at a specific level */
        } /* End Loop1: iterate through fencing levels */

        // Add timeouts of automatic unfencing devices that no level covered
        if (auto_list) {
            for (iter = auto_list; iter != NULL; iter = iter->next) {
                GList *iter2 = NULL;

                /* The inner loop must advance its own cursor (iter2). Stepping
                 * iter here instead would leave iter2 stuck on the first peer
                 * and point iter into the query results.
                 */
                for (iter2 = op->query_results; iter2 != NULL;
                     iter2 = iter2->next) {
                    peer_device_info_t *peer = iter2->data;

                    if (find_peer_device(op, peer, iter->data,
                                         fenced_df_supports_on)) {
                        total_timeout += get_device_timeout(op, peer,
                                                            iter->data, true);
                        break;
                    }
                }
            }
        }

        g_list_free(auto_list);

    } else if (chosen_peer) {
        total_timeout = get_peer_timeout(op, chosen_peer);

    } else {
        total_timeout = valid_fencing_timeout(op->base_timeout, false, op,
                                              NULL);
    }

    if (total_timeout <= 0) {
        total_timeout = op->base_timeout;
    }

    /* Take any requested fencing delay into account to prevent it from eating
     * up the total timeout.
     */
    if (op->client_delay > 0) {
        total_timeout += op->client_delay;
    }
    return (int) QB_MIN(total_timeout, INT_MAX);
}
/*!
 * \internal
 * \brief Tell an operation's client what timeout the operation is using
 *
 * \param[in,out] op          Fencing operation whose client should be told
 * \param[in]     op_timeout  Timeout value to report
 */
static void
report_timeout_period(remote_fencing_op_t * op, int op_timeout)
{
    GList *dup_iter = NULL;
    xmlNode *update = NULL;
    const char *client_node = NULL;
    const char *client_id = NULL;
    const char *call_id = NULL;

    /* There is no reason to report the timeout for a synchronous call. It
     * is impossible to use the reported timeout to do anything when the client
     * is blocking for the response. This update is only important for
     * async calls that require a callback to report the results in.
     *
     * Without the original request, there is nothing to read the client
     * details from either.
     */
    if ((op->call_options & st_opt_sync_call) || (op->request == NULL)) {
        return;
    }

    crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
    client_node = crm_element_value(op->request, PCMK__XA_ST_CLIENTNODE);
    call_id = crm_element_value(op->request, PCMK__XA_ST_CALLID);
    client_id = crm_element_value(op->request, PCMK__XA_ST_CLIENTID);
    if ((client_node == NULL) || (call_id == NULL) || (client_id == NULL)) {
        return;
    }

    if (pcmk__str_eq(client_node, fenced_get_local_node(), pcmk__str_casei)) {
        // Client is connected to this node, so send update directly to them
        do_stonith_async_timeout_update(client_id, call_id, op_timeout);
        return;
    }

    /* The client is connected to another node, relay this update to them */
    update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
    crm_xml_add(update, PCMK__XA_ST_REMOTE_OP, op->id);
    crm_xml_add(update, PCMK__XA_ST_CLIENTID, client_id);
    crm_xml_add(update, PCMK__XA_ST_CALLID, call_id);
    crm_xml_add_int(update, PCMK__XA_ST_TIMEOUT, op_timeout);

    pcmk__cluster_send_message(pcmk__get_node(0, client_node, NULL,
                                              pcmk__node_search_cluster_member),
                               pcmk_ipc_fenced, update);
    pcmk__xml_free(update);

    // Repeat the report for every operation merged into this one
    for (dup_iter = op->duplicates; dup_iter != NULL;
         dup_iter = dup_iter->next) {
        remote_fencing_op_t *dup = dup_iter->data;

        crm_trace("Reporting timeout for duplicate %.8s to client %s",
                  dup->id, dup->client_name);
        report_timeout_period(dup, op_timeout);
    }
}
/*!
 * \internal
 * \brief Move an operation on to the next device in its topology level
 *
 * \param[in,out] op      Fencer operation to advance
 * \param[in]     device  ID of device that just completed
 * \param[in,out] msg     If not NULL, XML reply of last delegated operation
 */
static void
advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
                                 xmlNode *msg)
{
    // Step the cursor to the next device at this level, if there is one
    if (op->devices != NULL) {
        op->devices = op->devices->next;
    }

    // Handle automatic unfencing if an "on" action was requested
    if ((op->phase == st_phase_requested)
        && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) {

        // The device that just ran no longer counts as required
        remove_required_device(op, device);

        /* Once this level has no more devices, continue with whatever
         * automatic unfencing devices remain
         */
        if (op->devices == NULL) {
            op->devices = op->automatic_list;
        }
    }

    if ((op->devices == NULL) && (op->phase == st_phase_off)) {
        /* This level and the required devices are finished, but "reboot" had
         * been remapped to "off", so start over with "on". If any devices
         * need to be turned back on, op->devices will be non-NULL after this.
         */
        op_phase_on(op);
    }

    // This function is only called if the previous device succeeded
    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

    if (op->devices == NULL) {
        // All devices and phases are finished, so finalize the operation
        crm_trace("Marking complex fencing op targeting %s as complete",
                  op->target);
        op->state = st_done;
        finalize_op(op, msg, false);
        return;
    }

    // Devices remain, so execute the next one
    crm_trace("Next targeting %s on behalf of %s@%s",
              op->target, op->client_name, op->originator);

    // The requested delay has been applied for the first device
    if (op->client_delay > 0) {
        op->client_delay = 0;
    }
    request_peer_fencing(op, NULL);
}
/*!
 * \internal
 * \brief Start waiting for a target to self-fence, if it does watchdog fencing
 *
 * \param[in,out] op  Fencing operation to wait on
 *
 * \return TRUE if a watchdog timer was armed for the operation, FALSE if the
 *         target does not do watchdog fencing
 */
static gboolean
check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
{
    guint timeout_ms = 0;

    if (!node_does_watchdog_fencing(op->target)) {
        crm_debug("Skipping fallback to watchdog-fencing as %s is "
                  "not in host-list", op->target);
        return FALSE;
    }

    timeout_ms = QB_MIN(stonith_watchdog_timeout_ms, UINT_MAX);

    crm_notice("Waiting %s for %s to self-fence (%s) for "
               "client %s " QB_XS " id=%.8s",
               pcmk__readable_interval(timeout_ms), op->target, op->action,
               op->client_name, op->id);

    // Replace any per-device timer that is already pending
    if (op->op_timer_one) {
        g_source_remove(op->op_timer_one);
    }
    op->op_timer_one = pcmk__create_timer(timeout_ms, remote_op_watchdog_done,
                                          op);
    return TRUE;
}
/*!
* \internal
* \brief Ask a peer to execute a fencing operation
*
* \param[in,out] op Fencing operation to be executed
* \param[in,out] peer If NULL or topology is in use, choose best peer to
* execute the fencing, otherwise use this peer
*/
static void
request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
{
const char *device = NULL;
int timeout;
CRM_CHECK(op != NULL, return);
crm_trace("Action %.8s targeting %s for %s is %s",
op->id, op->target, op->client_name,
- stonith_op_state_str(op->state));
+ stonith__op_state_text(op->state));
if ((op->phase == st_phase_on) && (op->devices != NULL)) {
/* We are in the "on" phase of a remapped topology reboot. If this
* device has pcmk_reboot_action="off", or doesn't support the "on"
* action, skip it.
*
* We can't check device properties at this point because we haven't
* chosen a peer for this stage yet. Instead, we check the local node's
* knowledge about the device. If different versions of the fence agent
* are installed on different nodes, there's a chance this could be
* mistaken, but the worst that could happen is we don't try turning the
* node back on when we should.
*/
device = op->devices->data;
if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF,
pcmk__str_none)) {
crm_info("Not turning %s back on using %s because the device is "
"configured to stay off (pcmk_reboot_action='off')",
op->target, device);
advance_topology_device_in_level(op, device, NULL);
return;
}
if (!fenced_device_supports_on(device)) {
crm_info("Not turning %s back on using %s because the agent "
"doesn't support 'on'", op->target, device);
advance_topology_device_in_level(op, device, NULL);
return;
}
}
timeout = op->base_timeout;
if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
peer = stonith_choose_peer(op);
}
if (!op->op_timer_total) {
op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
op->op_timer_total = pcmk__create_timer(1000 * op->total_timeout, remote_op_timeout, op);
report_timeout_period(op, op->total_timeout);
crm_info("Total timeout set to %ds for peer's fencing targeting %s for %s "
QB_XS " id=%.8s",
op->total_timeout, op->target, op->client_name, op->id);
}
if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
/* Ignore the caller's peer preference if topology is in use, because
* that peer might not have access to the required device. With
* topology, stonith_choose_peer() removes the device from further
* consideration, so the timeout must be calculated beforehand.
*
* @TODO Basing the total timeout on the caller's preferred peer (above)
* is less than ideal.
*/
peer = stonith_choose_peer(op);
device = op->devices->data;
/* Fencing timeout sent to peer takes no delay into account.
* The peer will add a dedicated timer for any delay upon
* schedule_stonith_command().
*/
timeout = get_device_timeout(op, peer, device, false);
}
if (peer) {
int timeout_one = 0;
xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
const pcmk__node_status_t *peer_node =
pcmk__get_node(0, peer->host, NULL,
pcmk__node_search_cluster_member);
if (op->client_delay > 0) {
/* Take requested fencing delay into account to prevent it from
* eating up the timeout.
*/
timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay;
}
crm_xml_add(remote_op, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(remote_op, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ACTION, op->action);
crm_xml_add(remote_op, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(remote_op, PCMK__XA_ST_CLIENTID, op->client_id);
crm_xml_add(remote_op, PCMK__XA_ST_CLIENTNAME, op->client_name);
crm_xml_add_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout);
crm_xml_add_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options);
crm_xml_add_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay);
if (device) {
timeout_one += TIMEOUT_MULTIPLY_FACTOR *
get_device_timeout(op, peer, device, true);
crm_notice("Requesting that %s perform '%s' action targeting %s "
"using %s " QB_XS " for client %s (%ds)",
peer->host, op->action, op->target, device,
op->client_name, timeout_one);
crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ID, device);
} else {
timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
crm_notice("Requesting that %s perform '%s' action targeting %s "
QB_XS " for client %s (%ds, %s)",
peer->host, op->action, op->target, op->client_name,
timeout_one,
pcmk__readable_interval(stonith_watchdog_timeout_ms));
}
op->state = st_exec;
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
op->op_timer_one = 0;
}
if (!is_watchdog_fencing(op, device)
|| !check_watchdog_fencing_and_wait(op)) {
/* Some thoughts about self-fencing cases reaching this point:
- Actually check in check_watchdog_fencing_and_wait
shouldn't fail if STONITH_WATCHDOG_ID is
chosen as fencing-device and it being present implies
watchdog-fencing is enabled anyway
- If watchdog-fencing is disabled either in general or for
a specific target - detected in check_watchdog_fencing_and_wait -
for some other kind of self-fencing we can't expect
a success answer but timeout is fine if the node doesn't
come back in between
- Delicate might be the case where we have watchdog-fencing
enabled for a node but the watchdog-fencing-device isn't
explicitly chosen for self-fencing. Local scheduler execution
in sbd might detect the node as unclean and lead to timely
self-fencing. Otherwise the selection of
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT at least is questionable.
*/
/* coming here we're not waiting for watchdog timeout -
thus engage timer with timeout evaluated before */
op->op_timer_one = pcmk__create_timer((1000 * timeout_one), remote_op_timeout_one, op);
}
pcmk__cluster_send_message(peer_node, pcmk_ipc_fenced, remote_op);
peer->tried = TRUE;
pcmk__xml_free(remote_op);
return;
} else if (op->phase == st_phase_on) {
/* A remapped "on" cannot be executed, but the node was already
* turned off successfully, so ignore the error and continue.
*/
crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
"after successful 'off'", device, op->target);
advance_topology_device_in_level(op, device, NULL);
return;
} else if (op->owner == FALSE) {
crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
op->action, op->target, op->client_name);
} else if (op->query_timer == 0) {
/* We've exhausted all available peers */
crm_info("No remaining peers capable of fencing (%s) %s for client %s "
QB_XS " state=%s", op->action, op->target, op->client_name,
- stonith_op_state_str(op->state));
+ stonith__op_state_text(op->state));
CRM_CHECK(op->state < st_done, return);
finalize_timed_out_op(op, "All nodes failed, or are unable, to "
"fence target");
} else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
/* if the operation never left the query state,
* but we have all the expected replies, then no devices
* are available to execute the fencing operation. */
if (is_watchdog_fencing(op, device)
&& check_watchdog_fencing_and_wait(op)) {
/* Consider a watchdog fencing targeting an offline node executing
* once it starts waiting for the target to self-fence. So that when
* the query timer pops, remote_op_query_timeout() considers the
* fencing already in progress.
*/
op->state = st_exec;
return;
}
if (op->state == st_query) {
crm_info("No peers (out of %d) have devices capable of fencing "
"(%s) %s for client %s " QB_XS " state=%s",
op->replies, op->action, op->target, op->client_name,
- stonith_op_state_str(op->state));
+ stonith__op_state_text(op->state));
pcmk__reset_result(&op->result);
pcmk__set_result(&op->result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL);
} else {
if (pcmk_is_set(op->call_options, st_opt_topology)) {
pcmk__reset_result(&op->result);
pcmk__set_result(&op->result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL);
}
/* ... else use existing result from previous failed attempt
* (topology is not in use, and no devices remain to be attempted).
* Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
* prevent finalize_op() from setting the correct delegate if
* needed.
*/
crm_info("No peers (out of %d) are capable of fencing (%s) %s "
"for client %s " QB_XS " state=%s",
op->replies, op->action, op->target, op->client_name,
- stonith_op_state_str(op->state));
+ stonith__op_state_text(op->state));
}
op->state = st_failed;
finalize_op(op, NULL, false);
} else {
crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
"for client %s " QB_XS " id=%.8s",
op->action, op->target, (device? " using " : ""),
(device? device : ""), op->client_name, op->id);
}
}
/*!
 * \internal
 * \brief Comparison function that orders query results by device count
 *
 * Used as the sort callback when inserting peers into an operation's query
 * results, so that peers reporting more devices come first.
 *
 * \param[in] a  First peer_device_info_t (GList item data) to compare
 * \param[in] b  Second peer_device_info_t (GList item data) to compare
 *
 * \return Device count of \p b minus device count of \p a: negative if \p a
 *         should sort before \p b, 0 if they are equal, positive if \p a
 *         should sort after \p b
 */
static gint
sort_peers(gconstpointer a, gconstpointer b)
{
    const peer_device_info_t *first = a;
    const peer_device_info_t *second = b;

    // Reversed operands give a descending sort on the number of devices
    return (second->ndevices - first->ndevices);
}
/*!
* \internal
* \brief Determine if all the devices in the topology are found or not
*
* For every device listed in every level of the target's fencing topology,
* look through the query results collected so far for a peer for which
* find_peer_device() returns a match.
*
* \param[in] op Fencing operation with topology to check
*
* \return TRUE if a match was found for every topology device, FALSE if the
* target has no topology or at least one device has no match
*/
static gboolean
all_topology_devices_found(const remote_fencing_op_t *op)
{
GList *device = NULL;
GList *iter = NULL;
device_properties_t *match = NULL;
stonith_topology_t *tp = NULL;
gboolean skip_target = FALSE;
int i;
tp = find_topology_for_host(op->target);
if (!tp) {
// No topology for this target, so there is nothing that could be "found"
return FALSE;
}
if (pcmk__is_fencing_action(op->action)) {
/* Don't count the devices on the target node if we are killing
* the target node. */
skip_target = TRUE;
}
// Walk every level, and every device within each level
for (i = 0; i < ST__LEVEL_COUNT; i++) {
for (device = tp->levels[i]; device; device = device->next) {
match = NULL;
// Stop scanning peers as soon as one of them yields a match
for (iter = op->query_results; iter && !match; iter = iter->next) {
peer_device_info_t *peer = iter->data;
if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
continue;
}
- match = find_peer_device(op, peer, device->data, st_device_supports_none);
+ match = find_peer_device(op, peer, device->data,
+ fenced_df_none);
}
if (!match) {
// A single unmatched device is enough to fail the whole check
return FALSE;
}
}
}
return TRUE;
}
/*!
 * \internal
 * \brief Read the per-action device properties out of a query reply element
 *
 * Fills in the \p phase slot of the custom action timeout, maximum random
 * delay, base delay, and "disallowed" entries of \p props from attributes of
 * \p xml. For the "on" action, a device marked as required is also added to
 * the operation's required devices.
 *
 * \param[in]     xml     XML element containing the properties
 * \param[in]     peer    Name of peer that sent XML (for logs)
 * \param[in]     device  Device ID (for logs)
 * \param[in]     action  Action the properties relate to (for logs)
 * \param[in,out] op      Fencing operation that properties are being parsed for
 * \param[in]     phase   Phase the properties relate to
 * \param[in,out] props   Device properties to update
 */
static void
parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
                      const char *action, remote_fencing_op_t *op,
                      enum st_remap_phase phase, device_properties_t *props)
{
    int *action_timeout = &props->custom_action_timeout[phase];
    int *max_delay = &props->delay_max[phase];
    int *base_delay = &props->delay_base[phase];

    // Each slot is zeroed before the attribute is read into it

    *action_timeout = 0;
    crm_element_value_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, action_timeout);
    if (*action_timeout != 0) {
        crm_trace("Peer %s with device %s returned %s action timeout %ds",
                  peer, device, action, *action_timeout);
    }

    *max_delay = 0;
    crm_element_value_int(xml, PCMK__XA_ST_DELAY_MAX, max_delay);
    if (*max_delay != 0) {
        crm_trace("Peer %s with device %s returned maximum of random delay %ds for %s",
                  peer, device, *max_delay, action);
    }

    *base_delay = 0;
    crm_element_value_int(xml, PCMK__XA_ST_DELAY_BASE, base_delay);
    if (*base_delay != 0) {
        crm_trace("Peer %s with device %s returned base delay %ds for %s",
                  peer, device, *base_delay, action);
    }

    /* Handle devices with automatic unfencing */
    if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
        int is_required = 0;

        crm_element_value_int(xml, PCMK__XA_ST_REQUIRED, &is_required);
        if (is_required != 0) {
            crm_trace("Peer %s requires device %s to execute for action %s",
                      peer, device, action);
            add_required_device(op, device);
        }
    }

    /* If a reboot is remapped to off+on, it's possible that a node is allowed
     * to perform one action but not another.
     */
    if (pcmk__xe_attr_is_true(xml, PCMK__XA_ST_ACTION_DISALLOWED)) {
        props->disallowed[phase] = TRUE;
        crm_trace("Peer %s is disallowed from executing %s for device %s",
                  peer, action, device);
    }
}
/*!
* \internal
* \brief Parse one device's properties from peer's XML query reply
*
* Allocates a new properties object, stores it in the peer's device table
* under a copy of \p device, and then fills it in from \p xml and from any
* "off"/"on" child elements of \p xml.
*
* \param[in] xml XML node containing device properties
* \param[in,out] op Operation that query and reply relate to
* \param[in,out] peer Peer's device information
* \param[in] device ID of device being parsed
*/
static void
add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
peer_device_info_t *peer, const char *device)
{
xmlNode *child;
int verified = 0;
device_properties_t *props =
pcmk__assert_alloc(1, sizeof(device_properties_t));
int rc = pcmk_rc_ok;
/* Add a new entry to this peer's devices list */
// The table takes the entry before it is populated; props is filled in below
g_hash_table_insert(peer->devices, pcmk__str_copy(device), props);
/* Peers with verified (monitored) access will be preferred */
crm_element_value_int(xml, PCMK__XA_ST_MONITOR_VERIFIED, &verified);
if (verified) {
crm_trace("Peer %s has confirmed a verified device %s",
peer->host, device);
props->verified = TRUE;
}
// Nodes <2.1.5 won't set this, so assume unfencing in that case
rc = pcmk__xe_get_flags(xml, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS,
&(props->device_support_flags),
- st_device_supports_on);
+ fenced_df_supports_on);
if (rc != pcmk_rc_ok) {
// Not fatal: only warn, and continue with whatever flags were stored
crm_warn("Couldn't determine device support for %s "
"(assuming unfencing): %s", device, pcmk_rc_str(rc));
}
/* Parse action-specific device properties */
// Values on the element itself apply to the action that was requested
parse_action_specific(xml, peer->host, device, op_requested_action(op),
op, st_phase_requested, props);
for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
child = pcmk__xe_next(child, NULL)) {
/* Replies for "reboot" operations will include the action-specific
* values for "off" and "on" in child elements, just in case the reboot
* winds up getting remapped.
*/
// Children are recognized by ID; any other child element is ignored
if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_OFF, pcmk__str_none)) {
parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF,
op, st_phase_off, props);
} else if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_ON,
pcmk__str_none)) {
parse_action_specific(child, peer->host, device, PCMK_ACTION_ON,
op, st_phase_on, props);
}
}
}
/*!
 * \internal
 * \brief Build a peer entry from an XML query reply and store it in the op
 *
 * Every child element of \p xml that has an ID is treated as one device and
 * parsed with add_device_properties(). The new entry is inserted into the
 * operation's query results in sort_peers() order.
 *
 * \param[in,out] op        Operation that query and reply relate to
 * \param[in]     host      Name of peer that sent this reply
 * \param[in]     ndevices  Number of devices expected in reply
 * \param[in]     xml       XML node containing device list
 *
 * \return Newly allocated result structure with parsed reply
 */
static peer_device_info_t *
add_result(remote_fencing_op_t *op, const char *host, int ndevices,
           const xmlNode *xml)
{
    peer_device_info_t *peer = pcmk__assert_alloc(1,
                                                  sizeof(peer_device_info_t));
    xmlNode *entry = NULL;

    peer->host = pcmk__str_copy(host);
    peer->devices = pcmk__strkey_table(free, free);

    /* Each child element describes one capable device available to the peer */
    entry = pcmk__xe_first_child(xml, NULL, NULL, NULL);
    while (entry != NULL) {
        const char *device_id = pcmk__xe_id(entry);

        if (device_id != NULL) {
            add_device_properties(entry, op, peer, device_id);
        }
        entry = pcmk__xe_next(entry, NULL);
    }

    // The count actually parsed is what gets recorded; a mismatch is only logged
    peer->ndevices = g_hash_table_size(peer->devices);
    CRM_CHECK(ndevices == peer->ndevices,
              crm_err("Query claimed to have %d device%s but %d found",
                      ndevices, pcmk__plural_s(ndevices), peer->ndevices));

    op->query_results = g_list_insert_sorted(op->query_results, peer,
                                             sort_peers);
    return peer;
}
/*!
* \internal
* \brief Handle a peer's reply to our fencing query
*
* Parse a query result from XML and store it in the remote operation
* table, and when enough replies have been received, issue a fencing request.
*
* \param[in] msg XML reply received
*
* \return pcmk_ok on success, -errno on error (-EPROTO if the reply lacks the
* expected elements or operation ID, -EOPNOTSUPP if the operation is
* not in the remote operation table)
*
* \note See initiate_remote_stonith_op() for how the XML query was initially
* formed, and stonith_query() for how the peer formed its XML reply.
*/
int
process_remote_stonith_query(xmlNode *msg)
{
int ndevices = 0;
gboolean host_is_target = FALSE;
gboolean have_all_replies = FALSE;
const char *id = NULL;
const char *host = NULL;
remote_fencing_op_t *op = NULL;
peer_device_info_t *peer = NULL;
uint32_t replies_expected;
// First locate the element that carries the remote operation ID
xmlNode *dev = pcmk__xpath_find_one(msg->doc,
"//*[@" PCMK__XA_ST_REMOTE_OP "]",
LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
CRM_CHECK(id != NULL, return -EPROTO);
// dev is reused: from here on it is the element holding the device count
dev = pcmk__xpath_find_one(msg->doc,
"//*[@" PCMK__XA_ST_AVAILABLE_DEVICES "]",
LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
crm_element_value_int(dev, PCMK__XA_ST_AVAILABLE_DEVICES, &ndevices);
op = g_hash_table_lookup(stonith_remote_op_list, id);
if (op == NULL) {
crm_debug("Received query reply for unknown or expired operation %s",
id);
return -EOPNOTSUPP;
}
// Expect the smaller of the active peer count and the op's own expectation
replies_expected = fencing_active_peers();
if (op->replies_expected < replies_expected) {
replies_expected = op->replies_expected;
}
// This reply is counted even if it reports no devices
if ((++op->replies >= replies_expected) && (op->state == st_query)) {
have_all_replies = TRUE;
}
host = crm_element_value(msg, PCMK__XA_SRC);
host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);
crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
op->replies, replies_expected, host,
op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
// peer stays NULL when the reply reports no devices
if (ndevices > 0) {
peer = add_result(op, host, ndevices, dev);
}
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (pcmk_is_set(op->call_options, st_opt_topology)) {
/* If we start the fencing before all the topology results are in,
* it is possible fencing levels will be skipped because of the missing
* query results. */
if (op->state == st_query && all_topology_devices_found(op)) {
/* All the query results are in for the topology, start the fencing ops. */
crm_trace("All topology devices found");
request_peer_fencing(op, peer);
} else if (have_all_replies) {
crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
request_peer_fencing(op, NULL);
}
} else if (op->state == st_query) {
// NOTE(review): peer may be NULL here; presumably count_peer_devices()
// tolerates that -- confirm against its definition
int nverified = count_peer_devices(op, peer, TRUE,
fenced_support_flag(op->action));
/* We have a result for a non-topology fencing op that looks promising,
* go ahead and start fencing before query timeout */
if ((peer != NULL) && !host_is_target && nverified) {
/* we have a verified device living on a peer that is not the target */
crm_trace("Found %d verified device%s",
nverified, pcmk__plural_s(nverified));
request_peer_fencing(op, peer);
} else if (have_all_replies) {
crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
request_peer_fencing(op, NULL);
} else {
crm_trace("Waiting for more peer results before launching fencing operation");
}
} else if ((peer != NULL) && (op->state == st_done)) {
// The operation already finished; the late reply is only logged
crm_info("Discarding query result from %s (%d device%s): "
"Operation is %s", peer->host,
peer->ndevices, pcmk__plural_s(peer->ndevices),
- stonith_op_state_str(op->state));
+ stonith__op_state_text(op->state));
}
return pcmk_ok;
}
/*!
 * \internal
 * \brief Handle a peer's reply to a fencing request
 *
 * Parse a fencing reply from XML, and either finalize the operation
 * or attempt another device as appropriate.
 *
 * The reply's result is stored in the matching operation. A broadcast reply
 * finalizes the operation directly. Otherwise the reply is only acted on if
 * the local node originated the operation: with topology in use, the next
 * device or level is tried as appropriate; without topology, the operation
 * is finalized or retried with another peer.
 *
 * \param[in] msg  XML reply received
 */
void
fenced_process_fencing_reply(xmlNode *msg)
{
    const char *id = NULL;
    const char *device = NULL;
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = pcmk__xpath_find_one(msg->doc,
                                        "//*[@" PCMK__XA_ST_REMOTE_OP "]",
                                        LOG_ERR);
    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;

    CRM_CHECK(dev != NULL, return);

    id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
    CRM_CHECK(id != NULL, return);

    // dev is reused: from here on it is the element carrying the result
    dev = stonith__find_xe_with_result(msg);
    CRM_CHECK(dev != NULL, return);

    stonith__xe_get_result(dev, &result);

    device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);

    if (stonith_remote_op_list) {
        op = g_hash_table_lookup(stonith_remote_op_list, id);
    }

    if ((op == NULL) && pcmk__result_ok(&result)) {
        /* Record successful fencing operations */
        const char *client_id = crm_element_value(dev, PCMK__XA_ST_CLIENTID);

        op = create_remote_stonith_op(client_id, dev, TRUE);
    }

    if (op == NULL) {
        /* Could be for an event that began before we started */
        /* TODO: Record the op for later querying */
        crm_info("Received peer result of unknown or expired operation %s", id);
        pcmk__reset_result(&result);
        return;
    }

    pcmk__reset_result(&op->result);
    op->result = result; // The operation takes ownership of the result

    if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
        crm_err("Received outdated reply for device %s (instead of %s) to "
                "fence (%s) %s. Operation already timed out at peer level.",
                device, (const char *) op->devices->data, op->action, op->target);
        return;
    }

    if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT),
                     PCMK__VALUE_BROADCAST, pcmk__str_none)) {

        if (pcmk__result_ok(&op->result)) {
            op->state = st_done;
        } else {
            op->state = st_failed;
        }
        finalize_op(op, msg, false);
        return;

    } else if (!pcmk__str_eq(op->originator, fenced_get_local_node(),
                             pcmk__str_casei)) {
        /* If this isn't a remote level broadcast, and we are not the
         * originator of the operation, we should not be receiving this msg. */
        crm_err("Received non-broadcast fencing result for operation %.8s "
                "we do not own (device %s targeting %s)",
                op->id, device, op->target);
        return;
    }

    if (pcmk_is_set(op->call_options, st_opt_topology)) {
        /* Device ID as given on the reply message itself. Deliberately a
         * separately named variable rather than a shadow of the outer
         * "device", which was read from the result element.
         */
        const char *reply_device = NULL;
        const char *reason = op->result.exit_reason;

        /* We own the op, and it is complete. broadcast the result to all nodes
         * and notify our local clients. */
        if (op->state == st_done) {
            finalize_op(op, msg, false);
            return;
        }

        reply_device = crm_element_value(msg, PCMK__XA_ST_DEVICE_ID);

        if ((op->phase == st_phase_on) && !pcmk__result_ok(&op->result)) {
            /* A remapped "on" failed, but the node was already turned off
             * successfully, so ignore the error and continue.
             */
            crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
                     "after successful 'off'",
                     reply_device,
                     pcmk_exec_status_str(op->result.execution_status),
                     (reason == NULL)? "" : ": ",
                     (reason == NULL)? "" : reason,
                     op->target);
            pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
        } else {
            crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
                       "%s%s%s%s",
                       op->action, op->target,
                       ((reply_device == NULL)? "" : " using "),
                       ((reply_device == NULL)? "" : reply_device),
                       op->client_name,
                       op->originator,
                       pcmk_exec_status_str(op->result.execution_status),
                       (reason == NULL)? "" : " (",
                       (reason == NULL)? "" : reason,
                       (reason == NULL)? "" : ")");
        }

        if (pcmk__result_ok(&op->result)) {
            /* An operation completed successfully. Try another device if
             * necessary, otherwise mark the operation as done. */
            advance_topology_device_in_level(op, reply_device, msg);
            return;
        } else {
            /* This device failed, time to try another topology level. If no other
             * levels are available, mark this operation as failed and report results. */
            if (advance_topology_level(op, false) != pcmk_rc_ok) {
                op->state = st_failed;
                finalize_op(op, msg, false);
                return;
            }
        }

    } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
        op->state = st_done;
        finalize_op(op, msg, false);
        return;

    } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
               && (op->devices == NULL)) {
        /* If the operation timed out don't bother retrying other peers. */
        op->state = st_failed;
        finalize_op(op, msg, false);
        return;

    } else {
        /* fall-through and attempt other fencing action using another peer */
    }

    /* Retry on failure */
    // Logged as client@originator, matching the notice above
    crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
              op->target, op->client_name, op->originator,
              pcmk_exec_status_str(op->result.execution_status));
    request_peer_fencing(op, NULL);
}
/*!
 * \internal
 * \brief Check whether a target was already fenced within a tolerance window
 *
 * Scans the remote operation table for a completed operation with the same
 * target and action whose completion time is no more than \p tolerance
 * seconds in the past.
 *
 * \param[in] tolerance  Window in seconds (values <= 0 disable the check)
 * \param[in] target     Name of the fencing target
 * \param[in] action     Fencing action to look for
 *
 * \return TRUE if a matching recent operation exists, otherwise FALSE
 */
gboolean
stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
{
    GHashTableIter iter;
    time_t now = time(NULL);
    remote_fencing_op_t *rop = NULL;

    if ((tolerance <= 0) || (stonith_remote_op_list == NULL)
        || (target == NULL) || (action == NULL)) {
        return FALSE;
    }

    g_hash_table_iter_init(&iter, stonith_remote_op_list);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
        /* We don't have to worry about remapped reboots here
         * because if state is done, any remapping has been undone
         */
        if ((strcmp(rop->target, target) == 0)
            && (rop->state == st_done)
            && (strcmp(rop->action, action) == 0)
            && ((rop->completed + tolerance) >= now)) {

            crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
                       target, action, tolerance, rop->delegate, rop->originator);
            return TRUE;
        }
    }
    return FALSE;
}
diff --git a/daemons/fenced/fenced_scheduler.c b/daemons/fenced/fenced_scheduler.c
index a67fef5c3a..00a1952c17 100644
--- a/daemons/fenced/fenced_scheduler.c
+++ b/daemons/fenced/fenced_scheduler.c
@@ -1,258 +1,258 @@
/*
* Copyright 2009-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <errno.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <pacemaker-internal.h>
#include <pacemaker-fenced.h>
// fenced_scheduler_run() assumes it's the only place scheduler->input gets set
static pcmk_scheduler_t *scheduler = NULL;
/*!
 * \internal
 * \brief Initialize scheduler data for fencer purposes
 *
 * Creates a log output object and a scheduler object, registers the
 * scheduler and library messages with the output object, sets its log level
 * to trace, and attaches it to the scheduler.
 *
 * \return Standard Pacemaker return code (ENOMEM if the scheduler object
 *         could not be created)
 */
int
fenced_scheduler_init(void)
{
    pcmk__output_t *logger = NULL;
    int rc = pcmk__log_output_new(&logger);

    if (rc == pcmk_rc_ok) {
        scheduler = pcmk_new_scheduler();

        if (scheduler != NULL) {
            pe__register_messages(logger);
            pcmk__register_lib_messages(logger);
            pcmk__output_set_log_level(logger, LOG_TRACE);
            scheduler->priv->out = logger;

        } else {
            // Without a scheduler, nothing will own the logger
            pcmk__output_free(logger);
            rc = ENOMEM;
        }
    }
    return rc;
}
/*!
 * \internal
 * \brief Set the local node name for scheduling purposes
 *
 * The scheduler object must already exist (this is asserted). A copy of
 * \p node_name is stored in it.
 *
 * \param[in] node_name  Name to set as local node name
 *
 * \note NOTE(review): any previously stored name is overwritten without
 *       being freed here; presumably this is called only once -- confirm
 *       against callers.
 */
void
fenced_set_local_node(const char *node_name)
{
    char *name_copy = NULL;

    pcmk__assert(scheduler != NULL);

    name_copy = pcmk__str_copy(node_name);
    scheduler->priv->local_node_name = name_copy;
}
/*!
 * \internal
 * \brief Get the local node name
 *
 * \return Local node name stored in the scheduler object, or NULL if the
 *         scheduler object does not exist
 */
const char *
fenced_get_local_node(void)
{
    return (scheduler != NULL)? scheduler->priv->local_node_name : NULL;
}
/*!
 * \internal
 * \brief Free all scheduler-related resources
 *
 * Finishes and frees the scheduler's output object (if any), then frees the
 * scheduler object itself and clears the file-scope pointer. Does nothing if
 * there is no scheduler object.
 */
void
fenced_scheduler_cleanup(void)
{
    pcmk__output_t *logger = NULL;

    if (scheduler == NULL) {
        return;
    }

    logger = scheduler->priv->out;
    if (logger != NULL) {
        logger->finish(logger, CRM_EX_OK, true, NULL);
        pcmk__output_free(logger);
        scheduler->priv->out = NULL;
    }

    pcmk_free_scheduler(scheduler);
    scheduler = NULL;
}
/*!
 * \internal
 * \brief Check whether the local node is in a resource's allowed node list
 *
 * \param[in] rsc  Resource to check
 *
 * \return Pointer to the allowed-node entry whose name matches the local
 *         node name, or NULL if \p rsc is NULL, no local node name is set,
 *         or no entry matches
 */
static pcmk_node_t *
local_node_allowed_for(const pcmk_resource_t *rsc)
{
    GHashTableIter iter;
    pcmk_node_t *candidate = NULL;

    if ((rsc == NULL) || (scheduler->priv->local_node_name == NULL)) {
        return NULL;
    }

    g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
    while (g_hash_table_iter_next(&iter, NULL, (void **) &candidate)) {
        if (pcmk__str_eq(candidate->priv->name,
                         scheduler->priv->local_node_name, pcmk__str_casei)) {
            return candidate;
        }
    }
    return NULL;
}
/*!
* \internal
* \brief If a given resource or any of its children are fencing devices,
* register the devices
*
* A device is skipped (with an info-level log) if it is disabled, if it is the
* watchdog device while watchdog fencing is disabled, if the local node is not
* in its allowed nodes, or if the local node (or the device's group on the
* local node) has a negative score for it.
*
* \param[in,out] data Resource to check
* \param[in,out] user_data Ignored
*/
static void
register_if_fencing_device(gpointer data, gpointer user_data)
{
pcmk_resource_t *rsc = data;
// Registration uses the history ID when there is one; logs use rsc->id
const char *rsc_id = pcmk__s(rsc->priv->history_id, rsc->id);
xmlNode *xml = NULL;
GHashTableIter hash_iter;
pcmk_node_t *node = NULL;
const char *name = NULL;
const char *value = NULL;
const char *agent = NULL;
const char *rsc_provides = NULL;
stonith_key_value_t *params = NULL;
// If this is a collective resource, check children instead
if (rsc->priv->children != NULL) {
for (GList *iter = rsc->priv->children;
iter != NULL; iter = iter->next) {
register_if_fencing_device(iter->data, NULL);
if (pcmk__is_clone(rsc)) {
return; // Only one instance needs to be checked for clones
}
}
return;
}
if (!pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) {
return; // Not a fencing device
}
if (pe__resource_is_disabled(rsc)) {
crm_info("Ignoring fencing device %s because it is disabled", rsc->id);
return;
}
// A non-positive watchdog timeout is treated as watchdog fencing disabled
if ((stonith_watchdog_timeout_ms <= 0) &&
pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
crm_info("Ignoring fencing device %s "
"because watchdog fencing is disabled", rsc->id);
return;
}
// Check whether local node is allowed to run resource
node = local_node_allowed_for(rsc);
if (node == NULL) {
crm_info("Ignoring fencing device %s "
"because local node is not allowed to run it", rsc->id);
return;
}
if (node->assign->score < 0) {
crm_info("Ignoring fencing device %s "
"because local node has preference %s for it",
rsc->id, pcmk_readable_score(node->assign->score));
return;
}
// If device is in a group, check whether local node is allowed for group
if (pcmk__is_group(rsc->priv->parent)) {
pcmk_node_t *group_node = local_node_allowed_for(rsc->priv->parent);
// Only a negative group score excludes the device; a missing entry does not
if ((group_node != NULL) && (group_node->assign->score < 0)) {
crm_info("Ignoring fencing device %s "
"because local node has preference %s for its group",
rsc->id, pcmk_readable_score(group_node->assign->score));
return;
}
}
crm_debug("Reloading configuration of fencing device %s", rsc->id);
agent = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE);
get_meta_attributes(rsc->priv->meta, rsc, NULL, scheduler);
rsc_provides = g_hash_table_lookup(rsc->priv->meta,
PCMK_STONITH_PROVIDES);
// Copy the resource parameters for the local node into a key/value list
g_hash_table_iter_init(&hash_iter, pe_rsc_params(rsc, node, scheduler));
while (g_hash_table_iter_next(&hash_iter, (gpointer *) &name,
(gpointer *) &value)) {
if ((name == NULL) || (value == NULL)) {
continue;
}
- params = stonith_key_value_add(params, name, value);
+ params = stonith__key_value_add(params, name, value);
}
xml = create_device_registration_xml(rsc_id, st_namespace_any, agent,
params, rsc_provides);
// The list is released once the registration XML has been built from it
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
// NOTE(review): the registration call is the argument of the assertion,
// so registration failure is treated as an assertion failure
pcmk__assert(fenced_device_register(xml, true) == pcmk_rc_ok);
pcmk__xml_free(xml);
}
/*!
* \internal
* \brief Run the scheduler for fencer purposes
*
* Runs the scheduler on \p cib with the location-only and no-counts flags set,
* then calls register_if_fencing_device() on every resource it produced.
* Does nothing if \p cib is NULL, the scheduler object does not exist, or the
* scheduler already has an input set.
*
* \param[in] cib CIB to use as scheduler input
*
* \note Scheduler object is reset before returning, but \p cib is not freed.
*/
void
fenced_scheduler_run(xmlNode *cib)
{
CRM_CHECK((cib != NULL) && (scheduler != NULL)
&& (scheduler->input == NULL), return);
// Start from a clean scheduler state before borrowing the caller's CIB
pcmk_reset_scheduler(scheduler);
scheduler->input = cib;
pcmk__set_scheduler_flags(scheduler,
pcmk__sched_location_only|pcmk__sched_no_counts);
pcmk__schedule_actions(scheduler);
g_list_foreach(scheduler->priv->resources, register_if_fencing_device,
NULL);
// Detach the borrowed CIB first, so the reset below cannot free it
scheduler->input = NULL; // Wasn't a copy, so don't let API free it
pcmk_reset_scheduler(scheduler);
}
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index cb862b2284..e1b6e1f6cd 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,666 +1,671 @@
/*
* Copyright 2009-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> // PRIu32, PRIx32
#include <crm/crm.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/output_internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <pacemaker-fenced.h>
// One-line description of this daemon
#define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
// Watchdog fencing timeout in milliseconds; device registration treats a
// value <= 0 as watchdog fencing being disabled
// @TODO This should be guint
long long stonith_watchdog_timeout_ms = 0;
// NOTE(review): presumably the nodes that watchdog fencing applies to --
// confirm where this list is populated
GList *stonith_watchdog_targets = NULL;
static GMainLoop *mainloop = NULL;
// While set, st_ipc_accept() refuses new IPC clients
gboolean stonith_shutdown_flag = FALSE;
static qb_ipcs_service_t *ipcs = NULL;
static pcmk__output_t *out = NULL;
// Output formats table, terminated by an all-NULL entry
pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
// NOTE(review): presumably filled in from the command line -- confirm
static struct {
gboolean stand_alone;
gchar **log_files;
} options;
// Initialized to success
crm_exit_t exit_code = CRM_EX_OK;
static void stonith_cleanup(void);
/*!
 * \internal
 * \brief Decide whether to accept a new IPC client connection
 *
 * \param[in,out] c    New connection
 * \param[in]     uid  User ID of the connecting client
 * \param[in]     gid  Group ID of the connecting client
 *
 * \return 0 if a client object was created for the connection,
 *         -ECONNREFUSED if the daemon is shutting down, or -ENOMEM if the
 *         client object could not be created
 */
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    int32_t rc = 0;

    if (stonith_shutdown_flag) {
        crm_info("Ignoring new client [%d] during shutdown",
                 pcmk__client_pid(c));
        rc = -ECONNREFUSED;

    } else if (pcmk__new_client(c, uid, gid) == NULL) {
        rc = -ENOMEM;
    }
    return rc;
}
/*!
 * \internal
 * \brief Handle a request received from a local IPC client
 *
 * Parses the request into XML, stamps it with the client's ID, name, and the
 * local node name, and passes it to stonith_command(). A request whose task
 * is CRM_OP_RM_NODE_CACHE is instead relayed to the cluster and not processed
 * locally.
 *
 * \param[in,out] qbc   Connection the request arrived on
 * \param[in]     data  Raw request data
 * \param[in]     size  Size of \p data (not used here)
 *
 * \return Always 0 (how the IPC layer interprets other values is not shown
 *         in this file section)
 */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    uint32_t call_options = st_opt_none;
    xmlNode *request = NULL;
    pcmk__client_t *c = pcmk__find_client(qbc);
    const char *op = NULL;
    int rc = pcmk_rc_ok;

    if (c == NULL) {
        // %p requires a void pointer argument
        crm_info("Invalid client: %p", (void *) qbc);
        return 0;
    }

    request = pcmk__client_data2xml(c, data, &id, &flags);
    if (request == NULL) {
        pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL);
        return 0;
    }

    op = crm_element_value(request, PCMK__XA_CRM_TASK);
    if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
        // Relay the request to the cluster instead of handling it locally
        crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
        crm_xml_add(request, PCMK__XA_ST_OP, op);
        crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
        crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
        crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node());

        pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, request);
        pcmk__xml_free(request);
        return 0;
    }

    if (c->name == NULL) {
        // First request from this client: derive its name from the request
        const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);

        c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid);
    }

    rc = pcmk__xe_get_flags(request, PCMK__XA_ST_CALLOPT, &call_options,
                            st_opt_none);
    if (rc != pcmk_rc_ok) {
        crm_warn("Couldn't parse options from IPC request: %s",
                 pcmk_rc_str(rc));
    }

    // All three values are uint32_t, so all use the <inttypes.h> macros
    crm_trace("Flags %#08" PRIx32 "/%#08" PRIx32 " for command %" PRIu32
              " from client %s", flags, call_options, id, pcmk__client_name(c));

    if (pcmk_is_set(call_options, st_opt_sync_call)) {
        pcmk__assert(pcmk_is_set(flags, crm_ipc_client_response));
        CRM_LOG_ASSERT(c->request_id == 0);     /* This means the client has two synchronous events in-flight */
        c->request_id = id;     /* Reply only to the last one */
    }

    crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
    crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
    crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node());

    crm_log_xml_trace(request, "ipc-received");
    stonith_command(c, id, flags, request, NULL);

    pcmk__xml_free(request);
    return 0;
}
/*!
 * \internal
 * \brief Handle the closing of an IPC client connection
 *
 * Frees the client object associated with the connection, if there is one.
 *
 * \param[in,out] c  Connection that was closed
 *
 * \return Always 0, meaning: yes, go ahead and destroy the connection
 */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Connection %p closed", c);
        pcmk__free_client(client);
    }

    /* 0 means: yes, go ahead and destroy the connection */
    return 0;
}
/*!
 * \internal
 * \brief Handle the destruction of a client IPC connection
 *
 * Log the event, then run the same client cleanup used when a connection is
 * closed (which does nothing if no client is found for the connection).
 *
 * \param[in,out] c  Connection being destroyed
 */
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p destroyed", c);

    // The return value carries no information here (it is always 0)
    (void) st_ipc_closed(c);
}
/*!
 * \internal
 * \brief Process a fencer message that arrived from a cluster peer
 *
 * Messages whose operation is \c STONITH_OP_POKE are dropped. Anything else is
 * traced and handed to stonith_command() together with the value of the
 * message's \c PCMK__XA_SRC attribute.
 *
 * \param[in,out] msg           Message XML
 * \param[in]     private_data  Ignored
 */
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
    const char *op = crm_element_value(msg, PCMK__XA_ST_OP);

    if (!pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) {
        const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC);

        crm_log_xml_trace(msg, "Peer[inbound]");
        stonith_command(NULL, 0, 0, msg, remote_peer);
    }
}
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Handle a message delivered by the Corosync CPG layer
 *
 * Extract the message text, parse it as XML, record the sender in the
 * \c PCMK__XA_SRC attribute, and pass the result to stonith_peer_callback().
 *
 * \param[in] handle     CPG handle the message arrived on
 * \param[in] groupName  Unused
 * \param[in] nodeid     Passed through to pcmk__cpg_message_data()
 * \param[in] pid        Passed through to pcmk__cpg_message_data()
 * \param[in] msg        Raw message
 * \param[in] msg_len    Unused
 */
static void
handle_cpg_message(cpg_handle_t handle, const struct cpg_name *groupName,
                   uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    const char *from = NULL;
    xmlNode *xml = NULL;
    char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);

    if (data == NULL) {
        return;
    }

    xml = pcmk__xml_parse(data);
    if (xml != NULL) {
        crm_xml_add(xml, PCMK__XA_SRC, from);
        stonith_peer_callback(xml, NULL);
        pcmk__xml_free(xml);

    } else {
        crm_err("Invalid XML: '%.120s'", data);
    }

    // The extracted text is ours to free on both paths
    free(data);
}
/*!
 * \internal
 * \brief Handle loss of the cluster layer connection
 *
 * Installed in main() as the cluster object's destroy function when the
 * cluster layer is Corosync. Logs a critical message and starts daemon
 * shutdown.
 *
 * \param[in] user_data  Ignored
 */
static void
stonith_peer_cs_destroy(gpointer user_data)
{
crm_crit("Lost connection to cluster layer, shutting down");
// The argument is the signal number, which stonith_shutdown() does not use
stonith_shutdown(0);
}
#endif
/*!
 * \internal
 * \brief Send a reply or event to the local client that originated a request
 *
 * For a synchronous call (\c st_opt_sync_call set in \p call_options), the
 * message is sent as the response to the client's pending request ID, which is
 * then reset to 0. Otherwise it is sent as a server event with request ID 0.
 *
 * \param[in]     notify_src    XML to send
 * \param[in,out] client        Client to send to
 * \param[in]     call_options  Group of fencer call options for the request
 */
void
do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
               int call_options)
{
    const bool sync_call = pcmk_is_set(call_options, st_opt_sync_call);
    uint32_t ipc_flags = crm_ipc_server_event;
    int rid = 0;
    int local_rc = pcmk_rc_ok;

    if (sync_call) {
        // A synchronous reply must be matched to the request awaiting it
        CRM_LOG_ASSERT(client->request_id);
        rid = client->request_id;
        client->request_id = 0;
        ipc_flags = crm_ipc_flags_none;
    }

    local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
    if (local_rc != pcmk_rc_ok) {
        crm_warn("%synchronous reply to client %s failed: %s",
                 (sync_call? "S" : "As"),
                 pcmk__client_name(client), pcmk_rc_str(local_rc));
        return;
    }

    crm_trace("Sent response %d to client %s",
              rid, pcmk__client_name(client));
}
-uint64_t
-get_stonith_flag(const char *name)
+/*!
+ * \internal
+ * \brief Parse a fencer client notification type string to a flag
+ *
+ * \param[in] type Notification type string
+ *
+ * \return Flag corresponding to \p type, or \c fenced_nf_none if none exists
+ */
+enum fenced_notify_flags
+fenced_parse_notify_flag(const char *type)
{
- if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) {
- return st_callback_notify_fence;
-
- } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
- return st_callback_device_add;
-
- } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
- return st_callback_device_del;
-
- } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY,
- pcmk__str_none)) {
- return st_callback_notify_history;
-
- } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
- pcmk__str_none)) {
- return st_callback_notify_history_synced;
-
+ if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) {
+ return fenced_nf_fence_result;
+ }
+ if (pcmk__str_eq(type, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
+ return fenced_nf_device_registered;
+ }
+ if (pcmk__str_eq(type, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
+ return fenced_nf_device_removed;
+ }
+ if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) {
+ return fenced_nf_history_changed;
+ }
+ if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
+ pcmk__str_none)) {
+ return fenced_nf_history_synced;
}
- return st_callback_unknown;
+ return fenced_nf_none;
}
/*!
 * \internal
 * \brief Send a notification to one IPC client if it subscribed to that type
 *
 * Used as the per-client function given to pcmk__foreach_ipc_client().
 *
 * \param[in]     key        Ignored
 * \param[in,out] value      Client to consider (\c pcmk__client_t)
 * \param[in]     user_data  Notification XML to send (\c xmlNode)
 */
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
const xmlNode *update_msg = user_data;
pcmk__client_t *client = value;
const char *type = NULL;
CRM_CHECK(client != NULL, return);
CRM_CHECK(update_msg != NULL, return);
// The notification type is carried in the message's PCMK__XA_SUBT attribute
type = crm_element_value(update_msg, PCMK__XA_SUBT);
CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
if (client->ipcs == NULL) {
crm_trace("Skipping client with NULL channel");
return;
}
// Send only if the client's flags include the flag matching this type
-if (pcmk_is_set(client->flags, get_stonith_flag(type))) {
+if (pcmk_is_set(client->flags, fenced_parse_notify_flag(type))) {
int rc = pcmk__ipc_send_xml(client, 0, update_msg,
crm_ipc_server_event);
if (rc != pcmk_rc_ok) {
crm_warn("%s notification of client %s failed: %s "
QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
pcmk_rc_str(rc), client->id, rc);
} else {
crm_trace("Sent %s notification to client %s",
type, pcmk__client_name(client));
}
}
}
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
pcmk__client_t *client = NULL;
xmlNode *notify_data = NULL;
if (!timeout || !call_id || !client_id) {
return;
}
client = pcmk__find_client_by_id(client_id);
if (!client) {
return;
}
notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE);
crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE);
crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id);
crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout);
crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
if (client) {
pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
}
pcmk__xml_free(notify_data);
}
/*!
 * \internal
 * \brief Build a notification and offer it to every connected IPC client
 *
 * Each client receives the notification only if it subscribed to \p type.
 *
 * \param[in] type    Notification type (set as both the subtype and the
 *                    operation of the notification)
 * \param[in] result  Result of fencing operation (assume success if NULL)
 * \param[in] data    If not NULL, a copy is added to the notification as call
 *                    data
 */
void
fenced_send_notification(const char *type, const pcmk__action_result_t *result,
                         xmlNode *data)
{
    /* TODO: Standardize the contents of data */
    xmlNode *notification = NULL;

    CRM_LOG_ASSERT(type != NULL);

    notification = pcmk__xe_create(NULL, PCMK__XE_NOTIFY);
    crm_xml_add(notification, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
    crm_xml_add(notification, PCMK__XA_SUBT, type);
    crm_xml_add(notification, PCMK__XA_ST_OP, type);
    stonith__xe_set_result(notification, result);

    if (data != NULL) {
        // The caller keeps ownership of data; only a copy is attached
        pcmk__xml_copy(pcmk__xe_create(notification, PCMK__XE_ST_CALLDATA),
                       data);
    }

    crm_trace("Notifying clients");
    pcmk__foreach_ipc_client(stonith_notify_client, notification);
    pcmk__xml_free(notification);
    crm_trace("Notify complete");
}
/*!
* \internal
* \brief Send notifications for a configuration change to subscribed clients
*
- * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD,
- * \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or
- * \c STONITH_OP_LEVEL_DEL)
+ * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD or
+ * \c STONITH_OP_DEVICE_DEL)
* \param[in] result Operation result
* \param[in] desc Description of what changed (device ID)
*/
void
fenced_send_config_notification(const char *op,
                                const pcmk__action_result_t *result,
                                const char *desc)
{
    // The call data is an element named after the operation itself
    xmlNode *change = pcmk__xe_create(NULL, op);

    crm_xml_add(change, PCMK__XA_ST_DEVICE_ID, desc);

    // The notification takes a copy of the call data, so ours is freed here
    fenced_send_notification(op, result, change);
    pcmk__xml_free(change);
}
/*!
* \internal
* \brief Check whether a node does watchdog-fencing
*
* \param[in] node Name of node to check
*
* \return TRUE if node found in stonith_watchdog_targets
* or stonith_watchdog_targets is empty indicating
* all nodes are doing watchdog-fencing
*/
gboolean
node_does_watchdog_fencing(const char *node)
{
return ((stonith_watchdog_targets == NULL) ||
pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
}
/*!
 * \internal
 * \brief Begin shutting down the fencer
 *
 * Set the global shutdown flag and, if the main loop exists and is running,
 * ask it to quit. Registered in main() as the SIGTERM handler, and also called
 * when the cluster layer connection is lost.
 *
 * \param[in] nsig  Ignored
 */
void
stonith_shutdown(int nsig)
{
    crm_info("Terminating with %d clients", pcmk__ipc_client_count());
    stonith_shutdown_flag = TRUE;

    if ((mainloop == NULL) || !g_main_loop_is_running(mainloop)) {
        return;
    }
    g_main_loop_quit(mainloop);
}
/*!
 * \internal
 * \brief Release the fencer's global resources before exit
 *
 * Called from the common exit path of main(), so it also runs when start-up
 * stopped early.
 */
static void
stonith_cleanup(void)
{
fenced_cib_cleanup();
// NOTE(review): presumably ipcs is NULL unless main() reached
// pcmk__serve_fenced_ipc() -- confirm against its definition
if (ipcs) {
qb_ipcs_destroy(ipcs);
}
pcmk__cluster_destroy_node_caches();
pcmk__client_cleanup();
free_stonith_remote_op_list();
free_topology_list();
fenced_free_device_table();
free_metadata_cache();
fenced_unregister_handlers();
}
/* libqb IPC server callbacks for the fencer; passed to
 * pcmk__serve_fenced_ipc() in main(). No callback is installed for
 * connection creation.
 */
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = NULL,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
 * \internal
 * \brief Callback for peer status changes
 *
 * Unless the change is of type \c pcmk__node_update_processes or the node is
 * flagged as remote, broadcast a \c STONITH_OP_POKE message to the fencers in
 * the cluster.
 *
 * \param[in] type  What changed
 * \param[in] node  What peer had the change
 * \param[in] data  Previous value of what changed
 */
static void
st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
                        const void *data)
{
    xmlNode *poke = NULL;

    if ((type == pcmk__node_update_processes)
        || pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return;
    }

    /*
     * This is a hack until we can send to a nodeid and/or we fix node name lookups
     * These messages are ignored in stonith_peer_callback()
     */
    poke = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
    crm_xml_add(poke, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
    crm_xml_add(poke, PCMK__XA_ST_OP, STONITH_OP_POKE);

    crm_debug("Broadcasting our uname because of node %" PRIu32,
              node->cluster_layer_id);
    pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, poke);

    pcmk__xml_free(poke);
}
/* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or
 * crm_resource --list-options=fencing instead of querying daemon metadata.
 *
 * NOTE: pcs (as of at least 0.11.8) uses this
 */
/*!
 * \internal
 * \brief Output the fencer's daemon metadata for the fencing options
 *
 * \return Return code of pcmk__daemon_metadata()
 */
static int
fencer_metadata(void)
{
    return pcmk__daemon_metadata(out, PCMK__SERVER_FENCED,
                                 N_("Instance attributes available for all "
                                    "\"stonith\"-class resources"),
                                 N_("Instance attributes available for all "
                                    "\"stonith\"-class resources and used by "
                                    "Pacemaker's fence daemon"),
                                 pcmk__opt_fencing);
}
/* Fencer-specific command-line options, added to the option context in
 * build_arg_context(). The parsed values are stored in the file's options
 * structure.
 */
static GOptionEntry entries[] = {
{ "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE,
&options.stand_alone, N_("Intended for use in regression testing only"),
NULL },
{ "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
&options.log_files, N_("Send logs to the additional named logfile"), NULL },
// Terminating entry
{ NULL }
};
/*!
 * \internal
 * \brief Create the command-line option context for the fencer
 *
 * \param[in,out] args   Common argument object to associate with the context
 * \param[out]    group  Passed through to pcmk__build_arg_context()
 *
 * \return Newly created option context, with the fencer-specific options
 *         from \c entries added
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionContext *context = pcmk__build_arg_context(args,
                                                      "text (default), xml",
                                                      group, NULL);

    pcmk__add_main_args(context, entries);
    return context;
}
/*!
 * \brief Entry point of the Pacemaker fencer daemon
 *
 * Parses the command line, handles the version and "metadata" requests,
 * refuses to start if another fencer already answers on the IPC endpoint,
 * connects to the cluster, starts the IPC server, and runs the main loop.
 * All paths end in the common cleanup at \c done, which exits the process
 * with the global \c exit_code.
 *
 * \param[in] argc  Argument count
 * \param[in] argv  Argument vector
 */
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
pcmk_cluster_t *cluster = NULL;
crm_ipc_t *old_instance = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
GOptionContext *context = build_arg_context(args, &output_group);
crm_log_preinit(NULL, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
// A first positional argument of "metadata" prints the metadata and exits
if ((g_strv_length(processed_args) >= 2)
&& pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
rc = fencer_metadata();
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Unable to display metadata: %s", pcmk_rc_str(rc));
}
goto done;
}
// Open additional log files
pcmk__add_logfiles(options.log_files, out);
crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
(args->verbosity > 0), argc, argv, FALSE);
crm_notice("Starting Pacemaker fencer");
// Try connecting as a client to detect a fencer that is already running
old_instance = crm_ipc_new("stonith-ng", 0);
if (old_instance == NULL) {
/* crm_ipc_new() will have already logged an error message with
* crm_err()
*/
exit_code = CRM_EX_FATAL;
goto done;
}
if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
// IPC endpoint already up
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_crit("Aborting start-up because another fencer instance is "
"already active");
// NOTE(review): exit_code is not changed on this path -- confirm that
// exiting with its prior value is intended
goto done;
} else {
// Not up or not authentic, we'll proceed either way
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
mainloop_add_signal(SIGTERM, stonith_shutdown);
pcmk__cluster_init_node_caches();
rc = fenced_scheduler_init();
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error initializing scheduler data: %s", pcmk_rc_str(rc));
goto done;
}
cluster = pcmk_cluster_new();
#if SUPPORT_COROSYNC
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy);
pcmk_cpg_set_deliver_fn(cluster, handle_cpg_message);
pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb);
}
#endif // SUPPORT_COROSYNC
pcmk__cluster_set_status_callback(&st_peer_update_callback);
if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
crm_crit("Cannot sign in to the cluster... terminating");
goto done;
}
fenced_set_local_node(cluster->priv->node_name);
// The CIB connection is skipped in stand-alone mode
if (!options.stand_alone) {
setup_cib();
}
fenced_init_device_table();
init_topology_list();
pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
// Create the mainloop and run it...
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker fencer successfully started and accepting connections");
g_main_loop_run(mainloop);
// Common exit path: reached after the main loop ends and from every early exit
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
g_strfreev(options.log_files);
stonith_cleanup();
pcmk_cluster_free(cluster);
fenced_scheduler_cleanup();
pcmk__output_and_clear_error(&error, out);
// out stays NULL if the exit happened before the output object was created
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index c1590438cc..23a9b03235 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -1,333 +1,403 @@
/*
* Copyright 2009-2025 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <stdint.h> // uint32_t, uint64_t
#include <libxml/tree.h> // xmlNode
#include <crm/common/mainloop.h>
#include <crm/cluster.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
/*!
* \internal
* \brief Check whether target has already been fenced recently
*
* \param[in] tolerance Number of seconds to look back in time
* \param[in] target Name of node to search for
* \param[in] action Action we want to match
*
* \return TRUE if an equivalent fencing operation took place in the last
* \p tolerance seconds, FALSE otherwise
*/
gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
+/*!
+ * \internal
+ * \brief Flags for \c fenced_device_t configuration, state, and support
+ */
+enum fenced_device_flags {
+ //! This flag has no effect
+ fenced_df_none = UINT32_C(0),
+
+ //! Device supports list action
+ fenced_df_supports_list = (UINT32_C(1) << 0),
+
+ //! Device supports on action
+ fenced_df_supports_on = (UINT32_C(1) << 1),
+
+ //! Device supports reboot action
+ fenced_df_supports_reboot = (UINT32_C(1) << 2),
+
+ //! Device supports status action
+ fenced_df_supports_status = (UINT32_C(1) << 3),
+
+ //! Device is automatically used to unfence newly joined nodes
+ fenced_df_auto_unfence = (UINT32_C(1) << 4),
+
+ //! Device has run a successful list, status, or monitor action on this node
+ fenced_df_verified = (UINT32_C(1) << 5),
+
+ //! Device has been registered via the stonith API
+ fenced_df_api_registered = (UINT32_C(1) << 6),
+
+ //! Device has been registered via the fencer's CIB diff callback
+ fenced_df_cib_registered = (UINT32_C(1) << 7),
+
+ //! Device has not yet been re-registered after a CIB change
+ fenced_df_dirty = (UINT32_C(1) << 8),
+};
+
+/*!
+ * \internal
+ * \brief Set flags for a fencing device
+ *
+ * \param[in,out] device Device whose flags to set (\c fenced_device_t)
+ * \param[in] set_flags Group of <tt>enum fenced_device_flags</tt> to set
+ */
+#define fenced_device_set_flags(device, set_flags) do { \
+ pcmk__assert((device) != NULL); \
+ (device)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
+ "Fence device", (device)->id, \
+ (device)->flags, set_flags, \
+ #set_flags); \
+ } while (0)
+
+/*!
+ * \internal
+ * \brief Clear flags for a fencing device
+ *
+ * \param[in,out] device Device whose flags to clear (\c fenced_device_t)
+ * \param[in] clear_flags Group of <tt>enum fenced_device_flags</tt> to
+ * clear
+ */
+#define fenced_device_clear_flags(device, clear_flags) do { \
+ pcmk__assert((device) != NULL); \
+ (device)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
+ LOG_TRACE, "Fence device", \
+ (device)->id, \
+ (device)->flags, \
+ clear_flags, #clear_flags); \
+ } while (0)
+
+/*!
+ * \internal
+ * \brief Flags for fencer client notification types
+ */
+enum fenced_notify_flags {
+ //! This flag has no effect
+ fenced_nf_none = UINT32_C(0),
+
+ //! Notify about fencing operation results
+ fenced_nf_fence_result = (UINT32_C(1) << 0),
+
+ // @TODO Consider notifying about device registrations via the CIB
+ //! Notify about fencing device registrations via the fencer API
+ fenced_nf_device_registered = (UINT32_C(1) << 1),
+
+ // @TODO Consider notifying about device removals via the CIB
+ //! Notify about fencing device removals via the fencer API
+ fenced_nf_device_removed = (UINT32_C(1) << 2),
+
+ //! Notify about changes to fencing history
+ fenced_nf_history_changed = (UINT32_C(1) << 3),
+
+ /* @FIXME A comment in stonith_fence_history() says its check is not
+ * conclusive: it may send a "history synced" notification when the history
+ * has not been synced. Hence "might have been synced" below. Try to find a
+ * better test.
+ */
+ //! Notify when the fencing history might have been synced
+ fenced_nf_history_synced = (UINT32_C(1) << 4),
+};
+
+enum fenced_notify_flags fenced_parse_notify_flag(const char *type);
+
typedef struct {
char *id;
char *agent;
char *namespace;
/*! list of actions that must execute on the target node. Used for unfencing */
GString *on_target_actions;
GList *targets;
time_t targets_age;
- /* whether the cluster should automatically unfence nodes with the device */
- gboolean automatic_unfencing;
-
- uint32_t flags; // Group of enum st_device_flags
+ uint32_t flags; // Group of enum fenced_device_flags
GHashTable *params;
GHashTable *aliases;
GList *pending_ops;
mainloop_timer_t *timer;
crm_trigger_t *work;
xmlNode *agent_metadata;
-
- /*! A verified device is one that has contacted the
- * agent successfully to perform a monitor operation */
- gboolean verified;
-
- gboolean cib_registered;
- gboolean api_registered;
- gboolean dirty;
+ const char *default_host_arg;
} fenced_device_t;
/* These values are used to index certain arrays by "phase". Usually an
 * operation has only one "phase", so phase is always zero. However, some
 * reboots are remapped to "off" then "on", in which case "reboot" will be
 * phase 0, "off" will be phase 1 and "on" will be phase 2.
 */
enum st_remap_phase {
st_phase_requested = 0, // The action as originally requested
st_phase_off = 1, // "off" step of a remapped reboot
st_phase_on = 2, // "on" step of a remapped reboot
st_phase_max = 3 // Number of phases (not itself a phase)
};
/* State of one fencing operation as tracked by the fencer, from the initial
 * device query through execution to the final result
 */
typedef struct remote_fencing_op_s {
/* @TODO Abstract the overlap with async_command_t (some members have
 * different names for the same thing), which should allow reducing
 * duplication in some functions
 */
/*! The unique ID associated with this operation */
char *id;
/*! The node this operation will fence */
char *target;
/*! The fencing action to perform on the target (reboot, on, off) */
char *action;
/*! When the fencing action was recorded (seconds since epoch) */
time_t created;
/*! Whether the final notifications have been sent to local stonith clients */
gboolean notify_sent;
/*! The number of query replies received */
guint replies;
/*! The number of query replies expected */
guint replies_expected;
/*! Whether this node owns control of this operation */
gboolean owner;
/*! After the query is complete, this is the high-level timer that expires
 * the entire operation */
guint op_timer_total;
/*! This timer expires the current fencing request. Many fencing
 * requests may exist in a single operation */
guint op_timer_one;
/*! This timer expires the query request sent out to determine
 * which nodes have which devices, and whom those devices can fence */
guint query_timer;
/*! This is the default timeout to use for each fencing device if no
 * custom timeout is received in the query. */
gint base_timeout;
/*! This is the calculated total timeout an operation can take before
 * expiring. This is calculated by adding together all the timeout
 * values associated with the devices this fencing operation may call */
gint total_timeout;
/*!
 * Fencing delay (in seconds) requested by API client (used by controller to
 * implement \c PCMK_OPT_PRIORITY_FENCING_DELAY). A value of -1 means
 * disable all configured delays.
 */
int client_delay;
/*! Delegate is the node being asked to perform a fencing action
 * on behalf of the node that owns the remote operation. Some operations
 * will involve multiple delegates. This value represents the final delegate
 * that is used. */
char *delegate;
/*! The point at which the remote operation completed */
time_t completed;
//! Group of enum stonith_call_options associated with this operation
uint32_t call_options;
/*! The current state of the remote operation. This indicates
 * what stage the op is in: query, exec, done, duplicate, failed. */
enum op_state state;
/*! The node that owns the remote operation */
char *originator;
/*! The local client ID that initiated the fencing request */
char *client_id;
/*! The client's call ID that initiated the fencing request */
int client_callid;
/*! The name of the client that initiated the fencing request */
char *client_name;
/*! List of the received query results for all the nodes in the cpg group */
GList *query_results;
/*! The original request that initiated the remote stonith operation */
xmlNode *request;
/*! The current topology level being executed */
guint level;
/*! The current operation phase being executed */
enum st_remap_phase phase;
/*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
GList *automatic_list;
/*! List of all devices at the currently executing topology level */
GList *devices_list;
/*! Current entry in the topology device list */
GList *devices;
/*! List of duplicate operations attached to this operation. Once this operation
 * completes, the duplicate operations will be closed out as well. */
GList *duplicates;
/*! The point at which the remote operation completed (nsec) */
long long completed_nsec;
/*! The (potentially intermediate) result of the operation */
pcmk__action_result_t result;
} remote_fencing_op_t;
void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged);
-// Fencer-specific client flags
-enum st_client_flags {
- st_callback_unknown = UINT64_C(0),
- st_callback_notify_fence = (UINT64_C(1) << 0),
- st_callback_device_add = (UINT64_C(1) << 2),
- st_callback_device_del = (UINT64_C(1) << 4),
- st_callback_notify_history = (UINT64_C(1) << 5),
- st_callback_notify_history_synced = (UINT64_C(1) << 6)
-};
-
// How the user specified the target of a topology level
// (stored in the kind member of stonith_topology_t)
enum fenced_target_by {
fenced_target_by_unknown = -1, // Invalid or not yet parsed
fenced_target_by_name, // By target name
fenced_target_by_pattern, // By a pattern matching target names
fenced_target_by_attribute, // By a node attribute/value on target
};
/*
 * Complex fencing requirements are specified via fencing topologies.
 * A topology consists of levels; each level is a list of fencing devices.
 * Topologies are stored in a hash table by node name. When a node needs to be
 * fenced, if it has an entry in the topology table, the levels are tried
 * sequentially, and the devices in each level are tried sequentially.
 * Fencing is considered successful as soon as any level succeeds;
 * a level is considered successful if all its devices succeed.
 * Essentially, all devices at a given level are "and-ed" and the
 * levels are "or-ed".
 *
 * This structure is used for the topology table entries.
 * Topology levels start from 1, so levels[0] is unused and always NULL.
 */
typedef struct stonith_topology_s {
enum fenced_target_by kind; // How target was specified
/*! Node name regex or attribute name=value for which topology applies */
char *target;
// NOTE(review): the three members below presumably hold the parsed pieces
// of target for the attribute and pattern kinds -- confirm where they are set
char *target_value;
char *target_pattern;
char *target_attribute;
/*! Names of fencing devices at each topology level */
GList *levels[ST__LEVEL_COUNT];
} stonith_topology_t;
void stonith_shutdown(int nsig);
void fenced_init_device_table(void);
void fenced_free_device_table(void);
bool fenced_has_watchdog_device(void);
void fenced_foreach_device(GHFunc fn, gpointer user_data);
void fenced_foreach_device_remove(GHRFunc fn);
void init_topology_list(void);
void free_topology_list(void);
void free_stonith_remote_op_list(void);
void init_stonith_remote_op_hash_table(GHashTable **table);
void free_metadata_cache(void);
void fenced_unregister_handlers(void);
-uint64_t get_stonith_flag(const char *name);
-
void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
xmlNode *op_request, const char *remote_peer);
int fenced_device_register(const xmlNode *dev, bool from_cib);
void stonith_device_remove(const char *id, bool from_cib);
char *stonith_level_key(const xmlNode *msg, enum fenced_target_by);
-void fenced_register_level(xmlNode *msg, char **desc,
- pcmk__action_result_t *result);
-void fenced_unregister_level(xmlNode *msg, char **desc,
- pcmk__action_result_t *result);
+void fenced_register_level(xmlNode *msg, pcmk__action_result_t *result);
+void fenced_unregister_level(xmlNode *msg, pcmk__action_result_t *result);
stonith_topology_t *find_topology_for_host(const char *host);
void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
int call_options);
xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data,
const pcmk__action_result_t *result);
void
do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
void fenced_send_notification(const char *type,
const pcmk__action_result_t *result,
xmlNode *data);
void fenced_send_config_notification(const char *op,
const pcmk__action_result_t *result,
const char *desc);
remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client,
xmlNode *request,
gboolean manual_ack);
void fenced_process_fencing_reply(xmlNode *msg);
int process_remote_stonith_query(xmlNode * msg);
void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
void stonith_fence_history(xmlNode *msg, xmlNode **output,
const char *remote_peer, int options);
void stonith_fence_history_trim(void);
bool fencing_peer_active(pcmk__node_status_t *peer);
void set_fencing_completed(remote_fencing_op_t * op);
int fenced_handle_manual_confirmation(const pcmk__client_t *client,
xmlNode *msg);
const char *fenced_device_reboot_action(const char *device_id);
bool fenced_device_supports_on(const char *device_id);
gboolean node_has_attr(const char *node, const char *name, const char *value);
gboolean node_does_watchdog_fencing(const char *node);
void fencing_topology_init(void);
void setup_cib(void);
void fenced_cib_cleanup(void);
int fenced_scheduler_init(void);
void fenced_set_local_node(const char *node_name);
const char *fenced_get_local_node(void);
void fenced_scheduler_cleanup(void);
void fenced_scheduler_run(xmlNode *cib);
-static inline void
-fenced_set_protocol_error(pcmk__action_result_t *result)
-{
- pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
- "Fencer API request missing required information (bug?)");
-}
-
/*!
* \internal
* \brief Get the device flag to use with a given action when searching devices
*
* \param[in] action Action to check
*
- * \return st_device_supports_on if \p action is "on", otherwise
- * st_device_supports_none
+ * \return \c fenced_df_supports_on if \p action is "on", otherwise
+ * \c fenced_df_none
*/
static inline uint32_t
fenced_support_flag(const char *action)
{
if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
- return st_device_supports_on;
+ return fenced_df_supports_on;
}
- return st_device_supports_none;
+ return fenced_df_none;
}
extern GHashTable *topology;
extern long long stonith_watchdog_timeout_ms;
extern GList *stonith_watchdog_targets;
extern GHashTable *stonith_remote_op_list;
extern crm_exit_t exit_code;
extern gboolean stonith_shutdown_flag;
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
index 4615993645..f81044ddfa 100644
--- a/include/crm/fencing/internal.h
+++ b/include/crm/fencing/internal.h
@@ -1,198 +1,196 @@
/*
* Copyright 2011-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_FENCING_INTERNAL__H
#define PCMK__CRM_FENCING_INTERNAL__H
+#include <stdbool.h> // bool
+
#include <glib.h>
#include <crm/common/ipc.h>
#include <crm/common/xml.h>
#include <crm/common/output_internal.h>
#include <crm/common/results_internal.h>
#include <crm/stonith-ng.h>
#ifdef __cplusplus
extern "C" {
#endif
-enum st_device_flags {
- st_device_supports_none = (0 << 0),
- st_device_supports_list = (1 << 0),
- st_device_supports_status = (1 << 1),
- st_device_supports_reboot = (1 << 2),
- st_device_supports_parameter_plug = (1 << 3),
- st_device_supports_parameter_port = (1 << 4),
- st_device_supports_on = (1 << 5),
-};
-
-#define stonith__set_device_flags(device_flags, device_id, flags_to_set) do { \
- device_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
- "Fence device", device_id, \
- (device_flags), (flags_to_set), \
- #flags_to_set); \
- } while (0)
+stonith_t *stonith__api_new(void);
+void stonith__api_free(stonith_t *stonith_api);
+int stonith__api_dispatch(stonith_t *stonith_api);
+
+int stonith__api_connect_retry(stonith_t *stonith, const char *name,
+ int max_attempts);
+
+bool stonith__agent_exists(const char *name);
+
+stonith_key_value_t *stonith__key_value_add(stonith_key_value_t *head,
+ const char *key, const char *value);
+void stonith__key_value_freeall(stonith_key_value_t *head, bool keys,
+ bool values);
/*!
 * \internal
 * \brief Set flags in a group of fencer call options, with trace logging
 *
 * \param[in,out] st_call_opts  Variable holding the call options; it is
 *                              assigned the result, so it must be a
 *                              modifiable lvalue (it is evaluated twice)
 * \param[in]     call_for      Passed to pcmk__set_flags_as() as the target
 *                              of the "Fencer call" label
 * \param[in]     flags_to_set  Flags to set (also stringified for logging)
 */
#define stonith__set_call_options(st_call_opts, call_for, flags_to_set) do { \
st_call_opts = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Fencer call", (call_for), \
(st_call_opts), (flags_to_set), \
#flags_to_set); \
} while (0)
/*!
 * \internal
 * \brief Clear flags in a group of fencer call options, with trace logging
 *
 * \param[in,out] st_call_opts    Variable holding the call options; it is
 *                                assigned the result, so it must be a
 *                                modifiable lvalue (it is evaluated twice)
 * \param[in]     call_for        Passed to pcmk__clear_flags_as() as the
 *                                target of the "Fencer call" label
 * \param[in]     flags_to_clear  Flags to clear (also stringified for logging)
 */
#define stonith__clear_call_options(st_call_opts, call_for, flags_to_clear) do { \
st_call_opts = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Fencer call", (call_for), \
(st_call_opts), (flags_to_clear), \
#flags_to_clear); \
} while (0)
struct stonith_action_s;
typedef struct stonith_action_s stonith_action_t;
stonith_action_t *stonith__action_create(const char *agent,
const char *action_name,
const char *target,
int timeout_sec,
GHashTable *device_args,
GHashTable *port_map,
const char *host_arg);
void stonith__destroy_action(stonith_action_t *action);
pcmk__action_result_t *stonith__action_result(stonith_action_t *action);
int stonith__result2rc(const pcmk__action_result_t *result);
void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result);
void stonith__xe_get_result(const xmlNode *xml, pcmk__action_result_t *result);
xmlNode *stonith__find_xe_with_result(xmlNode *xml);
int stonith__execute_async(stonith_action_t *action, void *userdata,
void (*done) (int pid,
const pcmk__action_result_t *result,
void *user_data),
void (*fork_cb) (int pid, void *user_data));
int stonith__metadata_async(const char *agent, int timeout_sec,
void (*callback)(int pid,
const pcmk__action_result_t *result,
void *user_data),
void *user_data);
xmlNode *create_level_registration_xml(const char *node, const char *pattern,
const char *attr, const char *value,
int level,
const stonith_key_value_t *device_list);
xmlNode *create_device_registration_xml(const char *id,
enum stonith_namespace standard,
const char *agent,
const stonith_key_value_t *params,
const char *rsc_provides);
void stonith__register_messages(pcmk__output_t *out);
GList *stonith__parse_targets(const char *hosts);
+void stonith__history_free(stonith_history_t *head);
const char *stonith__later_succeeded(const stonith_history_t *event,
const stonith_history_t *top_history);
stonith_history_t *stonith__sort_history(stonith_history_t *history);
-void stonith__device_parameter_flags(uint32_t *device_flags,
- const char *device_name,
- xmlNode *metadata);
+const char *stonith__default_host_arg(xmlNode *metadata);
/* Only levels 1-9 are allowed for fencing topology levels;
* however, 0 is used to unregister all levels in
* unregister requests.
*/
# define ST__LEVEL_COUNT 10
# define STONITH_ATTR_ACTION_OP "action"
# define STONITH_OP_EXEC "st_execute"
# define STONITH_OP_TIMEOUT_UPDATE "st_timeout_update"
# define STONITH_OP_QUERY "st_query"
# define STONITH_OP_FENCE "st_fence"
# define STONITH_OP_RELAY "st_relay"
# define STONITH_OP_DEVICE_ADD "st_device_register"
# define STONITH_OP_DEVICE_DEL "st_device_remove"
# define STONITH_OP_FENCE_HISTORY "st_fence_history"
# define STONITH_OP_LEVEL_ADD "st_level_add"
# define STONITH_OP_LEVEL_DEL "st_level_remove"
# define STONITH_OP_NOTIFY "st_notify"
# define STONITH_OP_POKE "poke"
# define STONITH_WATCHDOG_AGENT "fence_watchdog"
/* Don't change the two values below, as doing so would break rolling upgrades */
# define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog"
# define STONITH_WATCHDOG_ID "watchdog"
stonith_history_t *stonith__first_matching_event(stonith_history_t *history,
bool (*matching_fn)(stonith_history_t *, void *),
void *user_data);
bool stonith__event_state_pending(stonith_history_t *history, void *user_data);
bool stonith__event_state_eq(stonith_history_t *history, void *user_data);
bool stonith__event_state_neq(stonith_history_t *history, void *user_data);
int stonith__legacy2status(int rc);
int stonith__exit_status(const stonith_callback_data_t *data);
int stonith__execution_status(const stonith_callback_data_t *data);
const char *stonith__exit_reason(const stonith_callback_data_t *data);
int stonith__event_exit_status(const stonith_event_t *event);
int stonith__event_execution_status(const stonith_event_t *event);
const char *stonith__event_exit_reason(const stonith_event_t *event);
char *stonith__event_description(const stonith_event_t *event);
gchar *stonith__history_description(const stonith_history_t *event,
bool full_history,
const char *later_succeeded,
uint32_t show_opts);
+const char *stonith__op_state_text(enum op_state state);
+
/*!
 * \internal
 * \brief Check whether a fencing operation state counts as pending
 *
 * Any state other than \c st_done and \c st_failed is treated as pending.
 *
 * \param[in] state  State as enum op_state value
 *
 * \return \c true if \p state is neither \c st_done nor \c st_failed,
 *         otherwise \c false
 */
static inline bool
stonith__op_state_pending(enum op_state state)
{
    const bool finished = (state == st_done) || (state == st_failed);

    return !finished;
}
gboolean stonith__watchdog_fencing_enabled_for_node(const char *node);
gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node);
/*!
* \internal
* \brief Validate a fencing configuration
*
* \param[in,out] st Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] rsc_id Resource to validate
- * \param[in] namespace_s Type of fence agent to search for
* \param[in] agent Fence agent to validate
* \param[in,out] params Fence device configuration parameters
* \param[in] timeout_sec How long to wait for operation to complete
* \param[in,out] output If non-NULL, where to store any agent output
* \param[in,out] error_output If non-NULL, where to store agent error output
*
* \return Standard Pacemaker return code
*/
int stonith__validate(stonith_t *st, int call_options, const char *rsc_id,
- const char *namespace_s, const char *agent,
- GHashTable *params, int timeout_sec, char **output,
- char **error_output);
+ const char *agent, GHashTable *params, int timeout_sec,
+ char **output, char **error_output);
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_FENCING_INTERNAL__H
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
index 3ae6388573..2f0b0e8401 100644
--- a/include/crm/stonith-ng.h
+++ b/include/crm/stonith-ng.h
@@ -1,715 +1,822 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_STONITH_NG__H
# define PCMK__CRM_STONITH_NG__H
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Fencing aka. STONITH
* \ingroup fencing
*/
-/* IMPORTANT: DLM source code includes this file directly, without having access
- * to other Pacemaker headers on its include path, so this file should *not*
- * include any other Pacemaker headers. (DLM might be updated to avoid the
- * issue, but we should still follow this guideline for a long time after.)
+/* IMPORTANT: dlm source code includes this file directly. Until dlm v4.2.0
+ * (commit 5afd9fdc), dlm did not have access to other Pacemaker headers on its
+ * include path. This file should *not* include any other Pacemaker headers
+ * until we decide that we no longer need to support dlm versions older than
+ * v4.2.0.
+ *
+ * @COMPAT Remove this restriction and take any opportunities to simplify code
+ * when possible.
*/
# include <dlfcn.h>
# include <errno.h>
# include <stdbool.h> // bool
# include <stdint.h> // uint32_t
# include <time.h> // time_t
-/* *INDENT-OFF* */
+// @TODO Keep this definition but make it internal
+/*!
+ * \brief Fencer API connection state
+ * \deprecated Do not use
+ */
enum stonith_state {
stonith_connected_command,
stonith_connected_query,
stonith_disconnected,
};
-//! Flags that can be set in call options for API requests
+// @TODO Keep this definition but make it internal
+/*!
+ * \brief Flags that can be set in call options for API requests
+ *
+ * \deprecated Do not use
+ */
enum stonith_call_options {
- //! No options
+ // No options
+ //! \deprecated Do not use
st_opt_none = 0,
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
- //! \deprecated Unused
+ //! \deprecated Do not use
st_opt_verbose = (1 << 0),
#endif
- //! The fencing target is allowed to execute the request
+ // The fencing target is allowed to execute the request
+ //! \deprecated Do not use
st_opt_allow_self_fencing = (1 << 1),
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
- //! \deprecated Use st_opt_allow_self_fencing instead
+ //! \deprecated Do not use
st_opt_allow_suicide = st_opt_allow_self_fencing,
#endif
// Used internally to indicate that request is manual fence confirmation
- //! \internal Do not use
+ // \internal Do not use
+ //! \deprecated Do not use
st_opt_manual_ack = (1 << 3),
- //! Do not return any reply from server
+ // Do not return any reply from server
+ //! \deprecated Do not use
st_opt_discard_reply = (1 << 4),
// Used internally to indicate that request requires a fencing topology
- //! \internal Do not use
+ // \internal Do not use
+ //! \deprecated Do not use
st_opt_topology = (1 << 6),
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
- //! \deprecated Unused
+ //! \deprecated Do not use
st_opt_scope_local = (1 << 8),
#endif
- //! Interpret target as node cluster layer ID instead of name
+ // Interpret target as node cluster layer ID instead of name
+ //! \deprecated Do not use
st_opt_cs_nodeid = (1 << 9),
- //! Wait for request to be completed before returning
+ // Wait for request to be completed before returning
+ //! \deprecated Do not use
st_opt_sync_call = (1 << 12),
- //! Request that server send an update with optimal callback timeout
+ // Request that server send an update with optimal callback timeout
+ //! \deprecated Do not use
st_opt_timeout_updates = (1 << 13),
- //! Invoke callback only if request succeeded
+ // Invoke callback only if request succeeded
+ //! \deprecated Do not use
st_opt_report_only_success = (1 << 14),
- //! For a fence history request, request that the history be cleared
+ // For a fence history request, request that the history be cleared
+ //! \deprecated Do not use
st_opt_cleanup = (1 << 19),
- //! For a fence history request, broadcast the request to all nodes
+ // For a fence history request, broadcast the request to all nodes
+ //! \deprecated Do not use
st_opt_broadcast = (1 << 20),
};
-/*! Order matters here, do not change values */
-enum op_state
-{
- st_query,
- st_exec,
- st_done,
- st_duplicate,
- st_failed,
+// Order matters here, do not change values
+// @TODO Keep this definition but make it internal
+/*!
+ * \brief Fencing operation states
+ * \deprecated Do not use
+ */
+enum op_state {
+ st_query, //! \deprecated Do not use
+ st_exec, //! \deprecated Do not use
+ st_done, //! \deprecated Do not use
+ st_duplicate, //! \deprecated Do not use
+ st_failed, //! \deprecated Do not use
};
-// Supported fence agent interface standards
+// @TODO Keep this definition but make it internal
+/*!
+ * \brief Supported fence agent interface standards
+ * \deprecated Do not use
+ */
enum stonith_namespace {
- st_namespace_invalid,
- st_namespace_any,
- st_namespace_internal, // Implemented internally by Pacemaker
+ st_namespace_invalid, //! \deprecated Do not use
+ st_namespace_any, //! \deprecated Do not use
+
+ // Implemented internally by Pacemaker
+ st_namespace_internal, //! \deprecated Do not use
/* Neither of these projects is active any longer, but the fence agent
* interfaces they created are still in use and supported by Pacemaker.
*/
- st_namespace_rhcs, // Red Hat Cluster Suite compatible
- st_namespace_lha, // Linux-HA compatible
-};
+ // Red Hat Cluster Suite compatible
+ st_namespace_rhcs, //! \deprecated Do not use
-enum stonith_namespace stonith_text2namespace(const char *namespace_s);
-const char *stonith_namespace2text(enum stonith_namespace st_namespace);
-enum stonith_namespace stonith_get_namespace(const char *agent,
- const char *namespace_s);
+ // Linux-HA compatible
+ st_namespace_lha, //! \deprecated Do not use
+};
+/* @COMPAT Drop this and use a GList/GSList of pcmk_nvpair_t or a GHashTable as
+ * appropriate
+ */
+/*!
+ * \brief Key-value pair list node
+ * \deprecated Do not use
+ */
typedef struct stonith_key_value_s {
char *key;
char *value;
- struct stonith_key_value_s *next;
+ struct stonith_key_value_s *next;
} stonith_key_value_t;
+// @TODO Keep this definition but make it internal
+/*!
+ * \brief Fencing history entry
+ * \deprecated Do not use
+ */
typedef struct stonith_history_s {
char *target;
char *action;
char *origin;
char *delegate;
char *client;
int state;
time_t completed;
struct stonith_history_s *next;
long completed_nsec;
char *exit_reason;
} stonith_history_t;
+// @TODO Keep this typedef but rename it and make it internal
typedef struct stonith_s stonith_t;
+// @TODO Keep this definition but make it internal
+/*!
+ * \brief Fencing event
+ * \deprecated Do not use
+ */
typedef struct stonith_event_s {
char *id;
char *operation;
int result;
char *origin;
char *target;
char *action;
char *executioner;
char *device;
- /*! The name of the client that initiated the action. */
+ // Name of the client that initiated the action
char *client_origin;
- //! \internal This field should be treated as internal to Pacemaker
void *opaque;
} stonith_event_t;
+// @TODO Keep this definition but make it internal
+/*!
+ * \brief Data for an asynchronous fencing request callback
+ * \deprecated Do not use
+ */
typedef struct stonith_callback_data_s {
int rc;
int call_id;
void *userdata;
//! \internal This field should be treated as internal to Pacemaker
void *opaque;
} stonith_callback_data_t;
-typedef struct stonith_api_operations_s
-{
+// @TODO Keep this object but make it internal
+/*!
+ * \brief Fencer API operations
+ * \deprecated Use appropriate functions in libpacemaker instead
+ */
+typedef struct stonith_api_operations_s {
/*!
* \brief Destroy a fencer connection
*
* \param[in,out] st Fencer connection to destroy
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*free) (stonith_t *st);
/*!
* \brief Connect to the local fencer
*
* \param[in,out] st Fencer connection to connect
* \param[in] name Client name to use
* \param[out] stonith_fd If NULL, use a main loop, otherwise
* store IPC file descriptor here
*
* \return Legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*connect) (stonith_t *st, const char *name, int *stonith_fd);
/*!
* \brief Disconnect from the local stonith daemon.
*
* \param[in,out] st Fencer connection to disconnect
*
* \return Legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*disconnect)(stonith_t *st);
/*!
* \brief Unregister a fence device with the local fencer
*
* \param[in,out] st Fencer connection to use
* \param[in] options Group of enum stonith_call_options
* \param[in] name ID of fence device to unregister
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*remove_device)(stonith_t *st, int options, const char *name);
/*!
* \brief Register a fence device with the local fencer
*
* \param[in,out] st Fencer connection to use
* \param[in] options Group of enum stonith_call_options
* \param[in] id ID of fence device to register
* \param[in] namespace_s Type of fence agent to search for ("redhat"
* or "stonith-ng" for RHCS-style, "internal"
* for Pacemaker-internal devices, "heartbeat"
* for LHA-style, or "any" or NULL for any)
* \param[in] agent Name of fence agent for device
* \param[in] params Fence agent parameters for device
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*register_device)(stonith_t *st, int options, const char *id,
const char *namespace_s, const char *agent,
const stonith_key_value_t *params);
/*!
* \brief Unregister a fencing level for specified node with local fencer
*
* \param[in,out] st Fencer connection to use
* \param[in] options Group of enum stonith_call_options
* \param[in] node Target node to unregister level for
* \param[in] level Topology level number to unregister
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \note Not used internally
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*remove_level)(stonith_t *st, int options, const char *node,
int level);
/*!
* \brief Register a fencing level for specified node with local fencer
*
* \param[in,out] st Fencer connection to use
* \param[in] options Group of enum stonith_call_options
* \param[in] node Target node to register level for
* \param[in] level Topology level number to register
* \param[in] device_list Devices to register in level
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \note Used only by cts-fence-helper.c internally
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*register_level)(stonith_t *st, int options, const char *node,
int level, const stonith_key_value_t *device_list);
/*!
* \brief Retrieve a fence agent's metadata
*
* \param[in,out] stonith Fencer connection
* \param[in] call_options Group of enum stonith_call_options
* (currently ignored)
* \param[in] agent Fence agent to query
- * \param[in] namespace_s Type of fence agent to search for ("redhat"
- * or "stonith-ng" for RHCS-style, "internal"
- * for Pacemaker-internal devices, "heartbeat"
- * for LHA-style, or "any" or NULL for any)
+ * \param[in] namespace_s Ignored
* \param[out] output Where to store metadata
* \param[in] timeout_sec Error if not complete within this time
*
* \return Legacy Pacemaker return code
* \note The caller is responsible for freeing *output using free().
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*metadata)(stonith_t *stonith, int call_options, const char *agent,
const char *namespace_s, char **output, int timeout_sec);
/*!
* \brief Retrieve a list of installed fence agents
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* (currently ignored)
* \param[in] namespace_s Type of fence agents to list ("redhat"
* or "stonith-ng" for RHCS-style, "internal" for
* Pacemaker-internal devices, "heartbeat" for
* LHA-style, or "any" or NULL for all)
* \param[out] devices Where to store agent list
- * \param[in] timeout Error if unable to complete within this
- * (currently ignored)
+ * \param[in] timeout Ignored
*
* \return Number of items in list on success, or negative errno otherwise
* \note The caller is responsible for freeing the returned list with
- * stonith_key_value_freeall().
+ * \c stonith__key_value_freeall().
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*list_agents)(stonith_t *stonith, int call_options,
const char *namespace_s, stonith_key_value_t **devices,
int timeout);
/*!
* \brief Get the output of a fence device's list action
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] id Fence device ID to run list for
* \param[out] list_info Where to store list output
* \param[in] timeout Error if unable to complete within this
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*list)(stonith_t *stonith, int call_options, const char *id,
char **list_info, int timeout);
/*!
* \brief Check whether a fence device is reachable by monitor action
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] id Fence device ID to run monitor for
* \param[in] timeout Error if unable to complete within this
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*monitor)(stonith_t *stonith, int call_options, const char *id,
int timeout);
/*!
* \brief Check whether a fence device target is reachable by status action
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] id Fence device ID to run status for
* \param[in] port Fence target to run status for
* \param[in] timeout Error if unable to complete within this
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \note Used only by cts-fence-helper.c internally
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*status)(stonith_t *stonith, int call_options, const char *id,
const char *port, int timeout);
/*!
* \brief List registered fence devices
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] target If not NULL, list only devices that can fence this target
* \param[out] devices Where to store list of fence devices
* \param[in] timeout Error if unable to complete within this
*
* \note If \p target is provided, only devices that can fence that target
* will be returned.
*
* \return Number of items in list on success, or negative errno otherwise
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*query)(stonith_t *stonith, int call_options, const char *target,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Request that a target get fenced
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] node Fence target
* \param[in] action "on", "off", or "reboot"
* \param[in] timeout Default per-device timeout to use with
* each executed device
* \param[in] tolerance Accept result of identical fence action
* completed within this time
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \note Used only by cts-fence-helper.c and \c stonith_api_kick()
+ * internally. The latter might go away eventually if dlm starts using
+ * \c pcmk_request_fencing().
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*fence)(stonith_t *stonith, int call_options, const char *node,
const char *action, int timeout, int tolerance);
/*!
* \brief Manually confirm that a node has been fenced
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] target Fence target
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*confirm)(stonith_t *stonith, int call_options, const char *target);
/*!
* \brief List fencing actions that have occurred for a target
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] node Fence target
* \param[out] history Where to store list of fencing actions
* \param[in] timeout Error if unable to complete within this
*
* \return Legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*history)(stonith_t *stonith, int call_options, const char *node,
stonith_history_t **history, int timeout);
/*!
* \brief Register a callback for fence notifications
*
* \param[in,out] stonith Fencer connection to use
* \param[in] event Event to register for
* \param[in] callback Callback to register
*
* \return Legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*register_notification)(stonith_t *stonith, const char *event,
void (*callback)(stonith_t *st,
stonith_event_t *e));
/*!
* \brief Unregister callbacks for fence notifications
*
* \param[in,out] stonith Fencer connection to use
* \param[in] event Event to unregister callbacks for (NULL for all)
*
* \return Legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*remove_notification)(stonith_t *stonith, const char *event);
/*!
* \brief Register a callback for an asynchronous fencing result
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_id Call ID to register callback for
* \param[in] timeout Error if result not received in this time
* \param[in] options Group of enum stonith_call_options
* (respects \c st_opt_timeout_updates and
* \c st_opt_report_only_success)
* \param[in,out] user_data Pointer to pass to callback
* \param[in] callback_name Unique identifier for callback
* \param[in] callback Callback to register (may be called
* immediately if \p call_id indicates error)
*
* \return \c TRUE on success, \c FALSE if call_id indicates error,
* or -EINVAL if \p stonith is not valid
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*register_callback)(stonith_t *stonith, int call_id, int timeout,
int options, void *user_data,
const char *callback_name,
void (*callback)(stonith_t *st,
stonith_callback_data_t *data));
/*!
* \brief Unregister callbacks for asynchronous fencing results
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_id If \p all_callbacks is false, call ID
* to unregister callback for
* \param[in] all_callbacks If true, unregister all callbacks
*
* \return pcmk_ok
+ * \note Not used internally (but perhaps it should be)
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*remove_callback)(stonith_t *stonith, int call_id, bool all_callbacks);
/*!
* \brief Unregister fencing level for specified node, pattern or attribute
*
* \param[in,out] st Fencer connection to use
* \param[in] options Group of enum stonith_call_options
* \param[in] node If not NULL, unregister level targeting this node
* \param[in] pattern If not NULL, unregister level targeting nodes
* whose names match this regular expression
* \param[in] attr If this and \p value are not NULL, unregister
* level targeting nodes with this node attribute
* set to \p value
* \param[in] value If this and \p attr are not NULL, unregister
* level targeting nodes with node attribute \p attr
* set to this
* \param[in] level Topology level number to remove
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
* \note The caller should set only one of \p node, \p pattern, or \p attr
* and \p value.
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*remove_level_full)(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value, int level);
/*!
* \brief Register fencing level for specified node, pattern or attribute
*
* \param[in,out] st Fencer connection to use
* \param[in] options Group of enum stonith_call_options
* \param[in] node If not NULL, register level targeting this
* node by name
* \param[in] pattern If not NULL, register level targeting nodes
* whose names match this regular expression
* \param[in] attr If this and \p value are not NULL, register
* level targeting nodes with this node
* attribute set to \p value
* \param[in] value If this and \p attr are not NULL, register
* level targeting nodes with node attribute
* \p attr set to this
* \param[in] level Topology level number to register
* \param[in] device_list Devices to use in level
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
*
* \note The caller should set only one of node, pattern or attr/value.
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*register_level_full)(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value, int level,
const stonith_key_value_t *device_list);
/*!
* \brief Validate an arbitrary stonith device configuration
*
* \param[in,out] st Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] rsc_id ID used to replace CIB secrets in \p params
- * \param[in] namespace_s Type of fence agent to validate ("redhat"
- * or "stonith-ng" for RHCS-style, "internal"
- * for Pacemaker-internal devices, "heartbeat"
- * for LHA-style, or "any" or NULL for any)
+ * \param[in] namespace_s Ignored
* \param[in] agent Fence agent to validate
* \param[in] params Configuration parameters to pass to agent
* \param[in] timeout Fail if no response within this many seconds
* \param[out] output If non-NULL, where to store any agent output
* \param[out] error_output If non-NULL, where to store agent error output
*
* \return pcmk_ok if validation succeeds, -errno otherwise
* \note If pcmk_ok is returned, the caller is responsible for freeing
* the output (if requested) with free().
+ * \note Not used internally
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*validate)(stonith_t *st, int call_options, const char *rsc_id,
const char *namespace_s, const char *agent,
const stonith_key_value_t *params, int timeout,
char **output, char **error_output);
/*!
* \brief Request delayed fencing of a target
*
* \param[in,out] stonith Fencer connection to use
* \param[in] call_options Group of enum stonith_call_options
* \param[in] node Fence target
* \param[in] action "on", "off", or "reboot"
* \param[in] timeout Default per-device timeout to use with
* each executed device
* \param[in] tolerance Accept result of identical fence action
* completed within this time
* \param[in] delay Execute fencing after this delay (-1
* disables any delay from pcmk_delay_base
* and pcmk_delay_max)
*
* \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
* on success, otherwise a negative legacy Pacemaker return code
+ * \deprecated \c stonith_api_operations_t is deprecated for external use
*/
int (*fence_with_delay)(stonith_t *stonith, int call_options,
const char *node, const char *action, int timeout,
int tolerance, int delay);
} stonith_api_operations_t;
+// @TODO Keep this object but make it internal
+/*!
+ * \brief Fencer API connection object
+ * \deprecated Use appropriate functions in libpacemaker instead
+ */
struct stonith_s {
enum stonith_state state;
int call_id;
void *st_private;
stonith_api_operations_t *cmds;
};
-/* *INDENT-ON* */
-
-/* Core functions */
-stonith_t *stonith_api_new(void);
-void stonith_api_delete(stonith_t * st);
-
-void stonith_dump_pending_callbacks(stonith_t * st);
-
-bool stonith_dispatch(stonith_t * st);
-
-stonith_key_value_t *stonith_key_value_add(stonith_key_value_t * kvp, const char *key,
- const char *value);
-void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values);
-
-void stonith_history_free(stonith_history_t *history);
-
-// Convenience functions
-int stonith_api_connect_retry(stonith_t *st, const char *name,
- int max_attempts);
-const char *stonith_op_state_str(enum op_state state);
/* Basic helpers that allows nodes to be fenced and the history to be
* queried without mainloop or the caller understanding the full API
*
* At least one of nodeid and uname is required
*
- * NOTE: DLM uses both of these
+ * NOTE: dlm (as of at least 4.3.0) uses these (via the helper functions below)
*/
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off);
time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress);
/*
* Helpers for using the above functions without install-time dependencies
*
* Usage:
* #include <crm/stonith-ng.h>
*
* To turn a node off by corosync nodeid:
* stonith_api_kick_helper(nodeid, 120, 1);
*
* To check the last fence date/time (also by nodeid):
* last = stonith_api_time_helper(nodeid, 0);
*
* To check if fencing is in progress:
* if(stonith_api_time_helper(nodeid, 1) > 0) { ... }
*
* eg.
#include <stdio.h>
#include <time.h>
#include <crm/stonith-ng.h>
int
main(int argc, char ** argv)
{
int rc = 0;
int nodeid = 102;
time_t when = 0;
when = stonith_api_time_helper(nodeid, 0);
printf("%d last fenced at %s\n", nodeid, ctime(&when));
rc = stonith_api_kick_helper(nodeid, 120, 1);
printf("%d fence result: %d\n", nodeid, rc);
when = stonith_api_time_helper(nodeid, 0);
printf("%d last fenced at %s\n", nodeid, ctime(&when));
return 0;
}
*/
#define STONITH_LIBRARY "libstonithd.so.56"
+// NOTE: dlm (as of at least 4.3.0) uses these (via the helper functions below)
typedef int (*st_api_kick_fn) (int nodeid, const char *uname, int timeout, bool off);
typedef time_t (*st_api_time_fn) (int nodeid, const char *uname, bool in_progress);
+// NOTE: dlm (as of at least 4.3.0) uses this
/*!
 * \brief Call stonith_api_kick() for a node ID without linking to the library
 *
 * The fencing library is opened with dlopen() and the symbol resolved with
 * dlsym() on demand. Both results are cached in function-local statics, so a
 * successful lookup happens once; a failed one is retried on the next call.
 *
 * \param[in] nodeid   ID of the node to act on (no node name is passed)
 * \param[in] timeout  Passed through unchanged to stonith_api_kick()
 * \param[in] off      Passed through unchanged to stonith_api_kick() (the
 *                     usage notes above pass 1 to turn a node off)
 *
 * \return Result of stonith_api_kick(), or -ELIBACC (-ENOSYS where ELIBACC is
 *         not defined) if the function could not be loaded
 */
static inline int
stonith_api_kick_helper(uint32_t nodeid, int timeout, bool off)
{
    static void *st_library = NULL;
    static st_api_kick_fn st_kick_fn;

    // Resolve the entry point only while it is still unknown
    if (st_kick_fn == NULL) {
        if (st_library == NULL) {
            st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
        }
        if (st_library != NULL) {
            st_kick_fn = (st_api_kick_fn) dlsym(st_library, "stonith_api_kick");
        }
    }

    if (st_kick_fn != NULL) {
        return (*st_kick_fn) (nodeid, NULL, timeout, off);
    }

    // Library or symbol unavailable
#ifdef ELIBACC
    return -ELIBACC;
#else
    return -ENOSYS;
#endif
}
+// NOTE: dlm (as of at least 4.3.0) uses this
/*!
 * \brief Call stonith_api_time() for a node ID without linking to the library
 *
 * The fencing library is opened with dlopen() and the symbol resolved with
 * dlsym() on demand. Both results are cached in function-local statics
 * (separate from those of stonith_api_kick_helper()), so a successful lookup
 * happens once; a failed one is retried on the next call.
 *
 * \param[in] nodeid       ID of the node to query (no node name is passed)
 * \param[in] in_progress  Passed through unchanged to stonith_api_time() (the
 *                         usage notes above pass 1 to check whether fencing
 *                         is in progress, 0 for the last fence date/time)
 *
 * \return Result of stonith_api_time(), or 0 if the function could not be
 *         loaded
 */
static inline time_t
stonith_api_time_helper(uint32_t nodeid, bool in_progress)
{
    static void *st_library = NULL;
    static st_api_time_fn st_time_fn;

    // Resolve the entry point only while it is still unknown
    if (st_time_fn == NULL) {
        if (st_library == NULL) {
            st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
        }
        if (st_library != NULL) {
            st_time_fn = (st_api_time_fn) dlsym(st_library, "stonith_api_time");
        }
    }

    return (st_time_fn == NULL)? 0 : (*st_time_fn) (nodeid, NULL, in_progress);
}
-/**
- * Does the given agent describe a stonith resource that can exist?
- *
- * \param[in] agent What is the name of the agent?
- * \param[in] timeout Timeout to use when querying. If 0 is given,
- * use a default of 120.
- *
- * \return A boolean
+#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
+
+/* Normally we'd put this section in a separate file (crm/fencing/compat.h), but
+ * we can't do that for the reason noted at the top of this file. That does mean
+ * we have to duplicate these declarations where they're implemented.
*/
+
+//! \deprecated Use appropriate functions in libpacemaker
+stonith_t *stonith_api_new(void);
+
+//! \deprecated Use appropriate functions in libpacemaker
+void stonith_api_delete(stonith_t *stonith);
+
+//! \deprecated Do not use
+void stonith_dump_pending_callbacks(stonith_t *stonith);
+
+//! \deprecated Do not use
+bool stonith_dispatch(stonith_t *stonith_api);
+
+//! \deprecated Do not use
+stonith_key_value_t *stonith_key_value_add(stonith_key_value_t *kvp,
+ const char *key, const char *value);
+
+//! \deprecated Do not use
+void stonith_key_value_freeall(stonith_key_value_t *head, int keys, int values);
+
+//! \deprecated Do not use
+void stonith_history_free(stonith_history_t *head);
+
+//! \deprecated Do not use
+int stonith_api_connect_retry(stonith_t *st, const char *name,
+ int max_attempts);
+
+//! \deprecated Do not use
+const char *stonith_op_state_str(enum op_state state);
+
+//! \deprecated Do not use
bool stonith_agent_exists(const char *agent, int timeout);
-/*!
- * \brief Turn fence action into a more readable string
- *
- * \param[in] action Fence action
- */
+//! \deprecated Do not use
const char *stonith_action_str(const char *action);
+//! \deprecated Do not use
+enum stonith_namespace stonith_text2namespace(const char *namespace_s);
+
+//! \deprecated Do not use
+const char *stonith_namespace2text(enum stonith_namespace st_namespace);
+
+//! \deprecated Do not use
+enum stonith_namespace stonith_get_namespace(const char *agent,
+ const char *namespace_s);
+
+#endif // !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
+
#ifdef __cplusplus
}
#endif
#endif
diff --git a/include/pacemaker.h b/include/pacemaker.h
index cac2e21765..eae1e233a5 100644
--- a/include/pacemaker.h
+++ b/include/pacemaker.h
@@ -1,698 +1,698 @@
/*
* Copyright 2019-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__PACEMAKER__H
# define PCMK__PACEMAKER__H
# include <glib.h>
# include <libxml/tree.h>
# include <crm/common/scheduler.h>
# include <crm/cib/cib_types.h>
# include <crm/stonith-ng.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief High Level API
* \ingroup pacemaker
*/
/*!
* \brief Modify operation of running a cluster simulation.
*
* Every enumerator other than \c pcmk_sim_none is a distinct single bit, so
* values can be combined with bitwise OR. A group of these flags is what
* pcmk_simulate() takes as its \c flags argument.
*/
enum pcmk_sim_flags {
// @COMPAT Use UINT32_C(1); should not affect behavior
pcmk_sim_none = 0,
pcmk_sim_all_actions = 1 << 0,
pcmk_sim_show_pending = 1 << 1,
pcmk_sim_process = 1 << 2,
pcmk_sim_show_scores = 1 << 3,
pcmk_sim_show_utilization = 1 << 4,
pcmk_sim_simulate = 1 << 5,
pcmk_sim_sanitized = 1 << 6,
pcmk_sim_verbose = 1 << 7,
};
/*!
* \brief Synthetic cluster events that can be injected into the cluster
* for running simulations.
*
* An object of this type is what pcmk_simulate() takes as its \c injections
* argument; pcmk_free_injections() frees one.
*/
typedef struct {
/*! A list of node names (gchar *) to simulate bringing online */
GList *node_up;
/*! A list of node names (gchar *) to simulate bringing offline */
GList *node_down;
/*! A list of node names (gchar *) to simulate failing */
GList *node_fail;
/*! A list of operations (gchar *) to inject. The format of these strings
* is described in the "Operation Specification" section of crm_simulate
* help output.
*/
GList *op_inject;
/*! A list of operations (gchar *) that should return a given error code
* if they fail. The format of these strings is described in the
* "Operation Specification" section of crm_simulate help output.
*/
GList *op_fail;
/*! A list of tickets (gchar *) to simulate granting */
GList *ticket_grant;
/*! A list of tickets (gchar *) to simulate revoking */
GList *ticket_revoke;
/*! A list of tickets (gchar *) to simulate putting on standby */
GList *ticket_standby;
/*! A list of tickets (gchar *) to simulate activating */
GList *ticket_activate;
/*! Does the cluster have an active watchdog device?
* NOTE(review): stored as a string; the accepted values are not visible
* in this header -- confirm against the simulation code.
*/
char *watchdog;
/*! Does the cluster have quorum?
* NOTE(review): stored as a string; the accepted values are not visible
* in this header -- confirm against the simulation code.
*/
char *quorum;
} pcmk_injections_t;
/*!
* \brief Get and output controller status
*
* \param[in,out] xml Destination for the result, as an XML tree
* \param[in] node_name Name of node whose status is desired
* (\p NULL for DC)
* \param[in] message_timeout_ms How long to wait for a reply from the
* controller API. If 0,
* \p pcmk_ipc_dispatch_sync will be used.
* Otherwise, \p pcmk_ipc_dispatch_poll will
* be used.
*
* \return Standard Pacemaker return code
*/
int pcmk_controller_status(xmlNodePtr *xml, const char *node_name,
unsigned int message_timeout_ms);
/*!
* \brief Get and output designated controller node name
*
* \param[in,out] xml Destination for the result, as an XML tree
* \param[in] message_timeout_ms How long to wait for a reply from the
* controller API. If 0,
* \p pcmk_ipc_dispatch_sync will be used.
* Otherwise, \p pcmk_ipc_dispatch_poll will
* be used.
*
* \return Standard Pacemaker return code
*/
int pcmk_designated_controller(xmlNodePtr *xml,
unsigned int message_timeout_ms);
/*!
* \brief Free a \c pcmk_injections_t structure
*
* \param[in,out] injections The structure to be freed
*/
void pcmk_free_injections(pcmk_injections_t *injections);
/*!
* \brief Get and optionally output node info corresponding to a node ID from
* the controller
*
* \param[in,out] xml Destination for the result, as an XML tree
* \param[in,out] node_id ID of node whose name to get. If \p NULL
* or 0, get the local node name. If not
* \p NULL, store the true node ID here on
* success.
* \param[out] node_name If not \p NULL, where to store the node
* name
* \param[out] uuid If not \p NULL, where to store the node
* UUID
* \param[out] state If not \p NULL, where to store the
* membership state
* \param[out] is_remote If not \p NULL, where to store whether the
* node is a Pacemaker Remote node
* \param[out] have_quorum If not \p NULL, where to store whether the
* node has quorum
* \param[in] show_output Whether to output the node info
* \param[in] message_timeout_ms How long to wait for a reply from the
* controller API. If 0,
* \p pcmk_ipc_dispatch_sync will be used.
* Otherwise, \p pcmk_ipc_dispatch_poll will
* be used.
*
* \return Standard Pacemaker return code
*
* \note The caller is responsible for freeing \p *node_name, \p *uuid, and
* \p *state using \p free().
*/
int pcmk_query_node_info(xmlNodePtr *xml, uint32_t *node_id, char **node_name,
char **uuid, char **state, bool *have_quorum,
bool *is_remote, bool show_output,
unsigned int message_timeout_ms);
/*!
 * \brief Look up a node's name from its node ID via the controller
 *
 * Convenience wrapper around pcmk_query_node_info() that asks only for the
 * node name and does not request output of the node info.
 *
 * \param[in,out] xml                 Destination for the result, as an XML tree
 * \param[in]     node_id             ID of node whose name to get (or 0 for the
 *                                    local node); passed by value, so the
 *                                    caller's variable is never updated
 * \param[out]    node_name           If not \p NULL, where to store the node
 *                                    name
 * \param[in]     message_timeout_ms  How long to wait for a reply from the
 *                                    controller API. If 0,
 *                                    \p pcmk_ipc_dispatch_sync will be used.
 *                                    Otherwise, \p pcmk_ipc_dispatch_poll will
 *                                    be used.
 *
 * \return Standard Pacemaker return code
 *
 * \note The caller is responsible for freeing \p *node_name using \p free().
 */
static inline int
pcmk_query_node_name(xmlNodePtr *xml, uint32_t node_id, char **node_name,
                     unsigned int message_timeout_ms)
{
    // UUID, state, quorum, and remote-node outputs are not wanted here
    int rc = pcmk_query_node_info(xml, &node_id, node_name, NULL, NULL, NULL,
                                  NULL, false, message_timeout_ms);

    return rc;
}
/*!
* \brief Get and output \p pacemakerd status
*
* \param[in,out] xml Destination for the result, as an XML tree
* \param[in] ipc_name IPC name for request
* \param[in] message_timeout_ms How long to wait for a reply from the
* \p pacemakerd API. If 0,
* \p pcmk_ipc_dispatch_sync will be used.
* Otherwise, \p pcmk_ipc_dispatch_poll will
* be used.
*
* \return Standard Pacemaker return code
*/
int pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name,
unsigned int message_timeout_ms);
/*!
* \brief Remove a resource
*
* \param[in,out] xml Destination for the result, as an XML tree
* \param[in] rsc_id Resource to remove
* \param[in] rsc_type Type of the resource ("primitive", "group", etc.)
*
* \return Standard Pacemaker return code
* \note This function will return \p pcmk_rc_ok if \p rsc_id doesn't exist
* or if \p rsc_type is incorrect for \p rsc_id (deleting something
* that doesn't exist always succeeds).
*/
int pcmk_resource_delete(xmlNodePtr *xml, const char *rsc_id, const char *rsc_type);
/*!
* \brief Calculate and output resource operation digests
*
* \param[out] xml Where to store XML with result
* \param[in,out] rsc Resource to calculate digests for
* \param[in] node Node whose operation history should be used
* \param[in] overrides Hash table of configuration parameters to override
*
* \return Standard Pacemaker return code
*/
int pcmk_resource_digests(xmlNodePtr *xml, pcmk_resource_t *rsc,
const pcmk_node_t *node, GHashTable *overrides);
/*!
* \brief Simulate a cluster's response to events
*
* This high-level function essentially implements crm_simulate(8). It operates
* on an input CIB file and various lists of events that can be simulated. It
* optionally writes out a variety of artifacts to show the results of the
* simulation. Output can be modified with various flags.
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in,out] scheduler Scheduler data
* \param[in] injections A structure containing cluster events
* (node up/down, tickets, injected operations)
* \param[in] flags Group of <tt>enum pcmk_sim_flags</tt>
* \param[in] section_opts Which portions of the cluster status output
* should be displayed?
* \param[in] use_date Date to set the cluster's time to (may be NULL)
* \param[in] input_file The source CIB file, which may be overwritten by
* this function (may be NULL)
* \param[in] graph_file Where to write the XML-formatted transition graph
* (may be NULL, in which case no file will be
* written)
* \param[in] dot_file Where to write the dot(1) formatted transition
* graph (may be NULL, in which case no file will
* be written)
*
* \return Standard Pacemaker return code
*/
int pcmk_simulate(xmlNodePtr *xml, pcmk_scheduler_t *scheduler,
const pcmk_injections_t *injections, unsigned int flags,
unsigned int section_opts, const char *use_date,
const char *input_file, const char *graph_file,
const char *dot_file);
/*!
* \brief Verify that a CIB is error-free or output errors and warnings
*
* This high-level function essentially implements crm_verify(8). It operates
* on an input CIB file, which can be supplied in one of several ways. It
* writes out XML-formatted output.
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] cib_source Source of the CIB:
* NULL -> use live cib, "-" -> stdin
* "<..." -> xml str, otherwise -> xml file name
*
* \return Standard Pacemaker return code
*/
int pcmk_verify(xmlNodePtr *xml, const char *cib_source);
/*!
* \brief Get nodes list
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] node_types Node type(s) to return (default: all)
*
* \return Standard Pacemaker return code
*/
int pcmk_list_nodes(xmlNodePtr *xml, const char *node_types);
/*!
* \brief Output cluster status formatted like `crm_mon --output-as=xml`
*
* \param[in,out] xml The destination for the result, as an XML tree
*
* \return Standard Pacemaker return code
*/
int pcmk_status(xmlNodePtr *xml);
/*!
* \brief Check whether each rule in a list is in effect
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] input The CIB XML to check (if \c NULL, use current CIB)
* \param[in] date Check whether the rule is in effect at this date and
* time (if \c NULL, use current date and time)
* \param[in] rule_ids The IDs of the rules to check, as a <tt>NULL</tt>-
* terminated list.
*
* \return Standard Pacemaker return code
*/
int pcmk_check_rules(xmlNodePtr *xml, xmlNodePtr input, const crm_time_t *date,
const char **rule_ids);
/*!
 * \brief Check whether a single rule is in effect
 *
 * Convenience wrapper that passes \p rule_id to pcmk_check_rules() as a
 * one-element, <tt>NULL</tt>-terminated list.
 *
 * \param[in,out] xml      The destination for the result, as an XML tree
 * \param[in]     input    The CIB XML to check (if \c NULL, use current CIB)
 * \param[in]     date     Check whether the rule is in effect at this date and
 *                         time (if \c NULL, use current date and time)
 * \param[in]     rule_id  The ID of the rule to check
 *
 * \return Standard Pacemaker return code
 */
static inline int
pcmk_check_rule(xmlNodePtr *xml, xmlNodePtr input, const crm_time_t *date,
                const char *rule_id)
{
    const char *rule_ids[2];

    rule_ids[0] = rule_id;
    rule_ids[1] = NULL;     // list terminator

    return pcmk_check_rules(xml, input, date, rule_ids);
}
/*!
* \brief Bit flags to control which fields of result code info are displayed
*
* Each non-zero enumerator is a distinct single bit, so values can be combined
* with bitwise OR. A group of these flags is what pcmk_show_result_code() and
* pcmk_list_result_codes() take as their \c flags argument.
*/
enum pcmk_rc_disp_flags {
pcmk_rc_disp_none = 0, //!< (Does nothing)
pcmk_rc_disp_code = (1 << 0), //!< Display result code number
pcmk_rc_disp_name = (1 << 1), //!< Display result code name
pcmk_rc_disp_desc = (1 << 2), //!< Display result code description
};
/*!
* \brief Display the name and/or description of a result code
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] code The result code
* \param[in] type Interpret \c code as this type of result code.
* Supported values: \c pcmk_result_legacy,
* \c pcmk_result_rc, \c pcmk_result_exitcode.
* \param[in] flags Group of \c pcmk_rc_disp_flags
*
* \return Standard Pacemaker return code
*/
int pcmk_show_result_code(xmlNodePtr *xml, int code, enum pcmk_result_type type,
uint32_t flags);
/*!
* \brief List all valid result codes in a particular family
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] type The family of result codes to list. Supported
* values: \c pcmk_result_legacy, \c pcmk_result_rc,
* \c pcmk_result_exitcode.
* \param[in] flags Group of \c pcmk_rc_disp_flags
*
* \return Standard Pacemaker return code
*/
int pcmk_list_result_codes(xmlNodePtr *xml, enum pcmk_result_type type,
uint32_t flags);
/*!
* \brief List available providers for the given OCF agent
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] agent_spec Resource agent name
*
* \return Standard Pacemaker return code
*/
int pcmk_list_alternatives(xmlNodePtr *xml, const char *agent_spec);
/*!
* \brief List all agents available for the named standard and/or provider
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] agent_spec STD[:PROV]
*
* \return Standard Pacemaker return code
*/
int pcmk_list_agents(xmlNodePtr *xml, char *agent_spec);
/*!
* \brief List all available OCF providers for the given agent
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] agent_spec Resource agent name
*
* \return Standard Pacemaker return code
*/
int pcmk_list_providers(xmlNodePtr *xml, const char *agent_spec);
/*!
* \brief List all available resource agent standards
*
* \param[in,out] xml The destination for the result, as an XML tree
*
* \return Standard Pacemaker return code
*/
int pcmk_list_standards(xmlNodePtr *xml);
/*!
* \brief List all available cluster options
*
* These are options that affect the entire cluster.
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] all If \c true, include advanced and deprecated options
* (currently always treated as true)
*
* \return Standard Pacemaker return code
*/
int pcmk_list_cluster_options(xmlNode **xml, bool all);
/*!
* \brief List common fencing resource parameters
*
* These are parameters that are available for all fencing resources, regardless
* of type. They are processed by Pacemaker, rather than by the fence agent or
* the fencing library.
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] all If \c true, include advanced and deprecated options
* (currently always treated as true)
*
* \return Standard Pacemaker return code
*/
int pcmk_list_fencing_params(xmlNode **xml, bool all);
/*!
* \brief List meta-attributes applicable to primitive resources as OCF-like XML
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] all If \c true, include advanced and deprecated options
* (currently always treated as true)
*
* \return Standard Pacemaker return code
*/
int pcmk_list_primitive_meta(xmlNode **xml, bool all);
/*!
* \brief Return constraints that apply to the given ticket
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] ticket_id Ticket to find constraint for, or \c NULL for
* all ticket constraints
*
* \return Standard Pacemaker return code
*/
int pcmk_ticket_constraints(xmlNodePtr *xml, const char *ticket_id);
/*!
* \brief Delete a ticket's state from the local cluster site
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] ticket_id Ticket to delete
* \param[in] force If \c true, delete the ticket even if it has
* been granted
*
* \return Standard Pacemaker return code
*/
int pcmk_ticket_delete(xmlNodePtr *xml, const char *ticket_id, bool force);
/*!
* \brief Return the value of a ticket's attribute
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] ticket_id Ticket to find attribute value for
* \param[in] attr_name Attribute's name to find value for
* \param[in] attr_default If either the ticket or the attribute do not
* exist, use this as the value in \p xml
*
* \return Standard Pacemaker return code
*/
int pcmk_ticket_get_attr(xmlNodePtr *xml, const char *ticket_id,
const char *attr_name, const char *attr_default);
/*!
* \brief Return information about the given ticket
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] ticket_id Ticket to find info value for, or \c NULL for
* all tickets
*
* \return Standard Pacemaker return code
*/
int pcmk_ticket_info(xmlNodePtr *xml, const char *ticket_id);
/*!
* \brief Remove the given attribute(s) from a ticket
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] ticket_id Ticket to remove attributes from
* \param[in] attr_delete A list of attribute names
* \param[in] force Attempting to remove the granted attribute of
* \p ticket_id will cause this function to return
* \c EACCES unless \p force is set to \c true
*
* \return Standard Pacemaker return code
*/
int pcmk_ticket_remove_attr(xmlNodePtr *xml, const char *ticket_id, GList *attr_delete,
bool force);
/*!
* \brief Set the given attribute(s) on a ticket
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] ticket_id Ticket to set attributes on
* \param[in] attr_set A hash table of attributes, where keys are the
* attribute names and the values are the attribute
* values
* \param[in] force Attempting to change the granted status of
* \p ticket_id will cause this function to return
* \c EACCES unless \p force is set to \c true
*
* \return Standard Pacemaker return code
*
* \note If no \p ticket_id attribute exists but \p attr_set is non-NULL, the
* ticket will be created with the given attributes.
*/
int pcmk_ticket_set_attr(xmlNodePtr *xml, const char *ticket_id, GHashTable *attr_set,
bool force);
/*!
* \brief Return a ticket's state XML
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] ticket_id Ticket to find state for, or \c NULL for all
* tickets
*
* \return Standard Pacemaker return code
*
* \note If \p ticket_id is not \c NULL and more than one ticket exists with
* that ID, this function returns \c pcmk_rc_duplicate_id.
*/
int pcmk_ticket_state(xmlNodePtr *xml, const char *ticket_id);
/*!
* \brief Ask the cluster to perform fencing
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] target The node that should be fenced
* \param[in] action The fencing action (on, off, reboot) to perform
* \param[in] name Who requested the fence action?
* \param[in] timeout How long to wait for operation to complete (in ms)
* \param[in] tolerance If a successful action for \p target happened within
* this many ms, return 0 without performing the action
* again
* \param[in] delay Apply this delay (in milliseconds) before initiating
* fencing action (-1 applies no delay and also
* disables any fencing delay from pcmk_delay_base and
* pcmk_delay_max)
* \param[out] reason If not NULL, where to put descriptive failure reason
*
* \return Standard Pacemaker return code
* \note If \p reason is not NULL, the caller is responsible for freeing its
* returned value.
*/
int pcmk_request_fencing(xmlNodePtr *xml, const char *target, const char *action,
const char *name, unsigned int timeout,
unsigned int tolerance, int delay, char **reason);
/*!
* \brief List the fencing operations that have occurred for a specific node
*
* \note If \p xml is not NULL, it will be freed first and the previous
* contents lost.
*
* \param[in,out] xml The destination for the result, as an XML tree
* \param[in] target The node to get history for
* \param[in] timeout How long to wait for operation to complete (in ms)
* \param[in] quiet Suppress most output
* \param[in] verbose Include additional output
* \param[in] broadcast Gather fencing history from all nodes
* \param[in] cleanup Clean up fencing history after listing
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_history(xmlNodePtr *xml, const char *target, unsigned int timeout,
bool quiet, int verbose, bool broadcast, bool cleanup);
/*!
* \brief List all installed fence agents
*
* \param[in,out] xml The destination for the result, as an XML tree (if
* not NULL, previous contents will be freed and lost)
- * \param[in] timeout How long to wait for operation to complete (in ms)
+ * \param[in] timeout Ignored
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_installed(xmlNodePtr *xml, unsigned int timeout);
/*!
* \brief When was a device last fenced?
*
* \param[in,out] xml The destination for the result, as an XML tree (if
* not NULL, previous contents will be freed and lost)
* \param[in] target The node that was fenced
* \param[in] as_nodeid If true, \p target has node ID rather than name
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_last(xmlNodePtr *xml, const char *target, bool as_nodeid);
/*!
* \brief List nodes that can be fenced
*
* \param[in,out] xml The destination for the result, as an XML tree (if
* not NULL, previous contents will be freed and lost)
* \param[in] device_id Resource ID of fence device to check
* \param[in] timeout How long to wait for operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_list_targets(xmlNodePtr *xml, const char *device_id,
unsigned int timeout);
/*!
* \brief Get metadata for a fence agent
*
* \note If \p xml is not NULL, it will be freed first and the previous
* contents lost.
*
* \param[in,out] xml The destination for the result, as an XML tree (if
* not NULL, previous contents will be freed and lost)
* \param[in] agent The fence agent to get metadata for
* \param[in] timeout How long to wait for operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_metadata(xmlNodePtr *xml, const char *agent, unsigned int timeout);
/*!
* \brief List registered fence devices
*
* \param[in,out] xml The destination for the result, as an XML tree (if
* not NULL, previous contents will be freed and lost)
* \param[in] target If not NULL, return only devices that can fence this
* \param[in] timeout How long to wait for operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_registered(xmlNodePtr *xml, const char *target, unsigned int timeout);
/*!
* \brief Register a fencing topology level
*
* \param[in,out] xml The destination for the result, as an XML tree (if
* not NULL, previous contents will be freed and lost)
* \param[in] target What fencing level targets (as "name=value" to
* target by given node attribute, or "@pattern" to
* target by node name pattern, or a node name)
* \param[in] fence_level Index number of level to add
* \param[in] devices Devices to use in level as a list of char *
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_register_level(xmlNodePtr *xml, const char *target, int fence_level,
GList *devices);
/*!
* \brief Unregister a fencing topology level
*
* \param[in,out] xml The destination for the result, as an XML tree (if
* not NULL, previous contents will be freed and lost)
* \param[in] target What fencing level targets (as "name=value" to
* target by given node attribute, or "@pattern" to
* target by node name pattern, or a node name)
* \param[in] fence_level Index number of level to remove
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_unregister_level(xmlNodePtr *xml, const char *target, int fence_level);
/*!
* \brief Validate a fence device configuration
*
* \param[in,out] xml The destination for the result, as an XML tree (if
* not NULL, previous contents will be freed and lost)
* \param[in] agent The agent to validate (for example, "fence_xvm")
* \param[in] id Fence device ID (may be NULL)
* \param[in] params Fence device configuration parameters
* \param[in] timeout How long to wait for operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
int pcmk_fence_validate(xmlNodePtr *xml, const char *agent, const char *id,
GHashTable *params, unsigned int timeout);
#ifdef __cplusplus
}
#endif
#endif
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
index 208576bef2..3dec468563 100644
--- a/include/pcmki/pcmki_fence.h
+++ b/include/pcmki/pcmki_fence.h
@@ -1,257 +1,256 @@
/*
* Copyright 2019-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__PCMKI_PCMKI_FENCE__H
#define PCMK__PCMKI_PCMKI_FENCE__H
#include <crm/stonith-ng.h>
#include <crm/common/output_internal.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \brief Control how much of the fencing history is output.
*
* No explicit values are assigned, so the enumerators are numbered 0, 1, 2 in
* declaration order.
*
* NOTE(review): what distinguishes "reduced" from "full" output is decided by
* the code that consumes these values, which is not visible in this header.
*/
enum pcmk__fence_history {
pcmk__fence_history_none,
pcmk__fence_history_reduced,
pcmk__fence_history_full
};
/*!
* \brief Ask the cluster to perform fencing
*
* \note This is the internal version of pcmk_request_fencing(). External users
* of the pacemaker API should use that function instead.
*
* \param[in,out] st A connection to the fencer API
* \param[in] target The node that should be fenced
* \param[in] action The fencing action (on, off, reboot) to perform
* \param[in] name Who requested the fence action?
* \param[in] timeout How long to wait for operation to complete (in ms)
* \param[in] tolerance If a successful action for \p target happened within
* this many milliseconds, return success without
* performing the action again
* \param[in] delay Apply this delay (in milliseconds) before initiating
* fencing action (a value of -1 applies no delay and
* disables any fencing delay from pcmk_delay_base and
* pcmk_delay_max)
* \param[out] reason If not NULL, where to put descriptive failure reason
*
* \return Standard Pacemaker return code
* \note If \p reason is not NULL, the caller is responsible for freeing its
* returned value.
* \todo delay is eventually used with pcmk__create_timer() and should be guint
*/
int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
const char *name, unsigned int timeout,
unsigned int tolerance, int delay, char **reason);
/*!
* \brief List the fencing operations that have occurred for a specific node
*
* \note This is the internal version of pcmk_fence_history(). External users
* of the pacemaker API should use that function instead.
*
* \note \p out should be initialized with pcmk__output_new() before calling this
* function and destroyed with out->finish and pcmk__output_free() before
* reusing it with any other functions in this library.
*
* \param[in,out] out The output functions structure
* \param[in,out] st A connection to the fencer API
* \param[in] target The node to get history for
* \param[in] timeout How long to wait for operation to complete (in ms)
* \param[in] verbose Include additional output
* \param[in] broadcast Gather fencing history from all nodes
* \param[in] cleanup Clean up fencing history after listing
*
* \return Standard Pacemaker return code
*/
int pcmk__fence_history(pcmk__output_t *out, stonith_t *st, const char *target,
unsigned int timeout, int verbose, bool broadcast,
bool cleanup);
/*!
* \brief List all installed fence agents
*
* \note This is the internal version of pcmk_fence_installed(). External users
* of the pacemaker API should use that function instead.
*
* \note \p out should be initialized with pcmk__output_new() before calling this
* function and destroyed with out->finish and pcmk__output_free() before
* reusing it with any other functions in this library.
*
* \param[in,out] out The output functions structure
* \param[in,out] st A connection to the fencer API
- * \param[in] timeout How long to wait for the operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
-int pcmk__fence_installed(pcmk__output_t *out, stonith_t *st, unsigned int timeout);
+int pcmk__fence_installed(pcmk__output_t *out, stonith_t *st);
/*!
* \brief When was a device last fenced?
*
* \note This is the internal version of pcmk_fence_last(). External users
* of the pacemaker API should use that function instead.
*
* \note \p out should be initialized with pcmk__output_new() before calling this
* function and destroyed with out->finish and pcmk__output_free() before
* reusing it with any other functions in this library.
*
* \param[in,out] out The output functions structure.
* \param[in] target The node that was fenced.
* \param[in] as_nodeid If true, \p target has node ID rather than name
*
* \return Standard Pacemaker return code
*/
int pcmk__fence_last(pcmk__output_t *out, const char *target, bool as_nodeid);
/*!
* \brief List nodes that can be fenced
*
* \note This is the internal version of pcmk_fence_list_targets(). External users
* of the pacemaker API should use that function instead.
*
* \note \p out should be initialized with pcmk__output_new() before calling this
* function and destroyed with out->finish and pcmk__output_free() before
* reusing it with any other functions in this library.
*
* \param[in,out] out The output functions structure
* \param[in,out] st A connection to the fencer API
* \param[in] device_id Resource ID of fence device to check
* \param[in] timeout How long to wait for operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
int pcmk__fence_list_targets(pcmk__output_t *out, stonith_t *st,
const char *device_id, unsigned int timeout);
/*!
* \brief Get metadata for a fence agent
*
* \note This is the internal version of pcmk_fence_metadata(). External users
* of the pacemaker API should use that function instead.
*
* \note \p out should be initialized with pcmk__output_new() before calling this
* function and destroyed with out->finish and pcmk__output_free() before
* reusing it with any other functions in this library.
*
* \param[in,out] out The output functions structure
* \param[in,out] st A connection to the fencer API
* \param[in] agent The fence agent to get metadata for
* \param[in] timeout How long to wait for the operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
int pcmk__fence_metadata(pcmk__output_t *out, stonith_t *st, const char *agent,
unsigned int timeout);
/*!
* \brief List registered fence devices
*
* \note This is the internal version of pcmk_fence_registered(). External users
* of the pacemaker API should use that function instead.
*
* \note \p out should be initialized with pcmk__output_new() before calling this
* function and destroyed with out->finish and pcmk__output_free() before
* reusing it with any other functions in this library.
*
* \param[in,out] out The output functions structure
* \param[in,out] st A connection to the fencer API
* \param[in] target If not NULL, return only devices that can fence this
* \param[in] timeout How long to wait for the operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
int pcmk__fence_registered(pcmk__output_t *out, stonith_t *st,
const char *target, unsigned int timeout);
/*!
* \brief Register a fencing level for a specific node, node regex, or attribute
*
* \note This is the internal version of pcmk_fence_register_level(). External users
* of the pacemaker API should use that function instead.
*
* \p target can take three different forms:
* - name=value, in which case \p target is an attribute.
* - @pattern, in which case \p target is a node regex.
* - Otherwise, \p target is a node name.
*
* \param[in,out] st A connection to the fencer API
* \param[in] target The object to register a fencing level for
* \param[in] fence_level Index number of level to add
* \param[in] devices Devices to use in level as a list of char *
*
* \return Standard Pacemaker return code
*/
int pcmk__fence_register_level(stonith_t *st, const char *target,
int fence_level, GList *devices);
/*!
* \brief Unregister a fencing level for specific node, node regex, or attribute
*
* \note This is the internal version of pcmk_fence_unregister_level(). External users
* of the pacemaker API should use that function instead.
*
* \p target can take three different forms:
* - name=value, in which case \p target is an attribute.
* - @pattern, in which case \p target is a node regex.
* - Otherwise, \p target is a node name.
*
* \param[in,out] st A connection to the fencer API
* \param[in] target The object to unregister a fencing level for
* \param[in] fence_level Index number of level to remove
*
* \return Standard Pacemaker return code
*/
int pcmk__fence_unregister_level(stonith_t *st, const char *target,
int fence_level);
/*!
* \brief Validate a fence device configuration
*
* \note This is the internal version of pcmk_fence_validate(). External users
* of the pacemaker API should use that function instead.
*
* \note \p out should be initialized with pcmk__output_new() before calling this
* function and destroyed with out->finish and pcmk__output_free() before
* reusing it with any other functions in this library.
*
* \param[in,out] out The output functions structure
* \param[in,out] st A connection to the fencer API
* \param[in] agent The agent to validate (for example, "fence_xvm")
* \param[in] id Fence device ID (may be NULL)
* \param[in] params Fence device configuration parameters
* \param[in] timeout How long to wait for the operation to complete (in ms)
*
* \return Standard Pacemaker return code
*/
int pcmk__fence_validate(pcmk__output_t *out, stonith_t *st, const char *agent,
const char *id, GHashTable *params, unsigned int timeout);
/*!
* \brief Fetch fencing history, optionally reducing it
*
* \param[in,out] st A connection to the fencer API
* \param[out] stonith_history Destination for storing the history
* \param[in] fence_history How much of the fencing history to display
*
* \return Standard Pacemaker return code
*/
int
pcmk__get_fencing_history(stonith_t *st, stonith_history_t **stonith_history,
enum pcmk__fence_history fence_history);
#ifdef __cplusplus
}
#endif
#endif // PCMK__PCMKI_PCMKI_FENCE__H
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
index 5da7acf026..ad7a17ea26 100644
--- a/lib/fencing/st_actions.c
+++ b/lib/fencing/st_actions.c
@@ -1,717 +1,718 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <libgen.h>
#include <inttypes.h>
#include <sys/types.h>
#include <glib.h>
#include <libxml/tree.h> // xmlNode
#include <crm/crm.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/services_internal.h>
#include "fencing_private.h"
// State for one fence agent action, across all of its attempts
struct stonith_action_s {
/*! user defined data */
char *agent; // Fence agent name (copied in stonith__action_create())
char *action; // Action name (copied in stonith__action_create())
GHashTable *args; // Agent arguments built by make_args()
int timeout; // Overall timeout, in seconds
bool async; // Set by stonith__execute_async()
void *userdata; // Passed to done_cb and fork_cb
// Called by stonith_action_async_done() when no further attempt is started
void (*done_cb) (int pid, const pcmk__action_result_t *result,
void *user_data);
// Called by stonith_action_async_forked() once the child has a PID
void (*fork_cb) (int pid, void *user_data);
svc_action_t *svc_action; // Set in stonith_action_async_forked()
/*! internal timing information */
time_t initial_start_time; // Recorded on the first attempt only
int tries; // Attempts started so far
int remaining_timeout; // Seconds left for further attempts (0 = give up)
int max_retries; // FAILURE_MAX_RETRIES unless pcmk_<action>_retries is set
int pid; // Child PID, recorded in stonith_action_async_forked()
pcmk__action_result_t result; // Outcome of the most recent attempt
};
static int internal_stonith_action_execute(stonith_action_t *action);
static void log_action(stonith_action_t *action, pid_t pid);
/*!
 * \internal
 * \brief Copy a services library action's outcome into a fence action
 *
 * The result is copied with services__copy_result(), then the output obtained
 * from services__grab_stdout() and services__grab_stderr() is attached to it.
 *
 * \param[in,out] action      Fence action whose result should be updated
 * \param[in,out] svc_action  Service action supplying the result and output
 */
static void
set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
{
    pcmk__action_result_t *result = &(action->result);

    services__copy_result(svc_action, result);
    pcmk__set_result_output(result,
                            services__grab_stdout(svc_action),
                            services__grab_stderr(svc_action));
}
/*!
 * \internal
 * \brief Log a fence action's standard error output, if it has any
 *
 * \param[in] action  Fence action whose result should be logged
 * \param[in] pid     Process ID to show in the log prefix
 */
static void
log_action(stonith_action_t *action, pid_t pid)
{
    const char *err_output = action->result.action_stderr;
    char *prefix = NULL;

    if (err_output == NULL) {
        return;
    }

    /* The services library has already logged the output at info or debug
     * level, so just raise to warning for stderr.
     *
     * Logging the whole string at once confuses syslog when it is XML, hence
     * the prefixed output helper.
     */
    prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
    crm_log_output(LOG_WARNING, prefix, err_output);
    free(prefix);
}
/*!
 * \internal
 * \brief Copy one configured device parameter into a fence argument table
 *
 * Intended as a GHFunc for g_hash_table_foreach().
 *
 * \param[in]     key        Parameter name (string)
 * \param[in]     value      Parameter value (string, may be NULL)
 * \param[in,out] user_data  Destination argument table (GHashTable *)
 */
static void
append_config_arg(gpointer key, gpointer value, gpointer user_data)
{
    const char *name = key;
    GHashTable *args = user_data;

    /* Skip parameters handled directly by Pacemaker.
     *
     * STONITH_ATTR_ACTION_OP is added elsewhere and should never be part of
     * the fencing resource's parameter list, so any value configured there is
     * ignored.
     */
    if (pcmk__str_eq(name, STONITH_ATTR_ACTION_OP, pcmk__str_casei)
        || pcmk_stonith_param(name)
        || (strstr(name, CRM_META) != NULL)
        || pcmk__str_eq(name, PCMK_XA_CRM_FEATURE_SET, pcmk__str_none)) {
        return;
    }

    crm_trace("Passing %s=%s with fence action",
              name, (const char *) ((value != NULL)? value : ""));
    pcmk__insert_dup(args, name, pcmk__s(value, ""));
}
/*!
* \internal
* \brief Create a table of arguments for a fencing action
*
- * \param[in] agent Fencing agent name
- * \param[in] action Name of fencing action
- * \param[in] target Name of target node for fencing action
- * \param[in] device_args Fence device parameters
- * \param[in] port_map Target node-to-port mapping for fence device
- * \param[in] host_arg Argument name for passing target
+ * \param[in] agent Fencing agent name
+ * \param[in] action Name of fencing action
+ * \param[in] target Name of target node for fencing action
+ * \param[in] device_args Fence device parameters
+ * \param[in] port_map Target node-to-port mapping for fence device
+ * \param[in] default_host_arg Default agent parameter for passing target
*
* \return Newly created hash table of arguments for fencing action
*/
static GHashTable *
make_args(const char *agent, const char *action, const char *target,
- GHashTable *device_args, GHashTable *port_map, const char *host_arg)
+ GHashTable *device_args, GHashTable *port_map,
+ const char *default_host_arg)
{
GHashTable *arg_list = NULL;
const char *value = NULL;
// A NULL action yields no table at all
CRM_CHECK(action != NULL, return NULL);
arg_list = pcmk__strkey_table(free, free);
// Add action to arguments (using an alias if requested)
if (device_args) {
char buffer[512];
// The alias, if any, is configured as pcmk_<action>_action
snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action);
value = g_hash_table_lookup(device_args, buffer);
if (value) {
crm_debug("Substituting '%s' for fence action %s targeting %s",
value, action, pcmk__s(target, "no node"));
action = value;
}
}
// Tell the fence agent what action to perform
pcmk__insert_dup(arg_list, STONITH_ATTR_ACTION_OP, action);
/* If this is a fencing operation against another node, add more standard
* arguments.
*/
if ((target != NULL) && (device_args != NULL)) {
const char *param = NULL;
/* Always pass the target's name, per
* https://github.com/ClusterLabs/fence-agents/blob/main/doc/FenceAgentAPI.md
*/
pcmk__insert_dup(arg_list, "nodename", target);
// Check whether target should be specified as some other argument
param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT);
if (param == NULL) {
// Use caller's default (likely from agent metadata)
- param = host_arg;
+ param = default_host_arg;
}
// No host argument is added for fence_legacy or when it is PCMK_VALUE_NONE
if ((param != NULL)
&& !pcmk__str_eq(agent, "fence_legacy", pcmk__str_none)
&& !pcmk__str_eq(param, PCMK_VALUE_NONE, pcmk__str_casei)) {
value = g_hash_table_lookup(device_args, param);
if (pcmk__str_eq(value, "dynamic",
pcmk__str_casei|pcmk__str_null_matches)) {
/* If the host argument is "dynamic" or not configured,
* reset it to the target
*/
const char *alias = NULL;
// Prefer the port mapped to this target, if one is configured
if (port_map) {
alias = g_hash_table_lookup(port_map, target);
}
if (alias == NULL) {
alias = target;
}
crm_debug("Passing %s='%s' with fence action %s targeting %s",
param, alias, action, pcmk__s(target, "no node"));
pcmk__insert_dup(arg_list, param, alias);
}
}
}
// Finally, copy the device's own parameters, as filtered by append_config_arg()
if (device_args) {
g_hash_table_foreach(device_args, append_config_arg, arg_list);
}
return arg_list;
}
/*!
 * \internal
 * \brief Release a fence action and everything it owns
 *
 * \param[in,out] action  Action to free (NULL is accepted and ignored)
 */
void
stonith__destroy_action(stonith_action_t *action)
{
    if (action == NULL) {
        return;
    }

    free(action->agent);

    if (action->args != NULL) {
        g_hash_table_destroy(action->args);
    }

    free(action->action);

    if (action->svc_action != NULL) {
        services_action_free(action->svc_action);
    }

    pcmk__reset_result(&(action->result));
    free(action);
}
/*!
 * \internal
 * \brief Access the result stored in a fence action
 *
 * \param[in] action  Executed action
 *
 * \return Pointer to the result inside \p action, or NULL if \p action is NULL
 */
pcmk__action_result_t *
stonith__action_result(stonith_action_t *action)
{
    if (action == NULL) {
        return NULL;
    }
    return &(action->result);
}
#define FAILURE_MAX_RETRIES 2
/*!
* \internal
* \brief Create a new fencing action to be executed
*
- * \param[in] agent Fence agent to use
- * \param[in] action_name Fencing action to be executed
- * \param[in] target Name of target of fencing action (if known)
- * \param[in] timeout_sec Timeout to be used when executing action
- * \param[in] device_args Parameters to pass to fence agent
- * \param[in] port_map Mapping of target names to device ports
- * \param[in] host_arg Agent parameter used to pass target name
+ * \param[in] agent Fence agent to use
+ * \param[in] action_name Fencing action to be executed
+ * \param[in] target Name of target of fencing action (if known)
+ * \param[in] timeout_sec Timeout to be used when executing action
+ * \param[in] device_args Parameters to pass to fence agent
+ * \param[in] port_map Mapping of target names to device ports
+ * \param[in] default_host_arg Default agent parameter for passing target
*
* \return Newly created fencing action (asserts on error, never NULL)
*/
stonith_action_t *
stonith__action_create(const char *agent, const char *action_name,
const char *target, int timeout_sec,
GHashTable *device_args, GHashTable *port_map,
- const char *host_arg)
+ const char *default_host_arg)
{
stonith_action_t *action = pcmk__assert_alloc(1, sizeof(stonith_action_t));
action->args = make_args(agent, action_name, target, device_args, port_map,
- host_arg);
+ default_host_arg);
crm_debug("Preparing '%s' action targeting %s using agent %s",
action_name, pcmk__s(target, "no node"), agent);
// NOTE(review): the strdup() results below are not checked for NULL
action->agent = strdup(agent);
action->action = strdup(action_name);
// The full timeout is available to the first attempt
action->timeout = action->remaining_timeout = timeout_sec;
action->max_retries = FAILURE_MAX_RETRIES;
// Placeholder result, replaced once the action has actually been executed
pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
"Initialization bug in fencing library");
// Allow the device configuration to override the retry limit
if (device_args) {
char buffer[512];
const char *value = NULL;
snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", action_name);
value = g_hash_table_lookup(device_args, buffer);
if (value) {
/* NOTE(review): atoi() does no validation, so a non-numeric value
* becomes 0 here -- confirm the option is validated elsewhere
*/
action->max_retries = atoi(value);
}
}
return action;
}
/*!
 * \internal
 * \brief Recalculate how much time a failed action has left for a retry
 *
 * No time is left once the retry limit is reached, after a timed-out attempt,
 * or when less than 30% of the original timeout remains.
 *
 * \param[in,out] action  Fence action to update
 *
 * \return TRUE if \p action has time remaining for another attempt, else FALSE
 */
static gboolean
update_remaining_timeout(stonith_action_t * action)
{
    int elapsed = time(NULL) - action->initial_start_time;

    // Assume no retry unless the checks below find enough time left
    action->remaining_timeout = 0;

    if (action->tries >= action->max_retries) {
        crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
                 action->agent, action->action, action->max_retries);

    } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT)
               && (elapsed < (action->timeout * 0.7))) {
        /* only set remaining timeout period if there is 30%
         * or greater of the original timeout period left */
        action->remaining_timeout = action->timeout - elapsed;
    }

    return (action->remaining_timeout != 0)? TRUE : FALSE;
}
/*!
 * \internal
 * \brief Translate a fencing action result into a standard return code
 *
 * \param[in] result  Fencing action result to translate
 *
 * \return Standard Pacemaker return code that best corresponds to \p result
 */
int
stonith__result2rc(const pcmk__action_result_t *result)
{
    const char *err_output = NULL;

    if (pcmk__result_ok(result)) {
        return pcmk_rc_ok;
    }

    switch (result->execution_status) {
        case PCMK_EXEC_PENDING:
            return EINPROGRESS;

        case PCMK_EXEC_CANCELLED:
            return ECANCELED;

        case PCMK_EXEC_TIMEOUT:
            return ETIME;

        case PCMK_EXEC_NOT_INSTALLED:
            return ENOENT;

        case PCMK_EXEC_NOT_SUPPORTED:
            return EOPNOTSUPP;

        case PCMK_EXEC_NOT_CONNECTED:
            return ENOTCONN;

        case PCMK_EXEC_NO_FENCE_DEVICE:
            return ENODEV;

        case PCMK_EXEC_NO_SECRETS:
            return EACCES;

        case PCMK_EXEC_INVALID:
            /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API
             * operations that don't involve executing an agent (for example,
             * registering devices). This allows us to use the CRM_EX_* codes
             * in the exit status for finer-grained responses.
             */
            if (result->exit_status == CRM_EX_INVALID_PARAM) {
                return EINVAL;
            }
            if (result->exit_status == CRM_EX_INSUFFICIENT_PRIV) {
                return EACCES;
            }
            if (result->exit_status == CRM_EX_PROTOCOL) {
                return EPROTO;
            }
            if (result->exit_status == CRM_EX_EXPIRED) {
                /* CRM_EX_EXPIRED is used for orphaned fencing operations left
                 * over from a previous instance of the fencer. For API
                 * backward compatibility, this is mapped to the previously
                 * used code for this case, EHOSTUNREACH.
                 */
                return EHOSTUNREACH;
            }
            break;

        default:
            break;
    }

    // Fall back to guessing from the text of the error output
    err_output = result->action_stderr;

    if (err_output == NULL) {
        return ENODATA;
    }
    if (strcasestr(err_output, "timed out")
        || strcasestr(err_output, "timeout")) {
        return ETIME;
    }
    if (strcasestr(err_output, "unrecognised action")
        || strcasestr(err_output, "unrecognized action")
        || strcasestr(err_output, "unsupported action")) {
        return EOPNOTSUPP;
    }

    // Oh well, we tried
    return pcmk_rc_error;
}
/*!
 * \internal
 * \brief Map a legacy fencer return code to an execution status
 *
 * Fence action notifications, and fence action callbacks from older fencers
 * (<=2.1.2) in a rolling upgrade, carry only a legacy return code. This maps
 * it to an execution status as well as possible (essentially the inverse of
 * stonith__result2rc()).
 *
 * \param[in] rc  Legacy return code from fencer
 *
 * \return Execution status best corresponding to \p rc
 */
int
stonith__legacy2status(int rc)
{
    // Non-negative legacy codes indicate completion
    if (rc >= 0) {
        return PCMK_EXEC_DONE;
    }

    switch (-rc) {
        case EINPROGRESS:
            return PCMK_EXEC_PENDING;

        case ECANCELED:
            return PCMK_EXEC_CANCELLED;

        case ETIME:
        case ETIMEDOUT:
            return PCMK_EXEC_TIMEOUT;

        case ENOENT:
            return PCMK_EXEC_NOT_INSTALLED;

        case EOPNOTSUPP:
        case EPROTONOSUPPORT:
            return PCMK_EXEC_NOT_SUPPORTED;

        case ENOTCONN:
            return PCMK_EXEC_NOT_CONNECTED;

        case ENODEV:
            return PCMK_EXEC_NO_FENCE_DEVICE;

        case EACCES:
            return PCMK_EXEC_NO_SECRETS;

        case EHOSTUNREACH:
        case EPROTO:
            return PCMK_EXEC_INVALID;

        default:
            return PCMK_EXEC_ERROR;
    }
}
/*!
 * \internal
 * \brief Store a fencing result on an XML element as attributes
 *
 * \param[in,out] xml     XML element to add result to
 * \param[in]     result  Fencing result to add (assume success if NULL)
 */
void
stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result)
{
    // These defaults describe success, which is assumed when result is NULL
    int legacy_rc = pcmk_ok;
    int status_code = CRM_EX_OK;
    enum pcmk_exec_status exec_status = PCMK_EXEC_DONE;
    const char *reason = NULL;
    const char *output = NULL;

    CRM_CHECK(xml != NULL, return);

    if (result != NULL) {
        legacy_rc = pcmk_rc2legacy(stonith__result2rc(result));
        status_code = result->exit_status;
        exec_status = result->execution_status;
        reason = result->exit_reason;
        output = result->action_stdout;
    }

    crm_xml_add_int(xml, PCMK__XA_OP_STATUS, (int) exec_status);
    crm_xml_add_int(xml, PCMK__XA_RC_CODE, status_code);
    crm_xml_add(xml, PCMK_XA_EXIT_REASON, reason);
    crm_xml_add(xml, PCMK__XA_ST_OUTPUT, output);

    /* @COMPAT Peers in rolling upgrades, Pacemaker Remote nodes, and external
     * code that use libstonithd <=2.1.2 don't check for the full result, and
     * need a legacy return code instead.
     */
    crm_xml_add_int(xml, PCMK__XA_ST_RC, legacy_rc);
}
/*!
 * \internal
 * \brief Find an XML element that carries a fencing result
 *
 * The search runs against the document that contains \p xml (xml->doc), first
 * for an element with a full result (PCMK__XA_RC_CODE), then for one with only
 * a legacy return code (PCMK__XA_ST_RC).
 *
 * \param[in] xml  XML element whose document should be searched
 *
 * \return Matching element, or NULL if \p xml is NULL or nothing matches
 *
 * \note NOTE(review): the XPath expressions start with "//", so a match is
 *       presumably not limited to \p xml and its descendants -- confirm
 *       against pcmk__xpath_find_one().
 */
xmlNode *
stonith__find_xe_with_result(xmlNode *xml)
{
    xmlNode *match = NULL;

    // Guard the xml->doc dereference below, as sibling XML helpers do
    CRM_CHECK(xml != NULL, return NULL);

    match = pcmk__xpath_find_one(xml->doc, "//*[@" PCMK__XA_RC_CODE "]",
                                 LOG_NEVER);

    if (match == NULL) {
        /* @COMPAT Peers <=2.1.2 in a rolling upgrade provide only a legacy
         * return code, not a full result, so check for that.
         */
        match = pcmk__xpath_find_one(xml->doc, "//*[@" PCMK__XA_ST_RC "]",
                                     LOG_ERR);
    }
    return match;
}
/*!
 * \internal
 * \brief Read a fencing result from an XML element's attributes
 *
 * If the element lacks a full result (exit status plus execution status), a
 * legacy return code is used instead when present; otherwise an error result
 * is stored.
 *
 * \param[in]  xml     XML element with fencing result
 * \param[out] result  Where to store fencing result
 */
void
stonith__xe_get_result(const xmlNode *xml, pcmk__action_result_t *result)
{
    int exit_status = CRM_EX_OK;
    int execution_status = PCMK_EXEC_DONE;
    int legacy_rc = pcmk_ok;
    const char *reason = NULL;
    char *output = NULL;
    bool have_full_result = false;

    CRM_CHECK((xml != NULL) && (result != NULL), return);

    reason = crm_element_value(xml, PCMK_XA_EXIT_REASON);
    output = crm_element_value_copy(xml, PCMK__XA_ST_OUTPUT);

    // A result must include an exit status and execution status
    have_full_result =
        (crm_element_value_int(xml, PCMK__XA_RC_CODE, &exit_status) >= 0)
        && (crm_element_value_int(xml, PCMK__XA_OP_STATUS,
                                  &execution_status) >= 0);

    if (!have_full_result) {
        /* @COMPAT Peers <=2.1.2 in rolling upgrades provide only a legacy
         * return code, not a full result, so check for that.
         */
        if (crm_element_value_int(xml, PCMK__XA_ST_RC, &legacy_rc) != 0) {
            exit_status = CRM_EX_ERROR;
            execution_status = PCMK_EXEC_ERROR;
            reason = "Fencer reply contained neither a full result "
                     "nor a legacy return code (bug?)";

        } else {
            bool legacy_ok = (legacy_rc == pcmk_ok)
                             || (legacy_rc == -EINPROGRESS);

            exit_status = legacy_ok? CRM_EX_OK : CRM_EX_ERROR;
            execution_status = stonith__legacy2status(legacy_rc);
            reason = pcmk_strerror(legacy_rc);
        }
    }

    pcmk__set_result(result, exit_status, execution_status, reason);
    pcmk__set_result_output(result, output, NULL);
}
/*!
 * \internal
 * \brief Handle completion of an asynchronous fence agent process
 *
 * Records and logs the result. After a failure, another attempt is started if
 * time remains; otherwise the caller's callback is invoked and the fence
 * action is freed.
 *
 * \param[in,out] svc_action  Completed services library action; its cb_data
 *                            is the fence action
 */
static void
stonith_action_async_done(svc_action_t *svc_action)
{
    stonith_action_t *action = svc_action->cb_data;

    set_result_from_svc_action(action, svc_action);

    /* params was pointed at action->args by stonith_action_to_svc(); detach
     * it here (presumably so the services library does not free the table)
     */
    svc_action->params = NULL;

    log_action(action, action->pid);

    // After a failure, try again if possible; the retry then owns the action
    if (!pcmk__result_ok(&(action->result))
        && update_remaining_timeout(action)
        && (internal_stonith_action_execute(action) == pcmk_ok)) {
        return;
    }

    if (action->done_cb != NULL) {
        action->done_cb(action->pid, &(action->result), action->userdata);
    }

    action->svc_action = NULL; // don't remove our caller
    stonith__destroy_action(action);
}
/*!
 * \internal
 * \brief Handle a successful fork of an asynchronous fence agent process
 *
 * \param[in,out] svc_action  Services library action that was forked; its
 *                            cb_data is the fence action
 */
static void
stonith_action_async_forked(svc_action_t *svc_action)
{
    stonith_action_t *action = svc_action->cb_data;

    // Remember which child process and service action belong to this action
    action->svc_action = svc_action;
    action->pid = svc_action->pid;

    if (action->fork_cb != NULL) {
        action->fork_cb(action->pid, action->userdata);
    }

    pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING,
                     NULL);

    crm_trace("Child process %d performing action '%s' successfully forked",
              action->pid, action->action);
}
/*!
 * \internal
 * \brief Build the services library action that runs a fence action
 *
 * \param[in,out] action  Fencing library action to convert
 *
 * \return Services library action equivalent to \p action on success; on
 *         error, NULL is returned and \p action's result is set
 */
static svc_action_t *
stonith_action_to_svc(stonith_action_t *action)
{
    static int stonith_sequence = 0;

    svc_action_t *svc = NULL;
    char *agent_path = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s",
                                         action->agent);

    svc = services_action_create_generic(agent_path, NULL);
    free(agent_path);

    // Any rc other than PCMK_OCF_UNKNOWN at this point is treated as an error
    if (svc->rc != PCMK_OCF_UNKNOWN) {
        set_result_from_svc_action(action, svc);
        services_action_free(svc);
        return NULL;
    }

    svc->id = crm_strdup_printf("%s_%s_%dof%d", action->agent, action->action,
                                action->tries, action->max_retries);
    svc->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_STONITH);
    svc->agent = pcmk__str_copy(action->agent);

    // The fence action's timeout is in seconds; scale it by 1000 here
    svc->timeout = action->remaining_timeout * 1000;
    svc->sequence = stonith_sequence++;

    // Shares the fence action's argument table rather than copying it
    svc->params = action->args;
    svc->cb_data = (void *) action;

    svc->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action",
                                    svc->id, svc->flags,
                                    SVC_ACTION_NON_BLOCKED,
                                    "SVC_ACTION_NON_BLOCKED");
    return svc;
}
/*!
* \internal
* \brief Run a single attempt of a fence action via the services library
*
* \param[in,out] action Fence action to execute
*
* \return pcmk_ok if an asynchronous attempt was handed to the services
* library or a synchronous attempt was run, otherwise a negative
* errno-style code (-EINVAL, -E2BIG, or -ECONNABORTED)
*/
static int
internal_stonith_action_execute(stonith_action_t * action)
{
int rc = pcmk_ok;
int is_retry = 0;
svc_action_t *svc_action = NULL;
CRM_CHECK(action != NULL, return -EINVAL);
if ((action->action == NULL) || (action->args == NULL)
|| (action->agent == NULL)) {
pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR_FATAL, "Bug in fencing library");
return -EINVAL;
}
// tries is incremented for every attempt, including this one
if (action->tries++ == 0) {
// First attempt of the desired action
action->initial_start_time = time(NULL);
} else {
// Later attempt after earlier failure
crm_info("Attempt %d to execute '%s' action of agent %s "
"(%ds timeout remaining)",
action->tries, action->action, action->agent,
action->remaining_timeout);
is_retry = 1;
}
svc_action = stonith_action_to_svc(action);
if (svc_action == NULL) {
// The only possible errors are out-of-memory and too many arguments
return -E2BIG;
}
/* keep retries from executing out of control and free previous results */
if (is_retry) {
pcmk__reset_result(&(action->result));
// @TODO This should be nonblocking via timer if mainloop is used
sleep(1);
}
if (action->async) {
// We never create a recurring action, so this should always return TRUE
// The outcome is reported later, through stonith_action_async_done()
CRM_LOG_ASSERT(services_action_async_fork_notify(svc_action,
&stonith_action_async_done,
&stonith_action_async_forked));
return pcmk_ok;
} else if (!services_action_sync(svc_action)) {
rc = -ECONNABORTED; // @TODO Update API to return more useful error
}
set_result_from_svc_action(action, svc_action);
/* params aliases action->args (see stonith_action_to_svc()); detach it before
* freeing svc_action, presumably so the argument table is not freed with it
*/
svc_action->params = NULL;
services_action_free(svc_action);
return rc;
}
/*!
* \internal
* \brief Kick off execution of an async stonith action
*
* \param[in,out] action Action to be executed
* \param[in,out] userdata Datapointer to be passed to callbacks
* \param[in] done Callback to notify action has failed/succeeded
* \param[in] fork_cb Callback to notify successful fork of child
*
* \return pcmk_ok if ownership of action has been taken, -errno otherwise
*/
int
stonith__execute_async(stonith_action_t * action, void *userdata,
void (*done) (int pid,
const pcmk__action_result_t *result,
void *user_data),
void (*fork_cb) (int pid, void *user_data))
{
if (!action) {
return -EINVAL;
}
// Record the callbacks and mark the action async before starting it
action->userdata = userdata;
action->done_cb = done;
action->fork_cb = fork_cb;
action->async = true;
return internal_stonith_action_execute(action);
}
/*!
 * \internal
 * \brief Execute a stonith action, retrying after failed attempts
 *
 * \param[in,out] action  Action to execute
 *
 * \return pcmk_ok on success, -errno otherwise
 */
int
stonith__execute(stonith_action_t *action)
{
    int rc = pcmk_ok;

    CRM_CHECK(action != NULL, return -EINVAL);

    // Keep trying until success, max retries, or timeout
    for (;;) {
        rc = internal_stonith_action_execute(action);
        if ((rc == pcmk_ok) || !update_remaining_timeout(action)) {
            break;
        }
    }
    return rc;
}
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 52d348d27d..8122af17d9 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,2733 +1,2942 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <inttypes.h>
#include <sys/types.h>
#include <glib.h>
#include <libxml/tree.h> // xmlNode
#include <libxml/xpath.h> // xmlXPathObject, etc.
#include <crm/crm.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include "fencing_private.h"
CRM_TRACE_INIT_DATA(stonith);
// Used as stonith_t:st_private
typedef struct stonith_private_s {
char *token; // Freed and reset to NULL by stonith_connection_destroy()
crm_ipc_t *ipc; // Reset to NULL by stonith_connection_destroy()
mainloop_io_t *source; // Reset to NULL by stonith_connection_destroy()
GHashTable *stonith_op_callback_table;
GList *notify_list; // Entries are stonith_notify_client_t
int notify_refcnt; // Count of foreach_notify_entry() iterations in progress
bool notify_deletes; // Whether notify_list may hold entries marked delete
void (*op_callback) (stonith_t * st, stonith_callback_data_t * data);
} stonith_private_t;
// Used as stonith_event_t:opaque
struct event_private {
pcmk__action_result_t result;
};
// One entry in stonith_private_t:notify_list
typedef struct stonith_notify_client_s {
const char *event;
const char *obj_id; /* implement one day */
const char *obj_type; /* implement one day */
void (*notify) (stonith_t * st, stonith_event_t * e);
// Marked for removal; freed by foreach_notify_entry() once no iteration is active
bool delete;
} stonith_notify_client_t;
/* Registration of a callback for a fencer operation
* NOTE(review): presumably the value type of
* stonith_private_t:stonith_op_callback_table -- confirm against its users
*/
typedef struct stonith_callback_client_s {
void (*callback) (stonith_t * st, stonith_callback_data_t * data);
const char *id;
void *user_data;
gboolean only_success;
gboolean allow_timeout_updates;
struct timer_rec_s *timer;
} stonith_callback_client_t;
// Passed as user_data to stonith_send_notification() via foreach_notify_entry()
struct notify_blob_s {
stonith_t *stonith; // Fencer API object the notification is for
xmlNode *xml; // Notification XML
};
// Timer record referenced by stonith_callback_client_t:timer
struct timer_rec_s {
int call_id;
int timeout;
guint ref;
stonith_t *stonith;
};
typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *,
xmlNode *, xmlNode *, xmlNode **, xmlNode **);
-bool stonith_dispatch(stonith_t * st);
xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
static int stonith_send_command(stonith_t *stonith, const char *op,
xmlNode *data, xmlNode **output_data,
int call_options, int timeout);
static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int stonith_api_del_notification(stonith_t *stonith,
const char *event);
+
/*!
- * \brief Get agent namespace by name
+ * \internal
+ * \brief Parse fence agent namespace from a string
*
* \param[in] namespace_s Name of namespace as string
*
- * \return Namespace as enum value
+ * \return enum value parsed from \p namespace_s
*/
-enum stonith_namespace
-stonith_text2namespace(const char *namespace_s)
+static enum stonith_namespace
+parse_namespace(const char *namespace_s)
{
// With pcmk__str_null_matches, a NULL string is treated the same as "any"
if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) {
return st_namespace_any;
-
- } else if (!strcmp(namespace_s, "redhat")
- || !strcmp(namespace_s, "stonith-ng")) {
+ }
+ /* @TODO Is "redhat" still necessary except for stonith_text2namespace()
+ * backward compatibility?
+ */
+ if (pcmk__str_any_of(namespace_s, "redhat", "stonith-ng", NULL)) {
return st_namespace_rhcs;
-
- } else if (!strcmp(namespace_s, "internal")) {
+ }
+ if (pcmk__str_eq(namespace_s, "internal", pcmk__str_none)) {
return st_namespace_internal;
-
- } else if (!strcmp(namespace_s, "heartbeat")) {
+ }
+ if (pcmk__str_eq(namespace_s, "heartbeat", pcmk__str_none)) {
return st_namespace_lha;
}
// Any other name is not a known namespace
return st_namespace_invalid;
}
/*!
- * \brief Get agent namespace name
+ * \internal
+ * \brief Get name of a fence agent namespace as a string
*
- * \param[in] namespace Namespace as enum value
+ * \param[in] st_namespace Namespace as enum value
*
- * \return Namespace name as string
+ * \return Name of \p st_namespace as a string
*/
-const char *
-stonith_namespace2text(enum stonith_namespace st_namespace)
+static const char *
+namespace_text(enum stonith_namespace st_namespace)
{
// Values without a case below (such as st_namespace_invalid) yield "unsupported"
switch (st_namespace) {
- case st_namespace_any: return "any";
- case st_namespace_rhcs: return "stonith-ng";
- case st_namespace_internal: return "internal";
- case st_namespace_lha: return "heartbeat";
- default: break;
+ case st_namespace_any:
+ return "any";
+ case st_namespace_rhcs:
+ return "stonith-ng";
+ case st_namespace_internal:
+ return "internal";
+ case st_namespace_lha:
+ return "heartbeat";
+ default:
+ return "unsupported";
}
- return "unsupported";
}
/*!
- * \brief Determine namespace of a fence agent
+ * \internal
+ * \brief Determine fence agent namespace from agent name
*
- * \param[in] agent Fence agent type
- * \param[in] namespace_s Name of agent namespace as string, if known
+ * This involves external checks (for example, checking the existence of a file
+ * or calling an external library function).
*
- * \return Namespace of specified agent, as enum value
+ * \param[in] agent Fence agent name
+ *
+ * \return Namespace to which \p agent belongs, or \c st_namespace_invalid if
+ * not found
*/
-enum stonith_namespace
-stonith_get_namespace(const char *agent, const char *namespace_s)
+static enum stonith_namespace
+get_namespace_from_agent(const char *agent)
{
- if (pcmk__str_eq(namespace_s, "internal", pcmk__str_none)) {
- return st_namespace_internal;
- }
-
if (stonith__agent_is_rhcs(agent)) {
return st_namespace_rhcs;
}
// LHA agents are considered only when built with HAVE_STONITH_STONITH_H
#if HAVE_STONITH_STONITH_H
if (stonith__agent_is_lha(agent)) {
return st_namespace_lha;
}
-#endif
+#endif // HAVE_STONITH_STONITH_H
return st_namespace_invalid;
}
/*!
* \internal
* \brief Check whether watchdog fencing is enabled for a node
*
* Asks the fencer for the target list of the STONITH_WATCHDOG_ID device. An
* empty target list counts as enabled for the node; otherwise the node must
* appear in the list (compared using pcmk__str_casei).
*
* \param[in,out] st Fencer API object to use (if NULL, a temporary one is
* created, connected, disconnected, and freed here)
* \param[in] node Name of node to check
*
* \return TRUE if watchdog fencing is enabled for \p node, otherwise FALSE
*/
gboolean
stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
{
gboolean rv = FALSE;
- stonith_t *stonith_api = st?st:stonith_api_new();
+ stonith_t *stonith_api = (st != NULL)? st : stonith__api_new();
char *list = NULL;
if(stonith_api) {
if (stonith_api->state == stonith_disconnected) {
int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL);
if (rc != pcmk_ok) {
crm_err("Failed connecting to Stonith-API for watchdog-fencing-query.");
}
}
if (stonith_api->state != stonith_disconnected) {
/* caveat!!!
* this might fail when stonithd is just updating the device-list
* probably something we should fix as well for other api-calls */
int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0);
if ((rc != pcmk_ok) || (list == NULL)) {
/* due to the race described above it can happen that
* we drop in here - so as not to make remote nodes
* panic on that answer
*/
if (rc == -ENODEV) {
crm_notice("Cluster does not have watchdog fencing device");
} else {
crm_warn("Could not check for watchdog fencing device: %s",
pcmk_strerror(rc));
}
} else if (list[0] == '\0') {
// An empty target list counts as enabled for the node
rv = TRUE;
} else {
GList *targets = stonith__parse_targets(list);
rv = pcmk__str_in_list(node, targets, pcmk__str_casei);
g_list_free_full(targets, free);
}
free(list);
if (!st) {
/* if we're provided the api we still might have done the
* connection - but let's assume the caller won't bother
*/
stonith_api->cmds->disconnect(stonith_api);
}
}
// Free the API object only if it was created here
if (!st) {
- stonith_api_delete(stonith_api);
+ stonith__api_free(stonith_api);
}
} else {
crm_err("Stonith-API for watchdog-fencing-query couldn't be created.");
}
crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.",
node, rv?"":"not ");
return rv;
}
gboolean
stonith__watchdog_fencing_enabled_for_node(const char *node)
{
return stonith__watchdog_fencing_enabled_for_node_api(NULL, node);
}
/*!
 * \internal
 * \brief Call a function for each registered notification client
 *
 * Entries must not be deleted while the list is being cycled through, so they
 * are only marked for deletion. Once no iteration is using the list any more,
 * the marked entries are removed and freed here.
 *
 * \param[in,out] private    Fencer API private data holding the notify list
 * \param[in]     func       Function to call for each list entry
 * \param[in,out] user_data  Data to pass to \p func
 */
static void
foreach_notify_entry (stonith_private_t *private,
                      GFunc func,
                      gpointer user_data)
{
    GList *iter = NULL;
    GList *next = NULL;

    private->notify_refcnt++;
    g_list_foreach(private->notify_list, func, user_data);
    private->notify_refcnt--;

    // Purge only when this was the outermost iteration and something is marked
    if ((private->notify_refcnt != 0) || !private->notify_deletes) {
        return;
    }

    private->notify_deletes = FALSE;

    for (iter = private->notify_list; iter != NULL; iter = next) {
        stonith_notify_client_t *client = iter->data;

        // Fetch the successor first, since iter may be unlinked below
        next = g_list_next(iter);

        if (client->delete) {
            free(client);
            private->notify_list = g_list_delete_link(private->notify_list,
                                                      iter);
        }
    }
}
/*!
 * \internal
 * \brief IPC destroy callback: mark connection dead and notify clients
 *
 * Clears the connection members of the private data, sets the API state to
 * disconnected, and passes a disconnect notification to every notification
 * entry.
 *
 * \param[in,out] user_data  Fencer API object (\c stonith_t *)
 */
static void
stonith_connection_destroy(gpointer user_data)
{
    stonith_t *st = user_data;
    stonith_private_t *priv = NULL;
    struct notify_blob_s note;

    crm_trace("Sending destroyed notification");

    note.stonith = st;
    note.xml = pcmk__xe_create(NULL, PCMK__XE_NOTIFY);

    // Forget everything about the defunct connection
    priv = st->st_private;
    priv->source = NULL;
    priv->ipc = NULL;
    free(priv->token);
    priv->token = NULL;
    st->state = stonith_disconnected;

    // Describe the disconnect as a notification and hand it to every entry
    crm_xml_add(note.xml, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
    crm_xml_add(note.xml, PCMK__XA_SUBT, PCMK__VALUE_ST_NOTIFY_DISCONNECT);
    foreach_notify_entry(priv, stonith_send_notification, &note);

    pcmk__xml_free(note.xml);
}
/*!
 * \internal
 * \brief Create XML for a fence device registration request
 *
 * \param[in] id            Value for the device element's ID attribute
 * \param[in] standard      Agent namespace; when Linux-HA support is compiled
 *                          in and this is \c st_namespace_any, the namespace
 *                          is looked up from \p agent instead
 * \param[in] agent         Fence agent name
 * \param[in] params        Linked list of key/value pairs, each passed to
 *                          hash2field() with the attributes child element
 * \param[in] rsc_provides  If not NULL, value for \c PCMK__XA_RSC_PROVIDES
 *
 * \return Newly created device XML element (not freed by this function)
 */
xmlNode *
create_device_registration_xml(const char *id, enum stonith_namespace standard,
const char *agent,
const stonith_key_value_t *params,
const char *rsc_provides)
{
xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID);
xmlNode *args = pcmk__xe_create(data, PCMK__XE_ATTRIBUTES);
#if HAVE_STONITH_STONITH_H
if (standard == st_namespace_any) {
- standard = stonith_get_namespace(agent, NULL);
+ standard = get_namespace_from_agent(agent);
}
if (standard == st_namespace_lha) {
/* Linux-HA agents are registered under the agent name "fence_legacy"; the
 * real agent name is handed over as the "plugin" argument (presumably
 * hash2field() stores it as an attribute of args -- verify).
 */
hash2field((gpointer) "plugin", (gpointer) agent, args);
agent = "fence_legacy";
}
#endif
crm_xml_add(data, PCMK_XA_ID, id);
crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__);
crm_xml_add(data, PCMK_XA_AGENT, agent);
// Only record the namespace if it is a definite one
if ((standard != st_namespace_any) && (standard != st_namespace_invalid)) {
- crm_xml_add(data, PCMK__XA_NAMESPACE,
- stonith_namespace2text(standard));
+ crm_xml_add(data, PCMK__XA_NAMESPACE, namespace_text(standard));
}
if (rsc_provides) {
crm_xml_add(data, PCMK__XA_RSC_PROVIDES, rsc_provides);
}
for (; params; params = params->next) {
hash2field((gpointer) params->key, (gpointer) params->value, args);
}
return data;
}
/*!
 * \internal
 * \brief Send a device registration (\c STONITH_OP_DEVICE_ADD) request
 *
 * \param[in,out] st            Fencer connection
 * \param[in]     call_options  Call options passed to stonith_send_command()
 * \param[in]     id            ID of device to register
 * \param[in]     namespace_s   Agent namespace as text
 * \param[in]     agent         Fence agent name
 * \param[in]     params        Linked list of device parameters
 *
 * \return Result of stonith_send_command()
 */
static int
stonith_api_register_device(stonith_t *st, int call_options,
const char *id, const char *namespace_s,
const char *agent,
const stonith_key_value_t *params)
{
int rc = 0;
xmlNode *data = NULL;
- data = create_device_registration_xml(id,
- stonith_text2namespace(namespace_s),
+ data = create_device_registration_xml(id, parse_namespace(namespace_s),
agent, params, NULL);
rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0);
// The request XML is no longer needed once the command has been sent
pcmk__xml_free(data);
return rc;
}
/*!
 * \internal
 * \brief Send a device removal (\c STONITH_OP_DEVICE_DEL) request
 *
 * \param[in,out] st            Fencer connection
 * \param[in]     call_options  Call options passed to stonith_send_command()
 * \param[in]     name          ID of device to remove
 *
 * \return Result of stonith_send_command()
 */
static int
stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
{
    int result = 0;
    xmlNode *request = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID);

    crm_xml_add(request, PCMK__XA_ST_ORIGIN, __func__);
    crm_xml_add(request, PCMK_XA_ID, name);

    result = stonith_send_command(st, STONITH_OP_DEVICE_DEL, request, NULL,
                                  call_options, 0);
    pcmk__xml_free(request);
    return result;
}
/*!
 * \internal
 * \brief Send a fencing level removal (\c STONITH_OP_LEVEL_DEL) request
 *
 * The target is identified by exactly one of node name, node name pattern, or
 * node attribute name/value pair, checked in that order of precedence.
 *
 * \param[in,out] st       Fencer connection
 * \param[in]     options  Call options passed to stonith_send_command()
 * \param[in]     node     If not NULL, target level by this node name
 * \param[in]     pattern  If not NULL, target by node name using this pattern
 * \param[in]     attr     If not NULL, target by this node attribute
 * \param[in]     value    If not NULL, target by this node attribute value
 * \param[in]     level    Index number of level to remove
 *
 * \return -EINVAL if no target was given, otherwise the result of
 *         stonith_send_command()
 */
static int
stonith_api_remove_level_full(stonith_t *st, int options,
                              const char *node, const char *pattern,
                              const char *attr, const char *value, int level)
{
    int result = 0;
    xmlNode *request = NULL;

    CRM_CHECK(node || pattern || (attr && value), return -EINVAL);

    request = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL);
    crm_xml_add(request, PCMK__XA_ST_ORIGIN, __func__);

    if ((node == NULL) && (pattern == NULL)) {
        crm_xml_add(request, PCMK_XA_TARGET_ATTRIBUTE, attr);
        crm_xml_add(request, PCMK_XA_TARGET_VALUE, value);

    } else if (node != NULL) {
        crm_xml_add(request, PCMK_XA_TARGET, node);

    } else {
        crm_xml_add(request, PCMK_XA_TARGET_PATTERN, pattern);
    }

    crm_xml_add_int(request, PCMK_XA_INDEX, level);

    result = stonith_send_command(st, STONITH_OP_LEVEL_DEL, request, NULL,
                                  options, 0);
    pcmk__xml_free(request);
    return result;
}
/*!
 * \internal
 * \brief Remove a fencing level that is targeted by node name
 *
 * \param[in,out] st       Fencer connection
 * \param[in]     options  Call options
 * \param[in]     node     Node name that the level targets
 * \param[in]     level    Index number of level to remove
 *
 * \return Result of stonith_api_remove_level_full()
 */
static int
stonith_api_remove_level(stonith_t * st, int options, const char *node, int level)
{
    const char *no_pattern = NULL;
    const char *no_attr = NULL;
    const char *no_value = NULL;

    return stonith_api_remove_level_full(st, options, node, no_pattern,
                                         no_attr, no_value, level);
}
/*!
 * \internal
 * \brief Build the XML for a fencing topology level registration request
 *
 * \param[in] node         If not NULL, target level by this node name
 * \param[in] pattern      If not NULL, target by node name using this regex
 * \param[in] attr         If not NULL, target by this node attribute
 * \param[in] value        If not NULL, target by this node attribute value
 * \param[in] level        Index number of level to register
 * \param[in] device_list  List of devices in level
 *
 * \return Newly allocated XML tree on success, NULL otherwise
 *
 * \note The caller should set only one of node, pattern or attr/value. If
 *       more than one is set, node takes precedence over pattern, which takes
 *       precedence over attr/value.
 */
xmlNode *
create_level_registration_xml(const char *node, const char *pattern,
                              const char *attr, const char *value,
                              int level, const stonith_key_value_t *device_list)
{
    xmlNode *request = NULL;
    GString *devices = NULL;
    const stonith_key_value_t *device = device_list;

    CRM_CHECK(node || pattern || (attr && value), return NULL);

    request = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL);

    crm_xml_add(request, PCMK__XA_ST_ORIGIN, __func__);
    crm_xml_add_int(request, PCMK_XA_ID, level);
    crm_xml_add_int(request, PCMK_XA_INDEX, level);

    if (node != NULL) {
        crm_xml_add(request, PCMK_XA_TARGET, node);

    } else if (pattern != NULL) {
        crm_xml_add(request, PCMK_XA_TARGET_PATTERN, pattern);

    } else {
        crm_xml_add(request, PCMK_XA_TARGET_ATTRIBUTE, attr);
        crm_xml_add(request, PCMK_XA_TARGET_VALUE, value);
    }

    // Collapse the device list into one comma-separated attribute value
    while (device != NULL) {
        pcmk__add_separated_word(&devices, 1024, device->value, ",");
        device = device->next;
    }

    if (devices != NULL) {
        crm_xml_add(request, PCMK_XA_DEVICES, (const char *) devices->str);
        g_string_free(devices, TRUE);
    }

    return request;
}
/*!
 * \internal
 * \brief Send a fencing level registration (\c STONITH_OP_LEVEL_ADD) request
 *
 * \param[in,out] st           Fencer connection
 * \param[in]     options      Call options passed to stonith_send_command()
 * \param[in]     node         If not NULL, target level by this node name
 * \param[in]     pattern      If not NULL, target by node name pattern
 * \param[in]     attr         If not NULL, target by this node attribute
 * \param[in]     value        If not NULL, target by this attribute value
 * \param[in]     level        Index number of level to register
 * \param[in]     device_list  List of devices in level
 *
 * \return -EINVAL if the request XML could not be created, otherwise the
 *         result of stonith_send_command()
 */
static int
stonith_api_register_level_full(stonith_t *st, int options, const char *node,
                                const char *pattern, const char *attr,
                                const char *value, int level,
                                const stonith_key_value_t *device_list)
{
    int result = 0;
    xmlNode *request = NULL;

    request = create_level_registration_xml(node, pattern, attr, value, level,
                                            device_list);
    CRM_CHECK(request != NULL, return -EINVAL);

    result = stonith_send_command(st, STONITH_OP_LEVEL_ADD, request, NULL,
                                  options, 0);
    pcmk__xml_free(request);
    return result;
}
/*!
 * \internal
 * \brief Register a fencing level that is targeted by node name
 *
 * \param[in,out] st           Fencer connection
 * \param[in]     options      Call options
 * \param[in]     node         Node name that the level targets
 * \param[in]     level        Index number of level to register
 * \param[in]     device_list  List of devices in level
 *
 * \return Result of stonith_api_register_level_full()
 */
static int
stonith_api_register_level(stonith_t * st, int options, const char *node, int level,
                           const stonith_key_value_t * device_list)
{
    const char *no_pattern = NULL;
    const char *no_attr = NULL;
    const char *no_value = NULL;

    return stonith_api_register_level_full(st, options, node, no_pattern,
                                           no_attr, no_value, level,
                                           device_list);
}
/*!
 * \internal
 * \brief List installed fence agents for the requested namespace(s)
 *
 * \param[in]     stonith       Not used by this function
 * \param[in]     call_options  Not used by this function
 * \param[in]     namespace_s   Agent namespace as text
 * \param[in,out] devices       Agent list passed to the per-namespace list
 *                              helpers; must not be NULL
 * \param[in]     timeout       Not used by this function
 *
 * \return -EFAULT if \p devices is NULL, otherwise the sum of the values
 *         returned by the per-namespace list helpers that were called
 */
static int
stonith_api_device_list(stonith_t *stonith, int call_options,
const char *namespace_s, stonith_key_value_t **devices,
int timeout)
{
int count = 0;
- enum stonith_namespace ns = stonith_text2namespace(namespace_s);
+ enum stonith_namespace ns = parse_namespace(namespace_s);
if (devices == NULL) {
crm_err("Parameter error: stonith_api_device_list");
return -EFAULT;
}
#if HAVE_STONITH_STONITH_H
// Include Linux-HA agents if requested
if ((ns == st_namespace_any) || (ns == st_namespace_lha)) {
count += stonith__list_lha_agents(devices);
}
#endif
// Include Red Hat agents if requested
if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) {
count += stonith__list_rhcs_agents(devices);
}
return count;
}
// See stonith_api_operations_t:metadata() documentation
/* The stonith and call_options arguments are not used by this function.
 * Returns the result of the namespace-specific metadata helper, or -ENODEV if
 * the agent's namespace is not one of the cases handled below.
 */
static int
stonith_api_device_metadata(stonith_t *stonith, int call_options,
const char *agent, const char *namespace_s,
char **output, int timeout_sec)
{
/* By executing meta-data directly, we can get it from stonith_admin when
* the cluster is not running, which is important for higher-level tools.
*/
- enum stonith_namespace ns = stonith_get_namespace(agent, namespace_s);
+ enum stonith_namespace ns = get_namespace_from_agent(agent);
/* NOTE(review): the constant's name suggests milliseconds, while the
 * variable's name suggests seconds -- confirm the intended unit.
 */
if (timeout_sec <= 0) {
timeout_sec = PCMK_DEFAULT_ACTION_TIMEOUT_MS;
}
- crm_trace("Looking up metadata for %s agent %s",
- stonith_namespace2text(ns), agent);
+ crm_trace("Looking up metadata for %s agent %s", namespace_text(ns), agent);
switch (ns) {
case st_namespace_rhcs:
return stonith__rhcs_metadata(agent, timeout_sec, output);
#if HAVE_STONITH_STONITH_H
case st_namespace_lha:
return stonith__lha_metadata(agent, timeout_sec, output);
#endif
default:
crm_err("Can't get fence agent '%s' meta-data: No such agent",
agent);
break;
}
return -ENODEV;
}
/*!
 * \internal
 * \brief Query the fencer for devices (\c STONITH_OP_QUERY) for a target
 *
 * The request asks about the \c PCMK_ACTION_OFF action. Every element in the
 * reply that has an agent attribute contributes its ID to \p devices.
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options passed to stonith_send_command()
 * \param[in]     target        Target set in the request
 * \param[in,out] devices       List that matching device IDs are added to;
 *                              must not be NULL
 * \param[in]     timeout       Passed to stonith_send_command()
 *
 * \return -EINVAL if \p devices is NULL, a negative result of
 *         stonith_send_command() on failure, otherwise the number of XPath
 *         matches in the reply
 */
static int
stonith_api_query(stonith_t * stonith, int call_options, const char *target,
stonith_key_value_t ** devices, int timeout)
{
int rc = 0, lpc = 0, max = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
xmlXPathObject *xpathObj = NULL;
CRM_CHECK(devices != NULL, return -EINVAL);
data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID);
crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__);
crm_xml_add(data, PCMK__XA_ST_TARGET, target);
crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, PCMK_ACTION_OFF);
rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout);
/* NOTE(review): this early return skips the pcmk__xml_free(data) at the end of
 * the function, so the request XML appears to be leaked on failure.
 */
if (rc < 0) {
return rc;
}
// Select every element in the reply that carries an agent attribute
xpathObj = pcmk__xpath_search(output->doc, "//*[@" PCMK_XA_AGENT "]");
if (xpathObj) {
max = pcmk__xpath_num_results(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = pcmk__xpath_result(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match != NULL) {
+ const char *match_id = crm_element_value(match, PCMK_XA_ID);
xmlChar *match_path = xmlGetNodePath(match);
crm_info("//*[@" PCMK_XA_AGENT "][%d] = %s", lpc, match_path);
free(match_path);
- *devices = stonith_key_value_add(*devices, NULL,
- crm_element_value(match,
- PCMK_XA_ID));
+ *devices = stonith__key_value_add(*devices, NULL, match_id);
}
}
xmlXPathFreeObject(xpathObj);
}
pcmk__xml_free(output);
pcmk__xml_free(data);
return max;
}
/*!
 * \internal
 * \brief Send a STONITH_OP_EXEC request to the fencer
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Bitmask of \c stonith_call_options
 * \param[in]     id            Fence device ID that request is for
 * \param[in]     action        Agent action to request (list, status, monitor)
 * \param[in]     target        Name of target node for requested action
 * \param[in]     timeout_sec   Error if not completed within this many seconds
 * \param[out]    output        Where to set agent output
 *
 * \return Result of stonith_send_command()
 */
static int
stonith_api_call(stonith_t *stonith, int call_options, const char *id,
                 const char *action, const char *target, int timeout_sec,
                 xmlNode **output)
{
    int result = 0;
    xmlNode *request = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID);

    crm_xml_add(request, PCMK__XA_ST_ORIGIN, __func__);
    crm_xml_add(request, PCMK__XA_ST_DEVICE_ID, id);
    crm_xml_add(request, PCMK__XA_ST_DEVICE_ACTION, action);
    crm_xml_add(request, PCMK__XA_ST_TARGET, target);

    result = stonith_send_command(stonith, STONITH_OP_EXEC, request, output,
                                  call_options, timeout_sec);
    pcmk__xml_free(request);
    return result;
}
/*!
 * \internal
 * \brief Request the "list" action from a fence device
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options
 * \param[in]     id            Fence device ID
 * \param[out]    list_info     If not NULL, set to a newly allocated copy of
 *                              the output attribute of the reply (left
 *                              untouched if there is no reply or no output)
 * \param[in]     timeout       Passed to stonith_api_call()
 *
 * \return Result of stonith_api_call()
 */
static int
stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info,
                 int timeout)
{
    xmlNode *reply = NULL;
    int result = stonith_api_call(stonith, call_options, id, PCMK_ACTION_LIST,
                                  NULL, timeout, &reply);

    if (reply == NULL) {
        return result;
    }

    if (list_info != NULL) {
        const char *text = crm_element_value(reply, PCMK__XA_ST_OUTPUT);

        if (text != NULL) {
            *list_info = strdup(text);
        }
    }

    pcmk__xml_free(reply);
    return result;
}
/*!
 * \internal
 * \brief Request the "monitor" action from a fence device
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options
 * \param[in]     id            Fence device ID
 * \param[in]     timeout       Passed to stonith_api_call()
 *
 * \return Result of stonith_api_call()
 */
static int
stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout)
{
    const char *no_target = NULL;

    return stonith_api_call(stonith, call_options, id, PCMK_ACTION_MONITOR,
                            no_target, timeout, NULL);
}
/*!
 * \internal
 * \brief Request the "status" action from a fence device
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options
 * \param[in]     id            Fence device ID
 * \param[in]     port          Passed to stonith_api_call() as the target
 * \param[in]     timeout       Passed to stonith_api_call()
 *
 * \return Result of stonith_api_call()
 */
static int
stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port,
                   int timeout)
{
    const char *target = port;

    return stonith_api_call(stonith, call_options, id, PCMK_ACTION_STATUS,
                            target, timeout, NULL);
}
/*!
 * \internal
 * \brief Send a fencing (\c STONITH_OP_FENCE) request
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options passed to stonith_send_command()
 * \param[in]     node          Target set in the request
 * \param[in]     action        Device action set in the request
 * \param[in]     timeout       Timeout set in the request and also passed to
 *                              stonith_send_command()
 * \param[in]     tolerance     Tolerance set in the request
 * \param[in]     delay         Delay set in the request
 *
 * \return Result of stonith_send_command()
 */
static int
stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node,
                             const char *action, int timeout, int tolerance, int delay)
{
    int result = 0;
    xmlNode *request = pcmk__xe_create(NULL, __func__);

    crm_xml_add(request, PCMK__XA_ST_TARGET, node);
    crm_xml_add(request, PCMK__XA_ST_DEVICE_ACTION, action);
    crm_xml_add_int(request, PCMK__XA_ST_TIMEOUT, timeout);
    crm_xml_add_int(request, PCMK__XA_ST_TOLERANCE, tolerance);
    crm_xml_add_int(request, PCMK__XA_ST_DELAY, delay);

    result = stonith_send_command(stonith, STONITH_OP_FENCE, request, NULL,
                                  call_options, timeout);
    pcmk__xml_free(request);
    return result;
}
/*!
 * \internal
 * \brief Send a fencing request with a delay of zero
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options
 * \param[in]     node          Target of the request
 * \param[in]     action        Device action to request
 * \param[in]     timeout       Timeout for the request
 * \param[in]     tolerance     Tolerance for the request
 *
 * \return Result of stonith_api_fence_with_delay()
 */
static int
stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action,
                  int timeout, int tolerance)
{
    const int no_delay = 0;

    return stonith_api_fence_with_delay(stonith, call_options, node, action,
                                        timeout, tolerance, no_delay);
}
/*!
 * \internal
 * \brief Send an "off" fencing request flagged as a manual acknowledgement
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options (\c st_opt_manual_ack is applied
 *                              via stonith__set_call_options() before sending)
 * \param[in]     target        Target of the request
 *
 * \return Result of stonith_api_fence()
 */
static int
stonith_api_confirm(stonith_t * stonith, int call_options, const char *target)
{
    const int no_timeout = 0;
    const int no_tolerance = 0;

    stonith__set_call_options(call_options, target, st_opt_manual_ack);

    return stonith_api_fence(stonith, call_options, target, PCMK_ACTION_OFF,
                             no_timeout, no_tolerance);
}
/*!
 * \internal
 * \brief Query the fencer for fencing history
 *
 * Sends a \c STONITH_OP_FENCE_HISTORY request and converts each child of the
 * history element in the reply into a \c stonith_history_t, linked in reply
 * order.
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options (\c st_opt_sync_call is applied
 *                              via stonith__set_call_options() before sending)
 * \param[in]     node          If not NULL, set as the target in the request
 * \param[out]    history       Where to store the head of the newly allocated
 *                              history list (NULL if no entries were created);
 *                              must not be NULL
 * \param[in]     timeout       Passed to stonith_send_command()
 *
 * \return -EINVAL if \p history is NULL, otherwise the result of
 *         stonith_send_command()
 */
static int
stonith_api_history(stonith_t * stonith, int call_options, const char *node,
                    stonith_history_t ** history, int timeout)
{
    int rc = 0;
    xmlNode *data = NULL;
    xmlNode *output = NULL;
    stonith_history_t *last = NULL;

    // Same argument check that stonith_api_query() does for its output list
    CRM_CHECK(history != NULL, return -EINVAL);
    *history = NULL;

    if (node) {
        data = pcmk__xe_create(NULL, __func__);
        crm_xml_add(data, PCMK__XA_ST_TARGET, node);
    }

    stonith__set_call_options(call_options, node, st_opt_sync_call);
    rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output,
                              call_options, timeout);
    pcmk__xml_free(data);

    // Don't dereference the reply unless we actually got one
    if ((rc == 0) && (output != NULL)) {
        xmlNode *op = NULL;
        xmlNode *reply = pcmk__xpath_find_one(output->doc,
                                              "//" PCMK__XE_ST_HISTORY,
                                              LOG_NEVER);

        for (op = pcmk__xe_first_child(reply, NULL, NULL, NULL); op != NULL;
             op = pcmk__xe_next(op, NULL)) {
            stonith_history_t *kvp;

            /* Start from 0 so that an absent or unparsable date attribute
             * yields a defined value instead of reading an uninitialized
             * variable.
             */
            long long completed = 0LL;
            long long completed_nsec = 0LL;

            kvp = pcmk__assert_alloc(1, sizeof(stonith_history_t));
            kvp->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET);
            kvp->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION);
            kvp->origin = crm_element_value_copy(op, PCMK__XA_ST_ORIGIN);
            kvp->delegate = crm_element_value_copy(op, PCMK__XA_ST_DELEGATE);
            kvp->client = crm_element_value_copy(op, PCMK__XA_ST_CLIENTNAME);
            crm_element_value_ll(op, PCMK__XA_ST_DATE, &completed);
            kvp->completed = (time_t) completed;
            crm_element_value_ll(op, PCMK__XA_ST_DATE_NSEC, &completed_nsec);
            kvp->completed_nsec = completed_nsec;
            crm_element_value_int(op, PCMK__XA_ST_STATE, &kvp->state);
            kvp->exit_reason = crm_element_value_copy(op, PCMK_XA_EXIT_REASON);

            // Append to the tail so that list order matches reply order
            if (last) {
                last->next = kvp;
            } else {
                *history = kvp;
            }
            last = kvp;
        }
    }

    pcmk__xml_free(output);
    return rc;
}
-void stonith_history_free(stonith_history_t *history)
+/*!
+ * \internal
+ * \brief Free a list of fencing history objects and all members of each object
+ *
+ * \param[in,out] head Head of fencing history object list
+ */
+void
+stonith__history_free(stonith_history_t *head)
{
- stonith_history_t *hp, *hp_old;
-
- for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) {
- free(hp->target);
- free(hp->action);
- free(hp->origin);
- free(hp->delegate);
- free(hp->client);
- free(hp->exit_reason);
+ /* @COMPAT Drop "next" member of stonith_history_t, use a GList or GSList,
+ * and use the appropriate free function (while ensuring the members get
+ * freed)
+ */
+ while (head != NULL) {
+ stonith_history_t *next = head->next;
+
+ free(head->target);
+ free(head->action);
+ free(head->origin);
+ free(head->delegate);
+ free(head->client);
+ free(head->exit_reason);
+ free(head);
+ head = next;
}
}
/*!
 * \internal
 * \brief Compare two notification entries (GCompareFunc)
 *
 * Entries marked for deletion never compare equal to anything. Otherwise,
 * entries are ordered by event name; with equal event names they are equal if
 * either callback is NULL or both callbacks are the same, and otherwise are
 * ordered by callback address.
 *
 * \param[in] a  First notification entry
 * \param[in] b  Second notification entry
 *
 * \return 0 if the entries are considered equal, otherwise a negative or
 *         positive number
 */
static gint
stonithlib_GCompareFunc(gconstpointer a, gconstpointer b)
{
    const stonith_notify_client_t *lhs = a;
    const stonith_notify_client_t *rhs = b;
    int event_cmp = 0;

    if (lhs->delete || rhs->delete) {
        /* make entries marked for deletion not findable */
        return -1;
    }

    CRM_CHECK(lhs->event != NULL && rhs->event != NULL, return 0);

    event_cmp = strcmp(lhs->event, rhs->event);
    if (event_cmp != 0) {
        return event_cmp;
    }

    // Same event: a NULL callback on either side acts as a wildcard
    if ((lhs->notify == NULL) || (rhs->notify == NULL)
        || (lhs->notify == rhs->notify)) {
        return 0;
    }

    crm_err("callbacks for %s are not equal: %p vs. %p",
            lhs->event, lhs->notify, rhs->notify);
    return (((long) lhs->notify) < ((long) rhs->notify))? -1 : 1;
}
/*!
 * \internal
 * \brief Create the XML for a fencer command
 *
 * \param[in] call_id       Call ID to set in the command
 * \param[in] token         Must not be NULL (otherwise not used here)
 * \param[in] op            Operation name to set in the command
 * \param[in] data          If not NULL, copied into a call data wrapper
 *                          element below the command
 * \param[in] call_options  Call options to set in the command
 *
 * \return Newly created command XML, or NULL if \p token is NULL
 */
xmlNode *
stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options)
{
    xmlNode *command = NULL;

    CRM_CHECK(token != NULL, return NULL);

    command = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);

    crm_xml_add(command, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
    crm_xml_add(command, PCMK__XA_ST_OP, op);
    crm_xml_add_int(command, PCMK__XA_ST_CALLID, call_id);

    crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options);
    crm_xml_add_int(command, PCMK__XA_ST_CALLOPT, call_options);

    if (data == NULL) {
        return command;
    }

    pcmk__xml_copy(pcmk__xe_create(command, PCMK__XE_ST_CALLDATA), data);
    return command;
}
/*!
 * \internal
 * \brief Free a fence action callback entry, cancelling its timer if active
 *
 * \param[in,out] data  Callback entry (\c stonith_callback_client_t *) to free
 */
static void
stonith_destroy_op_callback(gpointer data)
{
    stonith_callback_client_t *entry = data;
    struct timer_rec_s *timer = entry->timer;

    if ((timer != NULL) && (timer->ref > 0)) {
        g_source_remove(timer->ref);
    }
    free(timer);
    free(entry);
}
/*!
 * \internal
 * \brief Disconnect from the fencer
 *
 * Tears down the IPC connection (via the mainloop if it is attached to one),
 * frees the registration token, and marks the API object as disconnected.
 *
 * \param[in,out] stonith  Fencer API object
 *
 * \return \c pcmk_ok (always)
 */
static int
stonith_api_signoff(stonith_t * stonith)
{
    stonith_private_t *priv = stonith->st_private;

    crm_debug("Disconnecting from the fencer");

    if (priv->source != NULL) {
        /* Attached to mainloop */
        mainloop_del_ipc_client(priv->source);
        priv->source = NULL;
        priv->ipc = NULL;

    } else if (priv->ipc) {
        /* Not attached to mainloop */
        crm_ipc_t *conn = priv->ipc;

        // Clear the member before closing, as the original code does
        priv->ipc = NULL;
        crm_ipc_close(conn);
        crm_ipc_destroy(conn);
    }

    free(priv->token);
    priv->token = NULL;

    stonith->state = stonith_disconnected;
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Remove registered fence action callback(s)
 *
 * \param[in,out] stonith        Fencer API object
 * \param[in]     call_id        If \p all_callbacks is false: 0 to clear the
 *                               global callback, otherwise the call ID whose
 *                               table entry should be removed
 * \param[in]     all_callbacks  If true, clear the global callback and
 *                               replace the per-call table with a new one
 *
 * \return \c pcmk_ok (always)
 */
static int
stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
{
    stonith_private_t *priv = stonith->st_private;

    if (!all_callbacks) {
        if (call_id == 0) {
            priv->op_callback = NULL;
        } else {
            pcmk__intkey_table_remove(priv->stonith_op_callback_table,
                                      call_id);
        }
        return pcmk_ok;
    }

    priv->op_callback = NULL;
    g_hash_table_destroy(priv->stonith_op_callback_table);
    priv->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback);
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Call one given fence action callback with a result
 *
 * \param[in,out] st        Fencer API connection
 * \param[in]     call_id   If positive, call ID of completed fence action,
 *                          otherwise legacy return code for early failure
 * \param[in,out] result    Full result for action
 * \param[in,out] userdata  User data to pass to callback
 * \param[in]     callback  Fence action callback to invoke
 */
static void
invoke_fence_action_callback(stonith_t *st, int call_id,
                             pcmk__action_result_t *result,
                             void *userdata,
                             void (*callback) (stonith_t *st,
                                               stonith_callback_data_t *data))
{
    // The full result is also made available through the opaque member
    stonith_callback_data_t cb_data = {
        .call_id = call_id,
        .rc = pcmk_rc2legacy(stonith__result2rc(result)),
        .userdata = userdata,
        .opaque = (void *) result,
    };

    callback(st, &cb_data);
}
/*!
* \internal
* \brief Invoke any callbacks registered for a specified fence action result
*
* Given a fence action result from the fencer, invoke any callback registered
* for that action, as well as any global callback registered.
*
* \param[in,out] stonith Fencer API connection
* \param[in] msg If non-NULL, fencer reply
* \param[in] call_id If \p msg is NULL, call ID of action that timed out
*/
static void
invoke_registered_callbacks(stonith_t *stonith, const xmlNode *msg, int call_id)
{
stonith_private_t *private = NULL;
stonith_callback_client_t *cb_info = NULL;
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
CRM_CHECK(stonith != NULL, return);
CRM_CHECK(stonith->st_private != NULL, return);
private = stonith->st_private;
if (msg == NULL) {
// Fencer didn't reply in time
pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
"Fencer accepted request but did not reply in time");
CRM_LOG_ASSERT(call_id > 0);
} else {
// We have the fencer reply
// (the call_id argument is overwritten with the ID found in the reply)
if ((crm_element_value_int(msg, PCMK__XA_ST_CALLID, &call_id) != 0)
|| (call_id <= 0)) {
crm_log_xml_warn(msg, "Bad fencer reply");
}
stonith__xe_get_result(msg, &result);
}
// Only positive call IDs are looked up in the per-call callback table
if (call_id > 0) {
cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table,
call_id);
}
// A callback registered as "only success" is skipped for failed results
if ((cb_info != NULL) && (cb_info->callback != NULL)
&& (pcmk__result_ok(&result) || !(cb_info->only_success))) {
crm_trace("Invoking callback %s for call %d",
pcmk__s(cb_info->id, "without ID"), call_id);
invoke_fence_action_callback(stonith, call_id, &result,
cb_info->user_data, cb_info->callback);
} else if ((private->op_callback == NULL) && !pcmk__result_ok(&result)) {
// Nobody will be told about this failure, so at least log it
crm_warn("Fencing action without registered callback failed: %d (%s%s%s)",
result.exit_status,
pcmk_exec_status_str(result.execution_status),
((result.exit_reason == NULL)? "" : ": "),
((result.exit_reason == NULL)? "" : result.exit_reason));
crm_log_xml_debug(msg, "Failed fence update");
}
// The global callback (if any) is invoked in addition to a per-call one
if (private->op_callback != NULL) {
crm_trace("Invoking global callback for call %d", call_id);
invoke_fence_action_callback(stonith, call_id, &result, NULL,
private->op_callback);
}
// A per-call entry is removed from the table once a result has been handled
if (cb_info != NULL) {
stonith_api_del_callback(stonith, call_id, FALSE);
}
pcmk__reset_result(&result);
}
/*!
 * \internal
 * \brief Timer callback for an asynchronous call the fencer never answered
 *
 * \param[in,out] data  Timer record (\c struct timer_rec_s *) of the call
 *
 * \return TRUE (always)
 */
static gboolean
stonith_async_timeout_handler(gpointer data)
{
    struct timer_rec_s *rec = data;
    const int timed_out_call = rec->call_id;

    crm_err("Async call %d timed out after %dms", timed_out_call, rec->timeout);
    invoke_registered_callbacks(rec->stonith, NULL, timed_out_call);

    /* Always return TRUE, never remove the handler
     * We do that in stonith_del_callback()
     */
    return TRUE;
}
/*!
 * \internal
 * \brief (Re)arm the fallback timeout timer of a fence action callback
 *
 * Does nothing if \p timeout is not positive. Otherwise the callback's timer
 * record is created if needed, any timer already running for it is removed,
 * and a new timer is started.
 *
 * \param[in,out] callback  Callback entry whose timer should be set
 * \param[in]     stonith   Fencer API object to store in the timer record
 * \param[in]     call_id   Call ID to store in the timer record
 * \param[in]     timeout   Timeout in seconds, before the grace period is
 *                          added
 */
static void
set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id,
                     int timeout)
{
    struct timer_rec_s *rec = NULL;

    if (timeout <= 0) {
        return;
    }

    rec = callback->timer;
    if (rec == NULL) {
        rec = pcmk__assert_alloc(1, sizeof(struct timer_rec_s));
        callback->timer = rec;
    }

    rec->stonith = stonith;
    rec->call_id = call_id;

    /* Allow a fair bit of grace to allow the server to tell us of a timeout
     * This is only a fallback
     */
    rec->timeout = (timeout + 60) * 1000;

    // Replace any timer that is already running for this callback
    if (rec->ref) {
        g_source_remove(rec->ref);
    }
    rec->ref = pcmk__create_timer(rec->timeout, stonith_async_timeout_handler,
                                  rec);
}
/*!
 * \internal
 * \brief Apply a new timeout to a registered callback, if it permits updates
 *
 * \param[in]     call_id  Call ID whose callback timeout should be updated
 * \param[in]     timeout  New timeout, passed to set_callback_timeout()
 * \param[in,out] st       Fencer API object
 */
static void
update_callback_timeout(int call_id, int timeout, stonith_t * st)
{
    stonith_private_t *priv = st->st_private;
    stonith_callback_client_t *entry = NULL;

    entry = pcmk__intkey_table_lookup(priv->stonith_op_callback_table,
                                      call_id);

    if ((entry != NULL) && entry->allow_timeout_updates) {
        set_callback_timeout(entry, st, call_id, timeout);
    }
}
/*!
 * \internal
 * \brief Parse a message received from the fencer and dispatch it by type
 *
 * \param[in]     buffer    Message text to parse as XML
 * \param[in]     length    Not used by this function
 * \param[in,out] userdata  Fencer API object (\c stonith_t *)
 *
 * \return 0 if the message could not be parsed, otherwise 1
 */
static int
stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
{
    stonith_t *st = userdata;
    stonith_private_t *priv = NULL;
    struct notify_blob_s note;
    const char *msg_type = NULL;

    pcmk__assert(st != NULL);
    priv = st->st_private;

    note.stonith = st;
    note.xml = pcmk__xml_parse(buffer);
    if (note.xml == NULL) {
        crm_warn("Received malformed message from fencer: %s", buffer);
        return 0;
    }

    /* do callbacks */
    msg_type = crm_element_value(note.xml, PCMK__XA_T);
    crm_trace("Activating %s callbacks...", msg_type);

    if (pcmk__str_eq(msg_type, PCMK__VALUE_STONITH_NG, pcmk__str_none)) {
        // Reply to a fence action
        invoke_registered_callbacks(st, note.xml, 0);

    } else if (pcmk__str_eq(msg_type, PCMK__VALUE_ST_NOTIFY, pcmk__str_none)) {
        // Notification
        foreach_notify_entry(priv, stonith_send_notification, &note);

    } else if (pcmk__str_eq(msg_type, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE,
                            pcmk__str_none)) {
        // Updated timeout for a pending call
        int new_timeout = 0;
        int for_call = 0;

        crm_element_value_int(note.xml, PCMK__XA_ST_TIMEOUT, &new_timeout);
        crm_element_value_int(note.xml, PCMK__XA_ST_CALLID, &for_call);
        update_callback_timeout(for_call, new_timeout, st);

    } else {
        crm_err("Unknown message type: %s", msg_type);
        crm_log_xml_warn(note.xml, "BadReply");
    }

    pcmk__xml_free(note.xml);
    return 1;
}
/*!
 * \internal
 * \brief Connect to the fencer and register as a client
 *
 * \param[in,out] stonith     Fencer API object
 * \param[in]     name        Client name sent in the registration request
 *                            ("client" is used in log messages if NULL)
 * \param[out]    stonith_fd  If not NULL, connect without a mainloop and
 *                            store the IPC file descriptor here; if NULL, the
 *                            connection is attached to the mainloop
 *
 * \return \c pcmk_ok on success, otherwise a negative error code (-EINVAL,
 *         -ENOTCONN, -ECOMM, or -EPROTO)
 *
 * \note On any failure, the API object's disconnect method is called before
 *       returning.
 */
static int
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
{
int rc = pcmk_ok;
stonith_private_t *native = NULL;
const char *display_name = name? name : "client";
struct ipc_client_callbacks st_callbacks = {
.dispatch = stonith_dispatch_internal,
.destroy = stonith_connection_destroy
};
CRM_CHECK(stonith != NULL, return -EINVAL);
native = stonith->st_private;
pcmk__assert(native != NULL);
crm_debug("Attempting fencer connection by %s with%s mainloop",
display_name, (stonith_fd? "out" : ""));
// The state is set before connecting; the failure path below disconnects
stonith->state = stonith_connected_command;
if (stonith_fd) {
/* No mainloop */
native->ipc = crm_ipc_new("stonith-ng", 0);
if (native->ipc != NULL) {
// In this branch rc is compared against pcmk_rc_ok, not pcmk_ok
rc = pcmk__connect_generic_ipc(native->ipc);
if (rc == pcmk_rc_ok) {
rc = pcmk__ipc_fd(native->ipc, stonith_fd);
if (rc != pcmk_rc_ok) {
crm_debug("Couldn't get file descriptor for IPC: %s",
pcmk_rc_str(rc));
}
}
if (rc != pcmk_rc_ok) {
// Leaving native->ipc NULL makes the check below return -ENOTCONN
crm_ipc_close(native->ipc);
crm_ipc_destroy(native->ipc);
native->ipc = NULL;
}
}
} else {
/* With mainloop */
native->source =
mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
rc = -ENOTCONN;
} else {
// Connected: register with the fencer and wait for its reply
xmlNode *reply = NULL;
xmlNode *hello = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
crm_xml_add(hello, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(hello, PCMK__XA_ST_OP, CRM_OP_REGISTER);
crm_xml_add(hello, PCMK__XA_ST_CLIENTNAME, name);
rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
if (rc < 0) {
crm_debug("Couldn't register with the fencer: %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
rc = -ECOMM;
} else if (reply == NULL) {
crm_debug("Couldn't register with the fencer: no reply");
rc = -EPROTO;
} else {
// A valid reply has the register operation and carries a client ID
const char *msg_type = crm_element_value(reply, PCMK__XA_ST_OP);
native->token = crm_element_value_copy(reply, PCMK__XA_ST_CLIENTID);
if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_none)) {
crm_debug("Couldn't register with the fencer: invalid reply type '%s'",
(msg_type? msg_type : "(missing)"));
crm_log_xml_debug(reply, "Invalid fencer reply");
rc = -EPROTO;
} else if (native->token == NULL) {
crm_debug("Couldn't register with the fencer: no token in reply");
crm_log_xml_debug(reply, "Invalid fencer reply");
rc = -EPROTO;
} else {
crm_debug("Connection to fencer by %s succeeded (registration token: %s)",
display_name, native->token);
rc = pcmk_ok;
}
}
pcmk__xml_free(reply);
pcmk__xml_free(hello);
}
if (rc != pcmk_ok) {
crm_debug("Connection attempt to fencer by %s failed: %s "
QB_XS " rc=%d", display_name, pcmk_strerror(rc), rc);
stonith->cmds->disconnect(stonith);
}
return rc;
}
/*!
 * \internal
 * \brief Tell the fencer to activate or deactivate a notification type
 *
 * Nothing is sent if the API object is in the disconnected state.
 *
 * \param[in,out] stonith   Fencer API object
 * \param[in]     callback  Notification type to (de)activate
 * \param[in]     enabled   Nonzero to activate, zero to deactivate
 *
 * \return \c pcmk_ok if nothing needed to be sent or sending succeeded,
 *         otherwise -ECOMM
 */
static int
stonith_set_notification(stonith_t * stonith, const char *callback, int enabled)
{
    int rc = pcmk_ok;
    xmlNode *request = pcmk__xe_create(NULL, __func__);
    stonith_private_t *priv = stonith->st_private;

    if (stonith->state != stonith_disconnected) {
        const char *toggle_attr = enabled? PCMK__XA_ST_NOTIFY_ACTIVATE
                                         : PCMK__XA_ST_NOTIFY_DEACTIVATE;

        crm_xml_add(request, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
        crm_xml_add(request, toggle_attr, callback);

        rc = crm_ipc_send(priv->ipc, request, crm_ipc_client_response, -1, NULL);
        if (rc >= 0) {
            rc = pcmk_ok;
        } else {
            crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc);
            rc = -ECOMM;
        }
    }

    pcmk__xml_free(request);
    return rc;
}
/*!
 * \internal
 * \brief Register a callback for a type of fencer notification
 *
 * \param[in,out] stonith   Fencer API object
 * \param[in]     event     Notification type (the pointer itself is stored,
 *                          not a copy)
 * \param[in]     callback  Function to register for \p event
 *
 * \return \c pcmk_ok on success, or -ENOTUNIQ if an equal entry (per
 *         stonithlib_GCompareFunc()) is already in the list
 */
static int
stonith_api_add_notification(stonith_t * stonith, const char *event,
                             void (*callback) (stonith_t * stonith, stonith_event_t * e))
{
    stonith_private_t *priv = stonith->st_private;
    stonith_notify_client_t *candidate = NULL;

    crm_trace("Adding callback for %s events (%d)", event, g_list_length(priv->notify_list));

    candidate = pcmk__assert_alloc(1, sizeof(stonith_notify_client_t));
    candidate->event = event;
    candidate->notify = callback;

    if (g_list_find_custom(priv->notify_list, candidate,
                           stonithlib_GCompareFunc) != NULL) {
        crm_warn("Callback already present");
        free(candidate);
        return -ENOTUNIQ;
    }

    priv->notify_list = g_list_append(priv->notify_list, candidate);
    stonith_set_notification(stonith, event, 1);
    crm_trace("Callback added (%d)", g_list_length(priv->notify_list));

    return pcmk_ok;
}
/*!
 * \internal
 * \brief Remove one notification entry, unless already marked for deletion
 *
 * Has the \c GFunc signature so it can be used with foreach_notify_entry().
 *
 * \param[in]     data       Notification entry
 *                           (\c stonith_notify_client_t *)
 * \param[in,out] user_data  Fencer API object (\c stonith_t *)
 */
static void
del_notify_entry(gpointer data, gpointer user_data)
{
    stonith_notify_client_t *client = data;
    stonith_t *st = user_data;

    if (client->delete) {
        return;
    }

    crm_debug("Removing callback for %s events", client->event);
    stonith_api_del_notification(st, client->event);
}
/*!
 * \internal
 * \brief Unregister the callback for a type of fencer notification
 *
 * If the notification list is currently being traversed, the matching entry
 * is only marked for deletion; otherwise it is unlinked and freed right away.
 *
 * \param[in,out] stonith  Fencer API object
 * \param[in]     event    Notification type to unregister, or NULL to
 *                         unregister every entry in the list
 *
 * \return \c pcmk_ok (always)
 */
static int
stonith_api_del_notification(stonith_t * stonith, const char *event)
{
    stonith_private_t *priv = stonith->st_private;
    stonith_notify_client_t *search_key = NULL;
    GList *found = NULL;

    if (event == NULL) {
        foreach_notify_entry(priv, del_notify_entry, stonith);
        crm_trace("Removed callback");
        return pcmk_ok;
    }

    crm_debug("Removing callback for %s events", event);

    // A NULL callback in the key matches any entry with the same event
    search_key = pcmk__assert_alloc(1, sizeof(stonith_notify_client_t));
    search_key->event = event;
    search_key->notify = NULL;

    found = g_list_find_custom(priv->notify_list, search_key,
                               stonithlib_GCompareFunc);

    stonith_set_notification(stonith, event, 0);

    if (found == NULL) {
        crm_trace("Callback not present");

    } else {
        stonith_notify_client_t *registered = found->data;

        if (priv->notify_refcnt) {
            registered->delete = TRUE;
            priv->notify_deletes = TRUE;
        } else {
            priv->notify_list = g_list_remove(priv->notify_list, registered);
            free(registered);
        }
        crm_trace("Removed callback");
    }

    free(search_key);
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Register a callback for a fence action
 *
 * \param[in,out] stonith        Fencer API object
 * \param[in]     call_id        0 to set the global callback, a negative
 *                               value if the call already failed (the
 *                               callback is then invoked immediately unless
 *                               only successes are to be reported), otherwise
 *                               the call ID to register the callback for
 * \param[in]     timeout        If positive, a timeout timer is set for the
 *                               callback
 * \param[in]     options        Bitmask; \c st_opt_report_only_success and
 *                               \c st_opt_timeout_updates are checked here
 * \param[in,out] user_data      User data to pass to \p callback
 * \param[in]     callback_name  Name stored as the entry's ID (the pointer
 *                               itself is stored, not a copy)
 * \param[in]     callback       Function to register
 *
 * \return -EINVAL if \p stonith or its private data is NULL, FALSE if
 *         \p call_id is negative, otherwise TRUE
 */
static int
stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options,
void *user_data, const char *callback_name,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_client_t *blob = NULL;
stonith_private_t *private = NULL;
CRM_CHECK(stonith != NULL, return -EINVAL);
CRM_CHECK(stonith->st_private != NULL, return -EINVAL);
private = stonith->st_private;
/* NOTE(review): the call_id == 0 branch does not return, so the callback is
 * also inserted into the per-call table under key 0 below -- confirm that
 * this is intended.
 */
if (call_id == 0) { // Add global callback
private->op_callback = callback;
} else if (call_id < 0) { // Call failed immediately, so call callback now
if (!(options & st_opt_report_only_success)) {
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
pcmk__set_result(&result, CRM_EX_ERROR,
stonith__legacy2status(call_id), NULL);
invoke_fence_action_callback(stonith, call_id, &result,
user_data, callback);
} else {
crm_warn("Fencer call failed: %s", pcmk_strerror(call_id));
}
return FALSE;
}
// Remember the callback so it can be invoked when the result arrives
blob = pcmk__assert_alloc(1, sizeof(stonith_callback_client_t));
blob->id = callback_name;
blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE;
blob->user_data = user_data;
blob->callback = callback;
blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE;
if (timeout > 0) {
set_callback_timeout(blob, stonith, call_id, timeout);
}
pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id,
blob);
crm_trace("Added callback to %s for call %d", callback_name, call_id);
return TRUE;
}
-static void
-stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data)
-{
- int call = GPOINTER_TO_INT(key);
- stonith_callback_client_t *blob = value;
-
- crm_debug("Call %d (%s): pending", call, pcmk__s(blob->id, "no ID"));
-}
-
-void
-stonith_dump_pending_callbacks(stonith_t * stonith)
-{
- stonith_private_t *private = stonith->st_private;
-
- if (private->stonith_op_callback_table == NULL) {
- return;
- }
- return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL);
-}
-
/*!
 * \internal
 * \brief Find the data section of a fencer notification
 *
 * Searches the notification's XML document for an element named after the
 * notification type.
 *
 * \param[in] msg    Notification XML
 * \param[in] ntype  Notification type
 *
 * \return Result of the XPath search "//<ntype>" in the document of \p msg
 */
static xmlNode *
get_event_data_xml(xmlNode *msg, const char *ntype)
{
    xmlNode *match = NULL;
    char *xpath = crm_strdup_printf("//%s", ntype);

    match = pcmk__xpath_find_one(msg->doc, xpath, LOG_DEBUG);
    free(xpath);
    return match;
}
/*
<notify t="st_notify" subt="st_device_register" st_op="st_device_register" st_rc="0" >
<st_calldata >
<stonith_command t="stonith-ng" st_async_id="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_op="st_device_register" st_callid="2" st_callopt="4096" st_timeout="0" st_clientid="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_clientname="cts-fence-helper" >
<st_calldata >
<st_device_id id="test-id" origin="create_device_registration_xml" agent="fence_virsh" namespace="stonith-ng" >
<attributes ipaddr="localhost" pcmk-portmal="some-host=pcmk-1 pcmk-3=3,4" login="root" identity_file="/root/.ssh/id_dsa" />
</st_device_id>
</st_calldata>
</stonith_command>
</st_calldata>
</notify>
<notify t="st_notify" subt="st_notify_fence" st_op="st_notify_fence" st_rc="0" >
<st_calldata >
<st_notify_fence st_rc="0" st_target="some-host" st_op="st_fence" st_delegate="test-id" st_origin="61dd7759-e229-4be7-b1f8-ef49dd14d9f0" />
</st_calldata>
</notify>
*/
/*!
 * \internal
 * \brief Convert a fencer notification message into an event object
 *
 * Every event gets the operation name and result. Fence notifications
 * additionally get origin, action, target, executioner, ID, client origin and
 * device; device/level add/delete notifications additionally get the device.
 * If the expected data element is missing, an error is logged and the event is
 * returned with only the common fields set.
 *
 * \param[in] msg  Notification XML
 *
 * \return Newly allocated event (release it with event_free())
 */
static stonith_event_t *
xml_to_event(xmlNode *msg)
{
    stonith_event_t *event = pcmk__assert_alloc(1, sizeof(stonith_event_t));
    struct event_private *priv = NULL;
    xmlNode *data = NULL;
    bool is_fence = false;
    bool is_config_change = false;

    event->opaque = pcmk__assert_alloc(1, sizeof(struct event_private));
    priv = (struct event_private *) event->opaque;

    crm_log_xml_trace(msg, "stonith_notify");

    // Fields common to all notification types
    stonith__xe_get_result(msg, &priv->result);
    event->operation = crm_element_value_copy(msg, PCMK__XA_ST_OP);

    // @COMPAT The API originally provided the result as a legacy return code
    event->result = pcmk_rc2legacy(stonith__result2rc(&priv->result));

    // Work out whether this subtype carries additional information
    is_fence = pcmk__str_eq(event->operation, PCMK__VALUE_ST_NOTIFY_FENCE,
                            pcmk__str_none);
    if (!is_fence) {
        is_config_change = pcmk__str_any_of(event->operation,
                                            STONITH_OP_DEVICE_ADD,
                                            STONITH_OP_DEVICE_DEL,
                                            STONITH_OP_LEVEL_ADD,
                                            STONITH_OP_LEVEL_DEL,
                                            NULL);
    }
    if (!is_fence && !is_config_change) {
        return event;
    }

    data = get_event_data_xml(msg, event->operation);
    if (data == NULL) {
        crm_err("No data for %s event", event->operation);
        crm_log_xml_notice(msg, "BadEvent");
        return event;
    }

    // Both kinds of notification report the device
    event->device = crm_element_value_copy(data, PCMK__XA_ST_DEVICE_ID);

    if (is_fence) {
        event->origin = crm_element_value_copy(data, PCMK__XA_ST_ORIGIN);
        event->action = crm_element_value_copy(data,
                                               PCMK__XA_ST_DEVICE_ACTION);
        event->target = crm_element_value_copy(data, PCMK__XA_ST_TARGET);
        event->executioner = crm_element_value_copy(data,
                                                    PCMK__XA_ST_DELEGATE);
        event->id = crm_element_value_copy(data, PCMK__XA_ST_REMOTE_OP);
        event->client_origin = crm_element_value_copy(data,
                                                      PCMK__XA_ST_CLIENTNAME);
    }
    return event;
}
/*!
 * \internal
 * \brief Free an event created by xml_to_event()
 *
 * \param[in,out] event  Event to free (must not be \c NULL; it is dereferenced
 *                       unconditionally)
 */
static void
event_free(stonith_event_t * event)
{
    struct event_private *priv = event->opaque;

    // Reset the private result before releasing the memory that holds it
    pcmk__reset_result(&priv->result);
    free(priv);

    // String members
    free(event->client_origin);
    free(event->device);
    free(event->executioner);
    free(event->target);
    free(event->action);
    free(event->origin);
    free(event->operation);
    free(event->id);

    free(event);
}
/*!
 * \internal
 * \brief Deliver a notification message to one registered client, if it applies
 *
 * The client's callback is invoked only when the message is present, the
 * client entry is usable (not NULL, not marked for deletion, has a callback),
 * and the entry's event name equals the message's subtype.
 *
 * \param[in] data       Notification client entry (stonith_notify_client_t)
 * \param[in] user_data  Notification blob (struct notify_blob_s) holding the
 *                       message XML and the fencer connection
 */
static void
stonith_send_notification(gpointer data, gpointer user_data)
{
    stonith_notify_client_t *client = data;
    struct notify_blob_s *blob = user_data;
    const char *subtype = NULL;
    stonith_event_t *st_event = NULL;

    if (blob->xml == NULL) {
        crm_warn("Skipping callback - NULL message");
        return;
    }

    subtype = crm_element_value(blob->xml, PCMK__XA_SUBT);

    if (client == NULL) {
        crm_warn("Skipping callback - NULL callback client");
        return;
    }
    if (client->delete) {
        crm_trace("Skipping callback - marked for deletion");
        return;
    }
    if (client->notify == NULL) {
        crm_warn("Skipping callback - NULL callback");
        return;
    }
    if (!pcmk__str_eq(client->event, subtype, pcmk__str_none)) {
        crm_trace("Skipping callback - event mismatch %p/%s vs. %s", client, client->event, subtype);
        return;
    }

    // The event object lives only for the duration of the callback
    st_event = xml_to_event(blob->xml);

    crm_trace("Invoking callback for %p/%s event...", client, subtype);
    client->notify(blob->stonith, st_event);
    crm_trace("Callback invoked...");

    event_free(st_event);
}
/*!
* \internal
* \brief Create and send an API request
*
* \param[in,out] stonith Stonith connection
* \param[in] op API operation to request
* \param[in] data Data to attach to request
* \param[out] output_data If not NULL, will be set to reply if synchronous
* \param[in] call_options Bitmask of stonith_call_options to use
* \param[in] timeout Error if not completed within this many seconds
*
* \return pcmk_ok (for synchronous requests) or positive call ID
* (for asynchronous requests) on success, -errno otherwise
*
* \note When \p *output_data is set, ownership of the reply XML passes to the
* caller (the local pointer is cleared so it is not freed here).
*/
static int
stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data,
int call_options, int timeout)
{
int rc = 0;
int reply_id = -1;
xmlNode *op_msg = NULL;
xmlNode *op_reply = NULL;
stonith_private_t *native = NULL;
pcmk__assert((stonith != NULL) && (stonith->st_private != NULL)
&& (op != NULL));
native = stonith->st_private;
// Clear the output up front so every early return leaves it NULL
if (output_data != NULL) {
*output_data = NULL;
}
if ((stonith->state == stonith_disconnected) || (native->token == NULL)) {
return -ENOTCONN;
}
/* Increment the call ID, which must be positive to avoid conflicting with
* error codes. This shouldn't be a problem unless the client mucked with
* it or the counter wrapped around.
*/
stonith->call_id++;
if (stonith->call_id < 1) {
stonith->call_id = 1;
}
op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options);
if (op_msg == NULL) {
return -EINVAL;
}
crm_xml_add_int(op_msg, PCMK__XA_ST_TIMEOUT, timeout);
crm_trace("Sending %s message to fencer with timeout %ds", op, timeout);
// Copy any delay attribute from the payload onto the request itself
if (data) {
const char *delay_s = crm_element_value(data, PCMK__XA_ST_DELAY);
if (delay_s) {
crm_xml_add(op_msg, PCMK__XA_ST_DELAY, delay_s);
}
}
{
enum crm_ipc_flags ipc_flags = crm_ipc_flags_none;
// Only synchronous calls ask the IPC layer for a response
if (call_options & st_opt_sync_call) {
pcmk__set_ipc_flags(ipc_flags, "stonith command",
crm_ipc_client_response);
}
/* The IPC timeout is the operation timeout plus 60, multiplied by 1000
* (presumably seconds converted to milliseconds -- confirm against
* crm_ipc_send())
*/
rc = crm_ipc_send(native->ipc, op_msg, ipc_flags,
1000 * (timeout + 60), &op_reply);
}
pcmk__xml_free(op_msg);
// The specific IPC error is only logged; callers always see -ECOMM
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc);
rc = -ECOMM;
goto done;
}
crm_log_xml_trace(op_reply, "Reply");
// Asynchronous requests return the call ID without examining any reply
if (!(call_options & st_opt_sync_call)) {
crm_trace("Async call %d, returning", stonith->call_id);
pcmk__xml_free(op_reply);
return stonith->call_id;
}
// Synchronous: the reply must carry the call ID we just sent
crm_element_value_int(op_reply, PCMK__XA_ST_CALLID, &reply_id);
if (reply_id == stonith->call_id) {
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
crm_trace("Synchronous reply %d received", reply_id);
stonith__xe_get_result(op_reply, &result);
rc = pcmk_rc2legacy(stonith__result2rc(&result));
pcmk__reset_result(&result);
if ((call_options & st_opt_discard_reply) || output_data == NULL) {
crm_trace("Discarding reply");
} else {
*output_data = op_reply;
op_reply = NULL; /* Prevent subsequent free */
}
} else if (reply_id <= 0) {
// reply_id keeps its initial -1 if no call ID could be read
crm_err("Received bad reply: No id set");
crm_log_xml_err(op_reply, "Bad reply");
pcmk__xml_free(op_reply);
op_reply = NULL;
rc = -ENOMSG;
} else {
// A positive ID that is not ours
crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id);
crm_log_xml_err(op_reply, "Old reply");
pcmk__xml_free(op_reply);
op_reply = NULL;
rc = -ENOMSG;
}
done:
// Whatever the outcome, notice a dropped connection and reset local state
if (!crm_ipc_connected(native->ipc)) {
crm_err("Fencer disconnected");
free(native->token); native->token = NULL;
stonith->state = stonith_disconnected;
}
// op_reply is NULL here if it was handed to the caller or already freed
pcmk__xml_free(op_reply);
return rc;
}
-/* Not used with mainloop */
-bool
-stonith_dispatch(stonith_t * st)
+/*!
+ * \internal
+ * \brief Process IPC messages for a fencer API connection
+ *
+ * This is used for testing purposes in scenarios that don't use a mainloop to
+ * dispatch messages automatically.
+ *
+ * \param[in,out] stonith_api Fencer API connection object
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+stonith__api_dispatch(stonith_t *stonith_api)
{
- gboolean stay_connected = TRUE;
stonith_private_t *private = NULL;
- pcmk__assert(st != NULL);
- private = st->st_private;
+ pcmk__assert(stonith_api != NULL);
+ private = stonith_api->st_private;
while (crm_ipc_ready(private->ipc)) {
-
if (crm_ipc_read(private->ipc) > 0) {
const char *msg = crm_ipc_buffer(private->ipc);
- stonith_dispatch_internal(msg, strlen(msg), st);
+ stonith_dispatch_internal(msg, strlen(msg), stonith_api);
}
if (!crm_ipc_connected(private->ipc)) {
crm_err("Connection closed");
- stay_connected = FALSE;
+ return ENOTCONN;
}
}
- return stay_connected;
+ return pcmk_rc_ok;
}
static int
-stonith_api_free(stonith_t * stonith)
+free_stonith_api(stonith_t *stonith)
{
int rc = pcmk_ok;
crm_trace("Destroying %p", stonith);
if (stonith->state != stonith_disconnected) {
crm_trace("Unregistering notifications and disconnecting %p first",
stonith);
stonith->cmds->remove_notification(stonith, NULL);
rc = stonith->cmds->disconnect(stonith);
}
if (stonith->state == stonith_disconnected) {
stonith_private_t *private = stonith->st_private;
crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table));
g_hash_table_destroy(private->stonith_op_callback_table);
crm_trace("Destroying %d notification clients", g_list_length(private->notify_list));
g_list_free_full(private->notify_list, free);
free(stonith->st_private);
free(stonith->cmds);
free(stonith);
} else {
crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc);
}
return rc;
}
-void
-stonith_api_delete(stonith_t * stonith)
-{
- crm_trace("Destroying %p", stonith);
- if(stonith) {
- stonith->cmds->free(stonith);
- }
-}
-
/*!
 * \internal
 * \brief Hash table predicate reporting whether a key is a special fencing
 *        parameter
 *
 * Used with g_hash_table_foreach_remove() to strip such parameters from a
 * parameter table.
 *
 * \param[in] key        Parameter name
 * \param[in] value      Ignored
 * \param[in] user_data  Ignored
 *
 * \return Result of pcmk_stonith_param() for \p key
 */
static gboolean
is_stonith_param(gpointer key, gpointer value, gpointer user_data)
{
    const char *param_name = key;

    return pcmk_stonith_param(param_name);
}
int
stonith__validate(stonith_t *st, int call_options, const char *rsc_id,
- const char *namespace_s, const char *agent,
- GHashTable *params, int timeout_sec, char **output,
- char **error_output)
+ const char *agent, GHashTable *params, int timeout_sec,
+ char **output, char **error_output)
{
int rc = pcmk_rc_ok;
/* Use a dummy node name in case the agent requires a target. We assume the
* actual target doesn't matter for validation purposes (if in practice,
* that is incorrect, we will need to allow the caller to pass the target).
*/
const char *target = "node1";
char *host_arg = NULL;
if (params != NULL) {
host_arg = pcmk__str_copy(g_hash_table_lookup(params, PCMK_STONITH_HOST_ARGUMENT));
/* Remove special stonith params from the table before doing anything else */
g_hash_table_foreach_remove(params, is_stonith_param, NULL);
}
#if PCMK__ENABLE_CIBSECRETS
rc = pcmk__substitute_secrets(rsc_id, params);
if (rc != pcmk_rc_ok) {
crm_warn("Could not replace secret parameters for validation of %s: %s",
agent, pcmk_rc_str(rc));
// rc is standard return value, don't return it in this function
}
#endif
if (output) {
*output = NULL;
}
if (error_output) {
*error_output = NULL;
}
if (timeout_sec <= 0) {
timeout_sec = PCMK_DEFAULT_ACTION_TIMEOUT_MS;
}
- switch (stonith_get_namespace(agent, namespace_s)) {
+ switch (get_namespace_from_agent(agent)) {
case st_namespace_rhcs:
rc = stonith__rhcs_validate(st, call_options, target, agent,
params, host_arg, timeout_sec,
output, error_output);
rc = pcmk_legacy2rc(rc);
break;
#if HAVE_STONITH_STONITH_H
case st_namespace_lha:
rc = stonith__lha_validate(st, call_options, target, agent,
params, timeout_sec, output,
error_output);
rc = pcmk_legacy2rc(rc);
break;
#endif
case st_namespace_invalid:
errno = ENOENT;
rc = errno;
if (error_output) {
*error_output = crm_strdup_printf("Agent %s not found", agent);
} else {
crm_err("Agent %s not found", agent);
}
break;
default:
errno = EOPNOTSUPP;
rc = errno;
if (error_output) {
*error_output = crm_strdup_printf("Agent %s does not support validation",
agent);
} else {
crm_err("Agent %s does not support validation", agent);
}
break;
}
free(host_arg);
return rc;
}
static int
stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
const char *namespace_s, const char *agent,
const stonith_key_value_t *params, int timeout_sec,
char **output, char **error_output)
{
/* Validation should be done directly via the agent, so we can get it from
* stonith_admin when the cluster is not running, which is important for
* higher-level tools.
*/
int rc = pcmk_ok;
GHashTable *params_table = pcmk__strkey_table(free, free);
// Convert parameter list to a hash table
for (; params; params = params->next) {
if (!pcmk_stonith_param(params->key)) {
pcmk__insert_dup(params_table, params->key, params->value);
}
}
- rc = stonith__validate(st, call_options, rsc_id, namespace_s, agent,
- params_table, timeout_sec, output, error_output);
+ rc = stonith__validate(st, call_options, rsc_id, agent, params_table,
+ timeout_sec, output, error_output);
g_hash_table_destroy(params_table);
return rc;
}
+/*!
+ * \internal
+ * \brief Create a new fencer API connection object
+ *
+ * \return Newly allocated fencer API connection object, or \c NULL on
+ * allocation failure
+ */
stonith_t *
-stonith_api_new(void)
+stonith__api_new(void)
{
stonith_t *new_stonith = NULL;
stonith_private_t *private = NULL;
new_stonith = calloc(1, sizeof(stonith_t));
if (new_stonith == NULL) {
return NULL;
}
private = calloc(1, sizeof(stonith_private_t));
if (private == NULL) {
free(new_stonith);
return NULL;
}
new_stonith->st_private = private;
private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback);
private->notify_list = NULL;
private->notify_refcnt = 0;
private->notify_deletes = FALSE;
new_stonith->call_id = 1;
new_stonith->state = stonith_disconnected;
new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t));
if (new_stonith->cmds == NULL) {
free(new_stonith->st_private);
free(new_stonith);
return NULL;
}
-/* *INDENT-OFF* */
- new_stonith->cmds->free = stonith_api_free;
+ new_stonith->cmds->free = free_stonith_api;
new_stonith->cmds->connect = stonith_api_signon;
new_stonith->cmds->disconnect = stonith_api_signoff;
new_stonith->cmds->list = stonith_api_list;
new_stonith->cmds->monitor = stonith_api_monitor;
new_stonith->cmds->status = stonith_api_status;
new_stonith->cmds->fence = stonith_api_fence;
new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay;
new_stonith->cmds->confirm = stonith_api_confirm;
new_stonith->cmds->history = stonith_api_history;
new_stonith->cmds->list_agents = stonith_api_device_list;
new_stonith->cmds->metadata = stonith_api_device_metadata;
new_stonith->cmds->query = stonith_api_query;
new_stonith->cmds->remove_device = stonith_api_remove_device;
new_stonith->cmds->register_device = stonith_api_register_device;
new_stonith->cmds->remove_level = stonith_api_remove_level;
new_stonith->cmds->remove_level_full = stonith_api_remove_level_full;
new_stonith->cmds->register_level = stonith_api_register_level;
new_stonith->cmds->register_level_full = stonith_api_register_level_full;
new_stonith->cmds->remove_callback = stonith_api_del_callback;
new_stonith->cmds->register_callback = stonith_api_add_callback;
new_stonith->cmds->remove_notification = stonith_api_del_notification;
new_stonith->cmds->register_notification = stonith_api_add_notification;
new_stonith->cmds->validate = stonith_api_validate;
-/* *INDENT-ON* */
return new_stonith;
}
/*!
- * \brief Make a blocking connection attempt to the fencer
+ * \internal
+ * \brief Free a fencer API connection object
*
- * \param[in,out] st Fencer API object
+ * \param[in,out] stonith_api Fencer API connection object
+ */
+void
+stonith__api_free(stonith_t *stonith_api)
+{
+ crm_trace("Destroying %p", stonith_api);
+ if (stonith_api != NULL) {
+ stonith_api->cmds->free(stonith_api);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Connect to the fencer, retrying on failure
+ *
+ * \param[in,out] stonith Fencer API connection object
* \param[in] name Client name to use with fencer
- * \param[in] max_attempts Return error if this many attempts fail
+ * \param[in] max_attempts Maximum number of attempts
*
- * \return pcmk_ok on success, result of last attempt otherwise
+ * \return \c pcmk_rc_ok on success, or result of last attempt otherwise
*/
int
-stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts)
+stonith__api_connect_retry(stonith_t *stonith_api, const char *name,
+ int max_attempts)
{
- int rc = -EINVAL; // if max_attempts is not positive
+ int rc = EINVAL; // if max_attempts is not positive
for (int attempt = 1; attempt <= max_attempts; attempt++) {
- rc = st->cmds->connect(st, name, NULL);
- if (rc == pcmk_ok) {
- return pcmk_ok;
- } else if (attempt < max_attempts) {
- crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s "
- QB_XS " rc=%d",
- attempt, max_attempts, pcmk_strerror(rc), rc);
+ rc = stonith_api->cmds->connect(stonith_api, name, NULL);
+ rc = pcmk_legacy2rc(rc);
+
+ if (rc == pcmk_rc_ok) {
+ return rc;
+ }
+ if (attempt < max_attempts) {
+ crm_notice("Fencer connection attempt %d of %d failed "
+ "(retrying in 2s): %s " QB_XS " rc=%d",
+ attempt, max_attempts, pcmk_rc_str(rc), rc);
sleep(2);
}
}
crm_notice("Could not connect to fencer: %s " QB_XS " rc=%d",
- pcmk_strerror(rc), rc);
+ pcmk_rc_str(rc), rc);
return rc;
}
+/*!
+ * \internal
+ * \brief Append a newly allocated STONITH key-value pair to a list
+ *
+ * \param[in,out] head Head of key-value pair list (\c NULL for new list)
+ * \param[in] key Key to add
+ * \param[in] value Value to add
+ *
+ * \return Head of appended-to list (equal to \p head if \p head is not \c NULL)
+ * \note The caller is responsible for freeing the return value using
+ * \c stonith__key_value_freeall().
+ */
stonith_key_value_t *
-stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
+stonith__key_value_add(stonith_key_value_t *head, const char *key,
+ const char *value)
{
- stonith_key_value_t *p, *end;
+ /* @COMPAT Replace this function with pcmk_prepend_nvpair(), and reverse the
+ * list when finished adding to it; or with a hash table where order does
+ * not matter
+ */
+ stonith_key_value_t *pair = pcmk__assert_alloc(1,
+ sizeof(stonith_key_value_t));
- p = pcmk__assert_alloc(1, sizeof(stonith_key_value_t));
- p->key = pcmk__str_copy(key);
- p->value = pcmk__str_copy(value);
+ pair->key = pcmk__str_copy(key);
+ pair->value = pcmk__str_copy(value);
- end = head;
- while (end && end->next) {
- end = end->next;
- }
+ if (head != NULL) {
+ stonith_key_value_t *end = head;
+
+ for (; end->next != NULL; end = end->next);
+ end->next = pair;
- if (end) {
- end->next = p;
} else {
- head = p;
+ head = pair;
}
return head;
}
+/*!
+ * \internal
+ * \brief Free all items in a \c stonith_key_value_t list
+ *
+ * This means freeing the list itself with all of its nodes. Keys and values may
+ * be freed depending on arguments.
+ *
+ * \param[in,out] head Head of list
+ * \param[in] keys If \c true, free all keys
+ * \param[in] values If \c true, free all values
+ */
void
-stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
+stonith__key_value_freeall(stonith_key_value_t *head, bool keys, bool values)
{
- stonith_key_value_t *p;
+ while (head != NULL) {
+ stonith_key_value_t *next = head->next;
- while (head) {
- p = head->next;
if (keys) {
free(head->key);
}
if (values) {
free(head->value);
}
free(head);
- head = p;
+ head = next;
}
}
/* Minimal syslog-based logging for the stand-alone helpers below.
 *
 * api_log_open() opens syslog with the identity "stonith-api" and the daemon
 * facility. api_log() prefixes each message with the calling function's name;
 * because "args" is expanded after a comma, at least one variadic argument
 * must always be supplied.
 */
#define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON)
#define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args)
int
stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
{
int rc = pcmk_ok;
- stonith_t *st = stonith_api_new();
+ stonith_t *st = stonith__api_new();
const char *action = off? PCMK_ACTION_OFF : PCMK_ACTION_REBOOT;
api_log_open();
if (st == NULL) {
api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s",
action, nodeid, uname);
return -EPROTO;
}
rc = st->cmds->connect(st, "stonith-api", NULL);
if (rc != pcmk_ok) {
api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)",
action, nodeid, uname, pcmk_strerror(rc), rc);
} else {
char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname);
int opts = 0;
stonith__set_call_options(opts, name,
st_opt_sync_call|st_opt_allow_self_fencing);
if ((uname == NULL) && (nodeid > 0)) {
stonith__set_call_options(opts, name, st_opt_cs_nodeid);
}
rc = st->cmds->fence(st, opts, name, action, timeout, 0);
free(name);
if (rc != pcmk_ok) {
api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)",
action, nodeid, uname, pcmk_strerror(rc), rc);
} else {
api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action);
}
}
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
time_t
stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
{
int rc = pcmk_ok;
time_t when = 0;
- stonith_t *st = stonith_api_new();
+ stonith_t *st = stonith__api_new();
stonith_history_t *history = NULL, *hp = NULL;
if (st == NULL) {
api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: "
"API initialization failed", nodeid, uname);
return when;
}
rc = st->cmds->connect(st, "stonith-api", NULL);
if (rc != pcmk_ok) {
api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
} else {
int entries = 0;
int progress = 0;
int completed = 0;
int opts = 0;
char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname);
stonith__set_call_options(opts, name, st_opt_sync_call);
if ((uname == NULL) && (nodeid > 0)) {
stonith__set_call_options(opts, name, st_opt_cs_nodeid);
}
rc = st->cmds->history(st, opts, name, &history, 120);
free(name);
for (hp = history; hp; hp = hp->next) {
entries++;
if (in_progress) {
progress++;
if (hp->state != st_done && hp->state != st_failed) {
when = time(NULL);
}
} else if (hp->state == st_done) {
completed++;
if (hp->completed > when) {
when = hp->completed;
}
}
}
- stonith_history_free(history);
+ stonith__history_free(history);
if(rc == pcmk_ok) {
api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed);
} else {
api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc);
}
}
- stonith_api_delete(st);
+ stonith__api_free(st);
if(when) {
api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when);
}
return when;
}
+/*!
+ * \internal
+ * \brief Check whether a fence agent with a given name exists
+ *
+ * \param[in] name Agent name
+ *
+ * \retval \c true If a fence agent named \p name exists
+ * \retval \c false Otherwise
+ */
bool
-stonith_agent_exists(const char *agent, int timeout)
+stonith__agent_exists(const char *name)
{
- stonith_t *st = NULL;
- stonith_key_value_t *devices = NULL;
- stonith_key_value_t *dIter = NULL;
- bool rc = FALSE;
+ stonith_t *stonith_api = NULL;
+ stonith_key_value_t *agents = NULL;
+ bool rc = false;
- if (agent == NULL) {
- return rc;
+ if (name == NULL) {
+ return false;
}
- st = stonith_api_new();
- if (st == NULL) {
+ stonith_api = stonith__api_new();
+ if (stonith_api == NULL) {
crm_err("Could not list fence agents: API memory allocation failed");
- return FALSE;
+ return false;
}
- st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout == 0 ? 120 : timeout);
- for (dIter = devices; dIter != NULL; dIter = dIter->next) {
- if (pcmk__str_eq(dIter->value, agent, pcmk__str_none)) {
- rc = TRUE;
+ // The list_agents method ignores its timeout argument
+ stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &agents,
+ 0);
+
+ for (const stonith_key_value_t *iter = agents; iter != NULL;
+ iter = iter->next) {
+ if (pcmk__str_eq(iter->value, name, pcmk__str_none)) {
+ rc = true;
break;
}
}
- stonith_key_value_freeall(devices, 1, 1);
- stonith_api_delete(st);
+ stonith__key_value_freeall(agents, true, true);
+ stonith__api_free(stonith_api);
return rc;
}
-const char *
-stonith_action_str(const char *action)
-{
- if (action == NULL) {
- return "fencing";
- } else if (strcmp(action, PCMK_ACTION_ON) == 0) {
- return "unfencing";
- } else if (strcmp(action, PCMK_ACTION_OFF) == 0) {
- return "turning off";
- } else {
- return action;
- }
-}
-
/*!
 * \internal
 * \brief Parse target names from one line of a target list string
 *
 * Entries are separated by any run of whitespace, commas, and semicolons.
 * Each entry is read up to the first character that is not alphanumeric, an
 * underbar, a dash, or a dot. Entries equal to "on" or "off"
 * (case-insensitively) are dropped.
 *
 * \param[in]     line    One line of a target list string (NUL-terminated)
 * \param[in]     len     String length of line
 * \param[in,out] output  List to add newly allocated target names to
 */
static void
parse_list_line(const char *line, int len, GList **output)
{
    // Work in size_t throughout so indices and the bound share one type
    const size_t line_len = (len > 0)? (size_t) len : 0;
    size_t entry_start = 0;

    /* Skip complaints about additional parameters device doesn't understand
     *
     * @TODO Document or eliminate the implied restriction of target names
     */
    if (strstr(line, "invalid") || strstr(line, "variable")) {
        crm_debug("Skipping list output line: %s", line);
        return;
    }

    // Process line content, character by character
    for (size_t i = 0; i <= line_len; i++) {
        const char c = line[i];

        // <ctype.h> functions require a value representable as unsigned char
        if (!isspace((unsigned char) c) && (c != ',') && (c != ';')
            && (c != '\0')) {
            continue;
        }

        // We've found a separator (i.e. the end of an entry)
        if (i > entry_start) {
            int rc = 0;
            char *entry = pcmk__assert_alloc(i - entry_start + 1,
                                             sizeof(char));

            /* Read entry, stopping at first separator. The read cannot
             * overrun the buffer because no separator is in the scanset.
             *
             * The dash is placed last in the scanset: a '-' between two other
             * characters has implementation-defined meaning, and "_-." is not
             * treated as three literal characters by every C library.
             *
             * @TODO Document or eliminate these character restrictions
             */
            rc = sscanf(line + entry_start, "%[a-zA-Z0-9_.-]", entry);
            if (rc != 1) {
                crm_warn("Could not parse list output entry: %s "
                         QB_XS " entry_start=%zu position=%zu",
                         line + entry_start, entry_start, i);
                free(entry);

            } else if (pcmk__strcase_any_of(entry, PCMK_ACTION_ON,
                                            PCMK_ACTION_OFF, NULL)) {
                /* Some agents print the target status in the list output,
                 * though none are known now (the separate list-status command
                 * is used for this, but it can also print "UNKNOWN"). To handle
                 * this possibility, skip such entries.
                 *
                 * @TODO Document or eliminate the implied restriction of target
                 * names.
                 */
                free(entry);

            } else {
                // We have a valid entry
                *output = g_list_append(*output, entry);
            }
        }
        // (i == entry_start means a leading or sequential separator)
        entry_start = i + 1;

        if (c == '\0') {
            // Never read past the terminator, even if len overstates it
            break;
        }
    }
}
/*!
 * \internal
 * \brief Parse a list of targets from a string
 *
 * \param[in] target_spec  Target list as a string (may be \c NULL)
 *
 * \return List of target names (\c NULL if \p target_spec is \c NULL or
 *         contains no usable names)
 * \note The target list string format is flexible, to allow for user-specified
 *       lists such as pcmk_host_list and the output of an agent's list action
 *       (whether direct or via the API, which escapes newlines). There may be
 *       multiple lines, separated by either a newline or an escaped newline
 *       (backslash n). Each line may have one or more target names, separated
 *       by any combination of whitespace, commas, and semi-colons. Lines
 *       containing "invalid" or "variable" will be ignored entirely. Target
 *       names "on" or "off" (case-insensitive) will be ignored. Target names
 *       may contain only alphanumeric characters, underbars (_), dashes (-),
 *       and dots (.) (if any other character occurs in the name, it and all
 *       subsequent characters in the name will be ignored).
 * \note The caller is responsible for freeing the result with
 *       g_list_free_full(result, free).
 */
GList *
stonith__parse_targets(const char *target_spec)
{
    GList *targets = NULL;
    size_t spec_len = 0;
    size_t line_start = 0;  // Starting index of line being processed

    if (target_spec == NULL) {
        return NULL;
    }
    spec_len = strlen(target_spec);

    for (size_t i = 0; i <= spec_len; ++i) {
        /* Reading index i + 1 is safe: a backslash is never the terminator,
         * so i < spec_len whenever the first comparison is true
         */
        const bool escaped_newline = (target_spec[i] == '\\')
                                     && (target_spec[i + 1] == 'n');

        if ((target_spec[i] != '\n') && (target_spec[i] != '\0')
            && !escaped_newline) {
            continue;
        }

        // We've reached the end of one line of output
        if (i > line_start) {
            const size_t len = i - line_start;

            /* Copy the line into an asserting allocation instead of using an
             * unchecked strndup(), whose NULL return would be dereferenced
             */
            char *line = pcmk__assert_alloc(len + 1, sizeof(char));

            memcpy(line, target_spec + line_start, len);
            line[len] = '\0';
            parse_list_line(line, (int) len, &targets);
            free(line);
        }

        if (escaped_newline) {
            ++i;    // backslash-n takes up two positions
        }
        line_start = i + 1;
    }
    return targets;
}
/*!
 * \internal
 * \brief Check whether a fencing failure was followed by an equivalent success
 *
 * Entries of \p top_history are examined from the head up to (not including)
 * \p event. An entry counts as an equivalent later success if it is done, has
 * the same target (case-insensitively) and action as \p event, and completed
 * strictly later.
 *
 * \param[in] event        Fencing failure
 * \param[in] top_history  Complete fencing history (must be sorted by
 *                         stonith__sort_history() beforehand)
 *
 * \return The name of the node that executed the fencing if a later successful
 *         event exists, or NULL if no such event exists
 * \note NOTE(review): if \p event has no delegate and the first matching
 *       success has none either, that entry's NULL delegate is returned --
 *       confirm whether callers can tell this apart from "no such event".
 */
const char *
stonith__later_succeeded(const stonith_history_t *event,
                         const stonith_history_t *top_history)
{
    const char *fallback = NULL;

    for (const stonith_history_t *hp = top_history;
         (hp != NULL) && (hp != event); hp = hp->next) {

        bool later = false;

        if (hp->state != st_done) {
            continue;
        }
        if (!pcmk__str_eq(event->target, hp->target, pcmk__str_casei)) {
            continue;
        }
        if (!pcmk__str_eq(event->action, hp->action, pcmk__str_none)) {
            continue;
        }

        // Compare completion times at second, then nanosecond, resolution
        later = (hp->completed > event->completed)
                || ((hp->completed == event->completed)
                    && (hp->completed_nsec > event->completed_nsec));
        if (!later) {
            continue;
        }

        if ((event->delegate == NULL)
            || pcmk__str_eq(event->delegate, hp->delegate, pcmk__str_casei)) {
            // Prefer equivalent fencing by same executioner
            return hp->delegate;
        }

        if (fallback == NULL) {
            // Otherwise remember first successful executioner
            fallback = (hp->delegate != NULL)? hp->delegate : "some node";
        }
    }
    return fallback;
}
/*!
 * \internal
 * \brief Sort fencing history, pending first then by most recently completed
 *
 * Done and failed entries are insertion-sorted by completion time, newest
 * first; an entry is placed in front of the first already-sorted entry that it
 * is strictly newer than. All other entries are treated as pending and are
 * collected (in reverse of their input order) ahead of the sorted entries.
 *
 * \param[in,out] history  List of stonith actions (relinked in place)
 *
 * \return New head of sorted \p history
 */
stonith_history_t *
stonith__sort_history(stonith_history_t *history)
{
    stonith_history_t *completed = NULL;
    stonith_history_t *pending = NULL;
    stonith_history_t *next = NULL;

    for (stonith_history_t *item = history; item != NULL; item = next) {
        stonith_history_t **link = &completed;

        // Save the successor now, because item->next is about to be rewritten
        next = item->next;

        if ((item->state != st_done) && (item->state != st_failed)) {
            // Pending actions are simply pushed onto their own list
            item->next = pending;
            pending = item;
            continue;
        }

        // Advance to the first link whose entry is strictly older than item
        while (*link != NULL) {
            const stonith_history_t *other = *link;

            if ((item->completed > other->completed)
                || ((item->completed == other->completed)
                    && (item->completed_nsec > other->completed_nsec))) {
                break;
            }
            link = &((*link)->next);
        }
        item->next = *link;
        *link = item;
    }

    if (pending == NULL) {
        return completed;
    }

    /* pending actions don't have a completed-stamp so make them go front */
    for (next = pending; next->next != NULL; next = next->next);
    next->next = completed;
    return pending;
}
/*!
- * \brief Return string equivalent of an operation state value
+ * \internal
+ * \brief Return string equivalent of a fencing operation state value
*
* \param[in] state Fencing operation state value
*
- * \return Human-friendly string equivalent of state
+ * \return Human-friendly string equivalent of \p state
*/
const char *
-stonith_op_state_str(enum op_state state)
+stonith__op_state_text(enum op_state state)
{
+ // @COMPAT Move this to the fencer after dropping stonith_op_state_str()
switch (state) {
- case st_query: return "querying";
- case st_exec: return "executing";
- case st_done: return "completed";
- case st_duplicate: return "duplicate";
- case st_failed: return "failed";
+ case st_query:
+ return "querying";
+ case st_exec:
+ return "executing";
+ case st_done:
+ return "completed";
+ case st_duplicate:
+ return "duplicate";
+ case st_failed:
+ return "failed";
+ default:
+ return "unknown";
}
- return "unknown";
}
/*!
 * \internal
 * \brief Find the first history entry accepted by a predicate
 *
 * \param[in] history      Head of the history list to search
 * \param[in] matching_fn  Predicate called with each entry and \p user_data
 * \param[in] user_data    Passed through to \p matching_fn unchanged
 *
 * \return First entry for which \p matching_fn returns true, or \c NULL if
 *         there is none
 */
stonith_history_t *
stonith__first_matching_event(stonith_history_t *history,
                              bool (*matching_fn)(stonith_history_t *, void *),
                              void *user_data)
{
    stonith_history_t *candidate = history;

    while ((candidate != NULL) && !matching_fn(candidate, user_data)) {
        candidate = candidate->next;
    }
    return candidate;
}
bool
stonith__event_state_pending(stonith_history_t *history, void *user_data)
{
    /* An entry is pending when it has neither failed nor completed.
     * user_data is not used by this predicate.
     */
    const bool finished = (history->state == st_failed)
                          || (history->state == st_done);

    return !finished;
}
bool
stonith__event_state_eq(stonith_history_t *history, void *user_data)
{
    // The state to compare against is packed into the pointer value itself
    const int wanted_state = GPOINTER_TO_INT(user_data);

    return (history->state == wanted_state);
}
bool
stonith__event_state_neq(stonith_history_t *history, void *user_data)
{
    // The state to compare against is packed into the pointer value itself
    const int excluded_state = GPOINTER_TO_INT(user_data);

    return (history->state != excluded_state);
}
-void
-stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name,
- xmlNode *metadata)
+/*!
+ * \internal
+ * \brief Check whether a given parameter exists in a fence agent's metadata
+ *
+ * \param[in] metadata Agent metadata
+ * \param[in] name Parameter name
+ *
+ * \retval \c true If \p name exists as a parameter in \p metadata
+ * \retval \c false Otherwise
+ */
+static bool
+param_is_supported(xmlNode *metadata, const char *name)
{
- xmlXPathObject *xpath = NULL;
- int max = 0;
- int lpc = 0;
+ char *xpath_s = crm_strdup_printf("//" PCMK_XE_PARAMETER
+ "[@" PCMK_XA_NAME "='%s']",
+ name);
+ xmlXPathObject *xpath = pcmk__xpath_search(metadata->doc, xpath_s);
+ bool supported = (pcmk__xpath_num_results(xpath) > 0);
- CRM_CHECK((device_flags != NULL) && (metadata != NULL), return);
+ free(xpath_s);
+ xmlXPathFreeObject(xpath);
+ return supported;
+}
- xpath = pcmk__xpath_search(metadata->doc, "//" PCMK_XE_PARAMETER);
- max = pcmk__xpath_num_results(xpath);
+/*!
+ * \internal
+ * \brief Get the default host argument based on a device's agent metadata
+ *
+ * If an agent supports the "plug" parameter, default to that. Otherwise default
+ * to the "port" parameter if supported. Otherwise return \c NULL.
+ *
+ * \param[in] metadata Agent metadata
+ *
+ * \return Parameter name for default host argument
+ */
+const char *
+stonith__default_host_arg(xmlNode *metadata)
+{
+ CRM_CHECK(metadata != NULL, return NULL);
- if (max == 0) {
- xmlXPathFreeObject(xpath);
- return;
+ if (param_is_supported(metadata, "plug")) {
+ return "plug";
}
-
- for (lpc = 0; lpc < max; lpc++) {
- const char *parameter = NULL;
- xmlNode *match = pcmk__xpath_result(xpath, lpc);
-
- CRM_LOG_ASSERT(match != NULL);
- if (match == NULL) {
- continue;
- }
-
- parameter = crm_element_value(match, PCMK_XA_NAME);
-
- if (pcmk__str_eq(parameter, "plug", pcmk__str_casei)) {
- stonith__set_device_flags(*device_flags, device_name,
- st_device_supports_parameter_plug);
-
- } else if (pcmk__str_eq(parameter, "port", pcmk__str_casei)) {
- stonith__set_device_flags(*device_flags, device_name,
- st_device_supports_parameter_port);
- }
+ if (param_is_supported(metadata, "port")) {
+ return "port";
}
-
- xmlXPathFreeObject(xpath);
+ return NULL;
}
/*!
* \internal
* \brief Retrieve fence agent meta-data asynchronously
*
* \param[in] agent Agent to execute
* \param[in] timeout_sec Error if not complete within this time
* \param[in] callback Function to call with result (this will always be
* called, whether by this function directly or
* later via the main loop, and on success the
* metadata will be in its result argument's
* action_stdout)
* \param[in,out] user_data User data to pass to callback
*
* \return Standard Pacemaker return code
* \note The caller must use a main loop. This function is not a
* stonith_api_operations_t method because it does not need a stonith_t
* object and does not go through the fencer, but executes the agent
* directly.
*/
int
stonith__metadata_async(const char *agent, int timeout_sec,
void (*callback)(int pid,
const pcmk__action_result_t *result,
void *user_data),
void *user_data)
{
// How the metadata is obtained depends on the namespace the agent belongs to
- switch (stonith_get_namespace(agent, NULL)) {
+ switch (get_namespace_from_agent(agent)) {
case st_namespace_rhcs:
{
stonith_action_t *action = NULL;
int rc = pcmk_ok;
action = stonith__action_create(agent, PCMK_ACTION_METADATA,
NULL, timeout_sec, NULL, NULL,
NULL);
rc = stonith__execute_async(action, user_data, callback, NULL);
if (rc != pcmk_ok) {
// The action could not be started, so invoke the callback here with
// the action's current result, then dispose of the action ourselves
callback(0, stonith__action_result(action), user_data);
stonith__destroy_action(action);
}
// rc is a legacy code at this point; callers get a standard return code
return pcmk_legacy2rc(rc);
}
#if HAVE_STONITH_STONITH_H
case st_namespace_lha:
// LHA metadata is simply synthesized, so simulate async
{
pcmk__action_result_t result = {
.exit_status = CRM_EX_OK,
.execution_status = PCMK_EXEC_DONE,
.exit_reason = NULL,
.action_stdout = NULL,
.action_stderr = NULL,
};
// NOTE(review): the return value of stonith__lha_metadata() is ignored,
// so the callback sees CRM_EX_OK even if no metadata was produced
stonith__lha_metadata(agent, timeout_sec,
&result.action_stdout);
callback(0, &result, user_data);
// result lives on the stack; the callback must not keep a reference
pcmk__reset_result(&result);
return pcmk_rc_ok;
}
#endif
default:
// Unrecognized namespace: report a hard "no such agent" error
{
pcmk__action_result_t result = {
.exit_status = CRM_EX_NOSUCH,
.execution_status = PCMK_EXEC_ERROR_HARD,
.exit_reason = crm_strdup_printf("No such agent '%s'",
agent),
.action_stdout = NULL,
.action_stderr = NULL,
};
callback(0, &result, user_data);
// presumably releases the exit_reason string allocated above -- confirm
pcmk__reset_result(&result);
return ENOENT;
}
}
}
/*!
 * \internal
 * \brief Get the exit status carried by an async action's callback data
 *
 * \param[in] data  Callback data (may be NULL)
 *
 * \return Exit status stored in the result attached to \p data, or
 *         CRM_EX_ERROR if \p data or its attached result is NULL
 */
int
stonith__exit_status(const stonith_callback_data_t *data)
{
    const bool have_result = (data != NULL) && (data->opaque != NULL);

    return have_result? ((pcmk__action_result_t *) data->opaque)->exit_status
                      : CRM_EX_ERROR;
}
/*!
 * \internal
 * \brief Get the execution status carried by an async action's callback data
 *
 * \param[in] data  Callback data (may be NULL)
 *
 * \return Execution status stored in the result attached to \p data, or
 *         PCMK_EXEC_UNKNOWN if \p data or its attached result is NULL
 */
int
stonith__execution_status(const stonith_callback_data_t *data)
{
    const bool have_result = (data != NULL) && (data->opaque != NULL);

    return have_result?
           ((pcmk__action_result_t *) data->opaque)->execution_status
           : PCMK_EXEC_UNKNOWN;
}
/*!
 * \internal
 * \brief Get the exit reason carried by an async action's callback data
 *
 * \param[in] data  Callback data (may be NULL)
 *
 * \return Exit reason stored in the result attached to \p data, or NULL if
 *         \p data or its attached result is NULL
 */
const char *
stonith__exit_reason(const stonith_callback_data_t *data)
{
    const bool have_result = (data != NULL) && (data->opaque != NULL);

    return have_result? ((pcmk__action_result_t *) data->opaque)->exit_reason
                      : NULL;
}
/*!
 * \internal
 * \brief Get the exit status recorded in an event notification
 *
 * \param[in] event  Event (may be NULL)
 *
 * \return Exit status from the event's private result, or CRM_EX_ERROR if
 *         \p event or its private data is NULL
 */
int
stonith__event_exit_status(const stonith_event_t *event)
{
    struct event_private *priv = NULL;

    if ((event == NULL) || (event->opaque == NULL)) {
        return CRM_EX_ERROR;
    }
    priv = event->opaque;
    return priv->result.exit_status;
}
/*!
 * \internal
 * \brief Get the execution status recorded in an event notification
 *
 * \param[in] event  Event (may be NULL)
 *
 * \return Execution status from the event's private result, or
 *         PCMK_EXEC_UNKNOWN if \p event or its private data is NULL
 */
int
stonith__event_execution_status(const stonith_event_t *event)
{
    struct event_private *priv = NULL;

    if ((event == NULL) || (event->opaque == NULL)) {
        return PCMK_EXEC_UNKNOWN;
    }
    priv = event->opaque;
    return priv->result.execution_status;
}
/*!
 * \internal
 * \brief Get the exit reason recorded in an event notification
 *
 * \param[in] event  Event (may be NULL)
 *
 * \return Exit reason from the event's private result, or NULL if \p event
 *         or its private data is NULL
 */
const char *
stonith__event_exit_reason(const stonith_event_t *event)
{
    struct event_private *priv = NULL;

    if ((event == NULL) || (event->opaque == NULL)) {
        return NULL;
    }
    priv = event->opaque;
    return priv->result.exit_reason;
}
/*!
* \internal
* \brief Return a human-friendly description of a fencing event
*
* \param[in] event Event to describe
*
* \return Newly allocated string with description of \p event
* \note The caller is responsible for freeing the return value.
* This function asserts on memory errors and never returns NULL.
*/
char *
stonith__event_description(const stonith_event_t *event)
{
// Use somewhat readable defaults
const char *origin = pcmk__s(event->client_origin, "a client");
const char *origin_node = pcmk__s(event->origin, "a node");
const char *executioner = pcmk__s(event->executioner, "the cluster");
const char *device = pcmk__s(event->device, "unknown");
const char *action = pcmk__s(event->action, event->operation);
const char *target = pcmk__s(event->target, "no node");
const char *reason = stonith__event_exit_reason(event);
const char *status;
if (action == NULL) {
action = "(unknown)";
}
if (stonith__event_execution_status(event) != PCMK_EXEC_DONE) {
status = pcmk_exec_status_str(stonith__event_execution_status(event));
} else if (stonith__event_exit_status(event) != CRM_EX_OK) {
status = pcmk_exec_status_str(PCMK_EXEC_ERROR);
} else {
status = crm_exit_str(CRM_EX_OK);
}
if (pcmk__str_eq(event->operation, PCMK__VALUE_ST_NOTIFY_HISTORY,
pcmk__str_none)) {
return crm_strdup_printf("Fencing history may have changed");
} else if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_ADD,
pcmk__str_none)) {
return crm_strdup_printf("A fencing device (%s) was added", device);
} else if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_DEL,
pcmk__str_none)) {
return crm_strdup_printf("A fencing device (%s) was removed", device);
} else if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_ADD,
pcmk__str_none)) {
return crm_strdup_printf("A fencing topology level (%s) was added",
device);
} else if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_DEL,
pcmk__str_none)) {
return crm_strdup_printf("A fencing topology level (%s) was removed",
device);
}
// event->operation should be PCMK__VALUE_ST_NOTIFY_FENCE at this point
return crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s%s%s%s (ref=%s)",
action, target, executioner, origin, origin_node,
status,
((reason == NULL)? "" : " ("), pcmk__s(reason, ""),
((reason == NULL)? "" : ")"),
pcmk__s(event->id, "(none)"));
}
+
+// Deprecated functions kept only for backward API compatibility
+// LCOV_EXCL_START
+
+// See comments in stonith-ng.h for why we re-declare before defining
+
+stonith_t *stonith_api_new(void);
+
+stonith_t *
+stonith_api_new(void)
+{
+ return stonith__api_new();
+}
+
+void stonith_api_delete(stonith_t *stonith);
+
+void
+stonith_api_delete(stonith_t *stonith)
+{
+ stonith__api_free(stonith);
+}
+
+static void
+stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data)
+{
+ int call = GPOINTER_TO_INT(key);
+ stonith_callback_client_t *blob = value;
+
+ crm_debug("Call %d (%s): pending", call, pcmk__s(blob->id, "no ID"));
+}
+
+void stonith_dump_pending_callbacks(stonith_t *stonith);
+
+/* Deprecated compatibility wrapper: log every entry of the connection's
+ * pending operation callback table (if it has one) at debug level.
+ */
+void
+stonith_dump_pending_callbacks(stonith_t *stonith)
+{
+    stonith_private_t *private = stonith->st_private;
+
+    if (private->stonith_op_callback_table == NULL) {
+        return;
+    }
+    // g_hash_table_foreach() returns void, so it must not be a return operand
+    g_hash_table_foreach(private->stonith_op_callback_table,
+                         stonith_dump_pending_op, NULL);
+}
+
+bool stonith_dispatch(stonith_t *stonith_api);
+
+bool
+stonith_dispatch(stonith_t *stonith_api)
+{
+ return (stonith__api_dispatch(stonith_api) == pcmk_rc_ok);
+}
+
+stonith_key_value_t *stonith_key_value_add(stonith_key_value_t *head,
+ const char *key, const char *value);
+
+stonith_key_value_t *
+stonith_key_value_add(stonith_key_value_t *head, const char *key,
+ const char *value)
+{
+ return stonith__key_value_add(head, key, value);
+}
+
+void stonith_key_value_freeall(stonith_key_value_t *head, int keys, int values);
+
+void
+stonith_key_value_freeall(stonith_key_value_t *head, int keys, int values)
+{
+ stonith__key_value_freeall(head, (keys != 0), (values != 0));
+}
+
+void stonith_history_free(stonith_history_t *head);
+
+void
+stonith_history_free(stonith_history_t *head)
+{
+ stonith__history_free(head);
+}
+
+int stonith_api_connect_retry(stonith_t *st, const char *name,
+ int max_attempts);
+
+int
+stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts)
+{
+ return pcmk_rc2legacy(stonith__api_connect_retry(st, name, max_attempts));
+}
+
+const char *stonith_op_state_str(enum op_state state);
+
+const char *
+stonith_op_state_str(enum op_state state)
+{
+ return stonith__op_state_text(state);
+}
+
+bool stonith_agent_exists(const char *agent, int timeout);
+
+bool
+stonith_agent_exists(const char *agent, int timeout)
+{
+ return stonith__agent_exists(agent);
+}
+
+const char *stonith_action_str(const char *action);
+
+const char *
+stonith_action_str(const char *action)
+{
+ if (action == NULL) {
+ return "fencing";
+ } else if (strcmp(action, PCMK_ACTION_ON) == 0) {
+ return "unfencing";
+ } else if (strcmp(action, PCMK_ACTION_OFF) == 0) {
+ return "turning off";
+ } else {
+ return action;
+ }
+}
+
+enum stonith_namespace stonith_text2namespace(const char *namespace_s);
+
+enum stonith_namespace
+stonith_text2namespace(const char *namespace_s)
+{
+ return parse_namespace(namespace_s);
+}
+
+const char *stonith_namespace2text(enum stonith_namespace st_namespace);
+
+const char *
+stonith_namespace2text(enum stonith_namespace st_namespace)
+{
+ return namespace_text(st_namespace);
+}
+
+enum stonith_namespace stonith_get_namespace(const char *agent,
+ const char *namespace_s);
+
+enum stonith_namespace
+stonith_get_namespace(const char *agent, const char *namespace_s)
+{
+ if (pcmk__str_eq(namespace_s, "internal", pcmk__str_none)) {
+ return st_namespace_internal;
+ }
+ return get_namespace_from_agent(agent);
+}
+
+// LCOV_EXCL_STOP
+// End deprecated API
diff --git a/lib/fencing/st_lha.c b/lib/fencing/st_lha.c
index 7c42c69270..90bc88015c 100644
--- a/lib/fencing/st_lha.c
+++ b/lib/fencing/st_lha.c
@@ -1,309 +1,309 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <glib.h>
#include <dlfcn.h>
#include <crm/crm.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <stonith/stonith.h>
#include "fencing_private.h"
#define LHA_STONITH_LIBRARY "libstonith.so.1"
static void *lha_agents_lib = NULL;
// @TODO Use XML string constants and maybe a real XML object
static const char META_TEMPLATE[] =
"<?xml " PCMK_XA_VERSION "=\"1.0\"?>\n"
"<" PCMK_XE_RESOURCE_AGENT " " PCMK_XA_NAME "=\"%s\">\n"
" <" PCMK_XE_VERSION ">1.1</" PCMK_XE_VERSION ">\n"
" <" PCMK_XE_LONGDESC " " PCMK_XA_LANG "=\"" PCMK__VALUE_EN "\">\n"
"%s\n"
" </" PCMK_XE_LONGDESC ">\n"
" <" PCMK_XE_SHORTDESC " " PCMK_XA_LANG "=\"" PCMK__VALUE_EN "\">"
"%s"
"</" PCMK_XE_SHORTDESC ">\n"
"%s\n"
" <" PCMK_XE_ACTIONS ">\n"
" <" PCMK_XE_ACTION " " PCMK_XA_NAME "=\"" PCMK_ACTION_START "\""
" " PCMK_META_TIMEOUT "=\"%s\" />\n"
" <" PCMK_XE_ACTION " " PCMK_XA_NAME "=\"" PCMK_ACTION_STOP "\""
" " PCMK_META_TIMEOUT "=\"15s\" />\n"
" <" PCMK_XE_ACTION " " PCMK_XA_NAME "=\"" PCMK_ACTION_STATUS "\""
" " PCMK_META_TIMEOUT "=\"%s\" />\n"
" <" PCMK_XE_ACTION " " PCMK_XA_NAME "=\"" PCMK_ACTION_MONITOR "\""
" " PCMK_META_TIMEOUT "=\"%s\""
" " PCMK_META_INTERVAL "=\"3600s\" />\n"
" <" PCMK_XE_ACTION " " PCMK_XA_NAME "=\"" PCMK_ACTION_META_DATA "\""
" " PCMK_META_TIMEOUT "=\"15s\" />\n"
" </" PCMK_XE_ACTIONS ">\n"
" <" PCMK_XE_SPECIAL " " PCMK_XA_TAG "=\"heartbeat\">\n"
" <" PCMK_XE_VERSION ">2.0</" PCMK_XE_VERSION ">\n"
" </" PCMK_XE_SPECIAL ">\n"
"</" PCMK_XE_RESOURCE_AGENT ">\n";
/*!
 * \internal
 * \brief Look up a symbol in a shared library, opening the library if needed
 *
 * \param[in,out] handle  Library handle; if it points to NULL, the library is
 *                        opened with dlopen() and the handle stored here
 * \param[in]     lib     Library name (passed to dlopen() and used in logs)
 * \param[in]     fn      Name of the symbol to look up
 *
 * \return Address of \p fn, or NULL (after logging an error) if the library
 *         could not be opened or the symbol was not found
 */
static void *
find_library_function(void **handle, const char *lib, const char *fn)
{
    void *symbol = NULL;

    // Reuse an already-open handle; otherwise try to open the library now
    if (*handle == NULL) {
        *handle = dlopen(lib, RTLD_LAZY);
    }
    if (*handle == NULL) {
        crm_err("Could not open %s: %s", lib, dlerror());
        return NULL;
    }

    symbol = dlsym(*handle, fn);
    if (symbol == NULL) {
        crm_err("Could not find %s in %s: %s", fn, lib, dlerror());
    }
    return symbol;
}
/*!
 * \internal
 * \brief Check whether a given fence agent is an LHA agent
 *
 * The agent counts as LHA if the LHA stonith library can create an object for
 * it. The library functions are looked up only on the first call.
 *
 * \param[in] agent  Fence agent type
 *
 * \return true if \p agent is an LHA agent, otherwise false
 */
bool
stonith__agent_is_lha(const char *agent)
{
    static bool need_init = true;
    static Stonith *(*st_new_fn) (const char *) = NULL;
    static void (*st_del_fn) (Stonith *) = NULL;

    Stonith *probe = NULL;

    if (need_init) {
        need_init = false;
        st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY,
                                          "stonith_new");
        st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY,
                                          "stonith_delete");
    }

    // Without the library and both functions, nothing can be an LHA agent
    if (!lha_agents_lib || !st_new_fn || !st_del_fn) {
        return false;
    }

    probe = (*st_new_fn) (agent);
    if (!probe) {
        return false;
    }
    (*st_del_fn) (probe);
    return true;
}
/*!
 * \internal
 * \brief Append the names of all available LHA fence agent types to a list
 *
 * \param[in,out] devices  List head; updated with each type name added as a
 *                         value with a NULL key
 *
 * \return Number of agent types added (0 if the type-list function could not
 *         be found or returned nothing)
 */
int
stonith__list_lha_agents(stonith_key_value_t **devices)
{
static gboolean need_init = TRUE;
int count = 0;
char **entry = NULL;
char **type_list = NULL;
static char **(*type_list_fn) (void) = NULL;
static void (*type_free_fn) (char **) = NULL;
// Look up the library functions only on the first call
if (need_init) {
need_init = FALSE;
type_list_fn = find_library_function(&lha_agents_lib,
LHA_STONITH_LIBRARY,
"stonith_types");
type_free_fn = find_library_function(&lha_agents_lib,
LHA_STONITH_LIBRARY,
"stonith_free_hostlist");
}
if (type_list_fn) {
type_list = (*type_list_fn) ();
}
// The loop stops at the first NULL entry of the returned array
for (entry = type_list; entry != NULL && *entry; ++entry) {
crm_trace("Added: %s", *entry);
- *devices = stonith_key_value_add(*devices, NULL, *entry);
+ *devices = stonith__key_value_add(*devices, NULL, *entry);
count++;
}
// The type list is released with the library's host-list free function
if (type_list && type_free_fn) {
(*type_free_fn) (type_list);
}
return count;
}
static void
stonith_plugin(int priority, const char *fmt, ...) G_GNUC_PRINTF(2, 3);

/* Logging hook handed to the LHA stonith library: format the message and pass
 * it to the Pacemaker log at the given priority, leaving errno as it was on
 * entry.
 */
static void
stonith_plugin(int priority, const char *format, ...)
{
    const int saved_errno = errno;
    char *message = NULL;
    int length = 0;
    va_list args;

    va_start(args, format);
    length = vasprintf(&message, format, args);
    va_end(args);
    pcmk__assert(length > 0);

    do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", message);
    free(message);

    errno = saved_errno;
}
/*!
 * \internal
 * \brief Synthesize metadata XML for an LHA fence agent
 *
 * The agent's long description, short description and parameter XML are
 * queried from the LHA stonith library and inserted into META_TEMPLATE.
 *
 * \param[in]  agent    Fence agent type
 * \param[in]  timeout  Not referenced by this function
 * \param[out] output   If not NULL, where to store the newly allocated
 *                      metadata string (or NULL if none was generated)
 *
 * \return 0, or -EINVAL if the library could not create an object for
 *         \p agent (in which case \p output is left untouched)
 * \note NOTE(review): if the library or any of its functions is unavailable,
 *       this still returns 0, with \p *output set to NULL -- confirm callers
 *       expect that.
 */
int
stonith__lha_metadata(const char *agent, int timeout, char **output)
{
int rc = 0;
char *buffer = NULL;
static const char *no_parameter_info = "<!-- no value -->";
Stonith *stonith_obj = NULL;
static gboolean need_init = TRUE;
static Stonith *(*st_new_fn) (const char *) = NULL;
static const char *(*st_info_fn) (Stonith *, int) = NULL;
static void (*st_del_fn) (Stonith *) = NULL;
static void (*st_log_fn) (Stonith *, PILLogFun) = NULL;
// Look up the library functions only on the first call
if (need_init) {
need_init = FALSE;
st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY,
"stonith_new");
st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY,
"stonith_delete");
st_log_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY,
"stonith_set_log");
st_info_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY,
"stonith_get_info");
}
if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) {
char *meta_longdesc = NULL;
char *meta_shortdesc = NULL;
char *meta_param = NULL;
const char *timeout_str = NULL;
gchar *meta_longdesc_esc = NULL;
gchar *meta_shortdesc_esc = NULL;
stonith_obj = st_new_fn(agent);
if (stonith_obj != NULL) {
// Route the plugin's own log messages through stonith_plugin()
st_log_fn(stonith_obj, (PILLogFun) &stonith_plugin);
/* A st_info_fn() may free any existing output buffer every time
* when it's called. Copy the output every time.
*/
meta_longdesc = pcmk__str_copy(st_info_fn(stonith_obj,
ST_DEVICEDESCR));
if (meta_longdesc == NULL) {
crm_warn("no long description in %s's metadata.", agent);
meta_longdesc = pcmk__str_copy(no_parameter_info);
}
meta_shortdesc = pcmk__str_copy(st_info_fn(stonith_obj,
ST_DEVICEID));
if (meta_shortdesc == NULL) {
crm_warn("no short description in %s's metadata.", agent);
meta_shortdesc = pcmk__str_copy(no_parameter_info);
}
meta_param = pcmk__str_copy(st_info_fn(stonith_obj,
ST_CONF_XML));
if (meta_param == NULL) {
crm_warn("no list of parameters in %s's metadata.", agent);
meta_param = pcmk__str_copy(no_parameter_info);
}
st_del_fn(stonith_obj);
} else {
// Nothing has been allocated yet, so returning here leaks nothing
errno = EINVAL;
crm_perror(LOG_ERR, "Agent %s not found", agent);
return -EINVAL;
}
// Only the descriptions are escaped; meta_param is inserted as-is
if (pcmk__xml_needs_escape(meta_longdesc, pcmk__xml_escape_text)) {
meta_longdesc_esc = pcmk__xml_escape(meta_longdesc,
pcmk__xml_escape_text);
}
if (pcmk__xml_needs_escape(meta_shortdesc, pcmk__xml_escape_text)) {
meta_shortdesc_esc = pcmk__xml_escape(meta_shortdesc,
pcmk__xml_escape_text);
}
/* @TODO This needs a string that's parsable by crm_get_msec(). In
* general, pcmk__readable_interval() doesn't provide that. It works
* here because PCMK_DEFAULT_ACTION_TIMEOUT_MS is 20000 -> "20s".
*/
timeout_str = pcmk__readable_interval(PCMK_DEFAULT_ACTION_TIMEOUT_MS);
// Use the escaped description where one was needed, else the raw copy;
// the same timeout string fills all three timeout slots of the template
buffer = crm_strdup_printf(META_TEMPLATE, agent,
((meta_longdesc_esc != NULL) ?
meta_longdesc_esc : meta_longdesc),
((meta_shortdesc_esc != NULL) ?
meta_shortdesc_esc : meta_shortdesc),
meta_param, timeout_str, timeout_str,
timeout_str);
g_free(meta_longdesc_esc);
g_free(meta_shortdesc_esc);
free(meta_longdesc);
free(meta_shortdesc);
free(meta_param);
}
// Hand the buffer to the caller if wanted; otherwise discard it
if (output) {
*output = buffer;
} else {
free(buffer);
}
return rc;
}
/* Implement a dummy function that uses -lpils so that linkers don't drop the
* reference.
*/
#include <pils/plugin.h>
const char *i_hate_pils(int rc);

// Exists only so that this file references a symbol from the PILS library
const char *
i_hate_pils(int rc)
{
    const char *description = PIL_strerror(rc);

    return description;
}
/*!
 * \internal
 * \brief Reject validation requests for Linux-HA fence agents
 *
 * None of the arguments is examined. The function sets errno to EOPNOTSUPP
 * and logs an error.
 *
 * \return -EOPNOTSUPP, always
 */
int
stonith__lha_validate(stonith_t *st, int call_options, const char *target,
                      const char *agent, GHashTable *params, int timeout,
                      char **output, char **error_output)
{
    const int unsupported = EOPNOTSUPP;

    // crm_perror() is called with errno already set to the error reported
    errno = unsupported;
    crm_perror(LOG_ERR, "Cannot validate Linux-HA fence agents");
    return -unsupported;
}
diff --git a/lib/fencing/st_output.c b/lib/fencing/st_output.c
index 786f9d5a2d..3ce9d01314 100644
--- a/lib/fencing/st_output.c
+++ b/lib/fencing/st_output.c
@@ -1,606 +1,625 @@
/*
* Copyright 2019-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdarg.h>
#include <stdint.h>
#include <crm/stonith-ng.h>
#include <crm/common/iso8601.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
#include <crm/common/output.h>
#include <crm/common/output_internal.h>
#include <crm/common/xml_internal.h>
#include <crm/fencing/internal.h>
#include <crm/pengine/internal.h>
/*!
 * \internal
 * \brief Convert seconds and nanoseconds to a date/time/time-zone string
 *
 * \param[in] sec        Seconds
 * \param[in] nsec       Nanoseconds
 * \param[in] show_usec  Whether to show time in microseconds resolution (if
 *                       false, use seconds resolution)
 *
 * \return A string representation of \p sec and \p nsec
 *
 * \note The caller is responsible for freeing the return value using \p free().
 */
static char *
timespec_string(time_t sec, long nsec, bool show_usec) {
    // Date, time of day and time zone are always shown
    uint32_t flags = crm_time_log_date
                     |crm_time_log_timeofday
                     |crm_time_log_with_timezone;
    const struct timespec when = {
        .tv_sec = sec,
        .tv_nsec = nsec,
    };

    if (show_usec) {
        flags |= crm_time_usecs;
    }
    return pcmk__timespec2str(&when, flags);
}
+/*!
+ * \internal
+ * \brief Return a readable string equivalent of a fencing history item's action
+ *
+ * \param[in] history Fencing history entry
+ *
+ * \return Readable string equivalent of action belonging to \p history
+ */
+static const char *
+history_action_text(const stonith_history_t *history)
+{
+ if (pcmk__str_eq(history->action, PCMK_ACTION_ON, pcmk__str_none)) {
+ return "unfencing";
+ }
+ if (pcmk__str_eq(history->action, PCMK_ACTION_OFF, pcmk__str_none)) {
+ return "turning off";
+ }
+ return pcmk__s(history->action, "fencing");
+}
+
/*!
* \internal
* \brief Return a status-friendly description of fence history entry state
*
* \param[in] history Fence history entry to describe
*
* \return One-word description of history entry state
- * \note This is similar to stonith_op_state_str() except user-oriented (i.e.
+ * \note This is similar to stonith__op_state_text() except user-oriented (i.e.,
* for cluster status) instead of developer-oriented (for debug logs).
*/
static const char *
state_str(const stonith_history_t *history)
{
    // Every state other than failed or done is shown as pending
    if (history->state == st_failed) {
        return "failed";
    }
    if (history->state == st_done) {
        return "successful";
    }
    return "pending";
}
/*!
* \internal
* \brief Create a description of a fencing history entry for status displays
*
* \param[in] history Fencing history entry to describe
* \param[in] full_history Whether this is for full or condensed history
* \param[in] later_succeeded Node that a later equivalent attempt succeeded
* from, or NULL if none
* \param[in] show_opts Flag group of pcmk_show_opt_e
*
* \return Newly created string with fencing history entry description
*
* \note The caller is responsible for freeing the return value with g_free().
* \note This is similar to stonith__event_description(), except this is used
* for history entries (stonith_history_t) in status displays rather than
* event notifications (stonith_event_t) in log messages.
*/
gchar *
stonith__history_description(const stonith_history_t *history,
bool full_history, const char *later_succeeded,
uint32_t show_opts)
{
GString *str = g_string_sized_new(256); // Generous starting size
char *completed_time_s = NULL;
// Only finished (failed or done) entries get a completion time string; a
// NULL completed_time_s below therefore means the entry is still pending
if ((history->state == st_failed) || (history->state == st_done)) {
completed_time_s = timespec_string(history->completed,
history->completed_nsec, true);
}
// Every description starts with "<action> of <target>"
- pcmk__g_strcat(str,
- stonith_action_str(history->action), " of ", history->target,
+ pcmk__g_strcat(str, history_action_text(history), " of ", history->target,
NULL);
if (!pcmk_is_set(show_opts, pcmk_show_failed_detail)) {
// More human-friendly
if (((history->state == st_failed) || (history->state == st_done))
&& (history->delegate != NULL)) {
pcmk__g_strcat(str, " by ", history->delegate, NULL);
}
pcmk__g_strcat(str, " for ", history->client, "@", history->origin,
NULL);
if (!full_history) {
g_string_append(str, " last"); // For example, "last failed at ..."
}
}
// Append the one-word state ("failed", "successful" or "pending")
pcmk__add_word(&str, 0, state_str(history));
// For failed actions, add exit reason if available
if ((history->state == st_failed) && (history->exit_reason != NULL)) {
pcmk__g_strcat(str, " (", history->exit_reason, ")", NULL);
}
if (pcmk_is_set(show_opts, pcmk_show_failed_detail)) {
// More technical
g_string_append(str, ": ");
// For completed actions, add delegate if available
if (((history->state == st_failed) || (history->state == st_done))
&& (history->delegate != NULL)) {
pcmk__g_strcat(str, PCMK_XA_DELEGATE "=", history->delegate, ", ",
NULL);
}
// Add information about originator
pcmk__g_strcat(str,
PCMK_XA_CLIENT "=", history->client, ", "
PCMK_XA_ORIGIN "=", history->origin, NULL);
// For completed actions, add completion time
if (completed_time_s != NULL) {
// The label depends on full vs. condensed history and on the outcome
if (full_history) {
g_string_append(str, ", completed");
} else if (history->state == st_failed) {
g_string_append(str, ", last-failed");
} else {
g_string_append(str, ", last-successful");
}
pcmk__g_strcat(str, "='", completed_time_s, "'", NULL);
}
} else if (completed_time_s != NULL) {
// More human-friendly
pcmk__g_strcat(str, " at ", completed_time_s, NULL);
}
if ((history->state == st_failed) && (later_succeeded != NULL)) {
pcmk__g_strcat(str,
" (a later attempt from ", later_succeeded,
" succeeded)", NULL);
}
free(completed_time_s);
// Free only the GString wrapper and hand its character data to the caller
return g_string_free(str, FALSE);
}
PCMK__OUTPUT_ARGS("failed-fencing-list", "stonith_history_t *", "GList *",
                  "uint32_t", "uint32_t", "bool")
/* Output message: list the failed fencing actions whose target matches
 * only_node under a "Failed Fencing Actions" heading.
 */
static int
failed_history(pcmk__output_t *out, va_list args)
{
    stonith_history_t *history = va_arg(args, stonith_history_t *);
    GList *only_node = va_arg(args, GList *);
    uint32_t section_opts = va_arg(args, uint32_t);
    uint32_t show_opts = va_arg(args, uint32_t);
    bool print_spacer = va_arg(args, int);

    stonith_history_t *entry = NULL;
    int rc = pcmk_rc_no_output;

    for (entry = history; entry != NULL; entry = entry->next) {
        // Skip anything that did not fail or is for a node not being shown
        if ((entry->state != st_failed)
            || !pcmk__str_in_list(entry->target, only_node,
                                  pcmk__str_star_matches|pcmk__str_casei)) {
            continue;
        }

        PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Fencing Actions");
        out->message(out, "stonith-event", entry,
                     pcmk_all_flags_set(section_opts, pcmk_section_fencing_all),
                     false, stonith__later_succeeded(entry, history),
                     show_opts);
        out->increment_list(out);
    }

    PCMK__OUTPUT_LIST_FOOTER(out, rc);
    return rc;
}
PCMK__OUTPUT_ARGS("fencing-list", "stonith_history_t *", "GList *", "uint32_t",
                  "uint32_t", "bool")
/* Output message: list the non-failed fencing actions whose target matches
 * only_node under a "Fencing History" heading.
 */
static int
stonith_history(pcmk__output_t *out, va_list args)
{
    stonith_history_t *history = va_arg(args, stonith_history_t *);
    GList *only_node = va_arg(args, GList *);
    uint32_t section_opts = va_arg(args, uint32_t);
    uint32_t show_opts = va_arg(args, uint32_t);
    bool print_spacer = va_arg(args, int);

    stonith_history_t *entry = NULL;
    int rc = pcmk_rc_no_output;

    for (entry = history; entry != NULL; entry = entry->next) {
        if (!pcmk__str_in_list(entry->target, only_node,
                               pcmk__str_star_matches|pcmk__str_casei)) {
            continue;
        }
        // Failed actions are not part of this list
        if (entry->state == st_failed) {
            continue;
        }

        PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Fencing History");
        out->message(out, "stonith-event", entry,
                     pcmk_all_flags_set(section_opts,
                                        pcmk_section_fencing_all),
                     false, stonith__later_succeeded(entry, history),
                     show_opts);
        out->increment_list(out);
    }

    PCMK__OUTPUT_LIST_FOOTER(out, rc);
    return rc;
}
PCMK__OUTPUT_ARGS("full-fencing-list", "crm_exit_t", "stonith_history_t *",
                  "GList *", "uint32_t", "uint32_t", "bool")
/* Output message: list every fencing action, whatever its state, whose target
 * matches only_node under a "Fencing History" heading. The history exit code
 * argument is consumed but not used here.
 */
static int
full_history(pcmk__output_t *out, va_list args)
{
    crm_exit_t history_rc G_GNUC_UNUSED = va_arg(args, crm_exit_t);
    stonith_history_t *history = va_arg(args, stonith_history_t *);
    GList *only_node = va_arg(args, GList *);
    uint32_t section_opts = va_arg(args, uint32_t);
    uint32_t show_opts = va_arg(args, uint32_t);
    bool print_spacer = va_arg(args, int);

    stonith_history_t *entry = history;
    int rc = pcmk_rc_no_output;

    while (entry != NULL) {
        if (pcmk__str_in_list(entry->target, only_node,
                              pcmk__str_star_matches|pcmk__str_casei)) {

            PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Fencing History");
            out->message(out, "stonith-event", entry,
                         pcmk_all_flags_set(section_opts,
                                            pcmk_section_fencing_all),
                         false, stonith__later_succeeded(entry, history),
                         show_opts);
            out->increment_list(out);
        }
        entry = entry->next;
    }

    PCMK__OUTPUT_LIST_FOOTER(out, rc);
    return rc;
}
PCMK__OUTPUT_ARGS("full-fencing-list", "crm_exit_t", "stonith_history_t *",
                  "GList *", "uint32_t", "uint32_t", "bool")
/* XML variant of the full fencing list. A nonzero history exit code produces
 * a single fence-history element carrying that code as its status; otherwise
 * every entry whose target matches only_node is listed.
 */
static int
full_history_xml(pcmk__output_t *out, va_list args)
{
    crm_exit_t history_rc = va_arg(args, crm_exit_t);
    stonith_history_t *history = va_arg(args, stonith_history_t *);
    GList *only_node = va_arg(args, GList *);
    uint32_t section_opts = va_arg(args, uint32_t);
    uint32_t show_opts = va_arg(args, uint32_t);
    bool print_spacer G_GNUC_UNUSED = va_arg(args, int);

    stonith_history_t *entry = NULL;
    int rc = pcmk_rc_no_output;

    if (history_rc != 0) {
        // The history could not be obtained: report only the status code
        char *status_s = pcmk__itoa(history_rc);

        pcmk__output_create_xml_node(out, PCMK_XE_FENCE_HISTORY,
                                     PCMK_XA_STATUS, status_s,
                                     NULL);
        free(status_s);
        return pcmk_rc_ok;
    }

    for (entry = history; entry != NULL; entry = entry->next) {
        if (!pcmk__str_in_list(entry->target, only_node,
                               pcmk__str_star_matches|pcmk__str_casei)) {
            continue;
        }

        PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Fencing History");
        out->message(out, "stonith-event", entry,
                     pcmk_all_flags_set(section_opts,
                                        pcmk_section_fencing_all),
                     false, stonith__later_succeeded(entry, history),
                     show_opts);
        out->increment_list(out);
    }

    PCMK__OUTPUT_LIST_FOOTER(out, rc);
    return rc;
}
PCMK__OUTPUT_ARGS("last-fenced", "const char *", "time_t")
/* HTML output of when a node was last fenced; produces nothing if the time is
 * zero.
 */
static int
last_fenced_html(pcmk__output_t *out, va_list args) {
    const char *target = va_arg(args, const char *);
    time_t when = va_arg(args, time_t);
    char *text = NULL;

    if (when == 0) {
        return pcmk_rc_no_output;
    }

    // ctime() output ends with a newline, which becomes part of the text
    text = crm_strdup_printf("Node %s last fenced at: %s", target, ctime(&when));
    pcmk__output_create_html_node(out, PCMK__XE_DIV, NULL, NULL, text);
    free(text);
    return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("last-fenced", "const char *", "time_t")
/* Text output of when a node was last fenced; a zero time is reported as the
 * node never having been fenced.
 */
static int
last_fenced_text(pcmk__output_t *out, va_list args) {
    const char *target = va_arg(args, const char *);
    time_t when = va_arg(args, time_t);

    if (when == 0) {
        pcmk__indented_printf(out, "Node %s has never been fenced\n", target);
        return pcmk_rc_ok;
    }

    // No "\n" in the format: ctime() output already ends with a newline
    pcmk__indented_printf(out, "Node %s last fenced at: %s", target, ctime(&when));
    return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("last-fenced", "const char *", "time_t")
/* XML output of when a node was last fenced; produces nothing if the time is
 * zero.
 */
static int
last_fenced_xml(pcmk__output_t *out, va_list args) {
    const char *target = va_arg(args, const char *);
    time_t when = va_arg(args, time_t);
    char *when_s = NULL;

    if (when == 0) {
        return pcmk_rc_no_output;
    }

    // Seconds resolution, with no nanosecond component
    when_s = timespec_string(when, 0, false);
    pcmk__output_create_xml_node(out, PCMK_XE_LAST_FENCED,
                                 PCMK_XA_TARGET, target,
                                 PCMK_XA_WHEN, when_s,
                                 NULL);
    free(when_s);
    return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("pending-fencing-list", "stonith_history_t *", "GList *",
                  "uint32_t", "uint32_t", "bool")
/*!
 * \internal
 * \brief Formatter for the "pending-fencing-list" message
 *
 * Emits a "stonith-event" message for each leading history entry whose target
 * matches \c only_node, stopping at the first matching entry that has already
 * failed or completed.
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments: history list, node filter list,
 *                      section options, show options, and whether to print a
 *                      spacer before the list header
 *
 * \return Value of \c rc after the list header/footer macros have run
 *         (initially pcmk_rc_no_output)
 */
static int
pending_actions(pcmk__output_t *out, va_list args)
{
    stonith_history_t *history = va_arg(args, stonith_history_t *);
    GList *only_node = va_arg(args, GList *);
    uint32_t section_opts = va_arg(args, uint32_t);
    uint32_t show_opts = va_arg(args, uint32_t);
    bool print_spacer = va_arg(args, int);

    int rc = pcmk_rc_no_output;
    stonith_history_t *entry = NULL;

    for (entry = history; entry != NULL; entry = entry->next) {
        const bool finished = (entry->state == st_failed)
                              || (entry->state == st_done);

        // Entries for nodes outside the filter are ignored entirely
        if (!pcmk__str_in_list(entry->target, only_node,
                               pcmk__str_star_matches|pcmk__str_casei)) {
            continue;
        }

        /* Skip the rest of the history after we see a failed/done action */
        if (finished) {
            break;
        }

        PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Pending Fencing Actions");
        out->message(out, "stonith-event", entry,
                     pcmk_all_flags_set(section_opts, pcmk_section_fencing_all),
                     false, stonith__later_succeeded(entry, history), show_opts);
        out->increment_list(out);
    }

    PCMK__OUTPUT_LIST_FOOTER(out, rc);
    return rc;
}
PCMK__OUTPUT_ARGS("stonith-event", "stonith_history_t *", "bool", "bool",
                  "const char *", "uint32_t")
/*!
 * \internal
 * \brief HTML formatter for the "stonith-event" message
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments: history entry, whether full history
 *                      is shown, completed-only flag (ignored here), name of a
 *                      later successful fencer (if any), and show options
 *
 * \return Always pcmk_rc_ok
 */
static int
stonith_event_html(pcmk__output_t *out, va_list args)
{
    stonith_history_t *event = va_arg(args, stonith_history_t *);
    bool full_history = va_arg(args, int);
    bool completed_only G_GNUC_UNUSED = va_arg(args, int);
    const char *succeeded = va_arg(args, const char *);
    uint32_t show_opts = va_arg(args, uint32_t);

    const char *item_name = "pending-stonith-event";
    gchar *desc = stonith__history_description(event, full_history, succeeded,
                                               show_opts);

    // Anything that is neither done nor failed is reported as pending
    if (event->state == st_done) {
        item_name = "successful-stonith-event";

    } else if (event->state == st_failed) {
        item_name = "failed-stonith-event";
    }

    out->list_item(out, item_name, "%s", desc);
    g_free(desc);
    return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("stonith-event", "stonith_history_t *", "bool", "bool",
                  "const char *", "uint32_t")
/*!
 * \internal
 * \brief Text/log formatter for the "stonith-event" message
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments: history entry, whether full history
 *                      is shown, whether to print only the completion time,
 *                      name of a later successful fencer (if any), and show
 *                      options
 *
 * \return Always pcmk_rc_ok
 */
static int
stonith_event_text(pcmk__output_t *out, va_list args)
{
    stonith_history_t *event = va_arg(args, stonith_history_t *);
    bool full_history = va_arg(args, int);
    bool completed_only = va_arg(args, int);
    const char *succeeded = va_arg(args, const char *);
    uint32_t show_opts = va_arg(args, uint32_t);
    gchar *desc = NULL;

    if (completed_only) {
        // Only the raw completion timestamp is wanted
        pcmk__formatted_printf(out, "%lld\n", (long long) event->completed);
        return pcmk_rc_ok;
    }

    desc = stonith__history_description(event, full_history, succeeded,
                                        show_opts);
    pcmk__indented_printf(out, "%s\n", desc);
    g_free(desc);
    return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("stonith-event", "stonith_history_t *", "bool", "bool",
                  "const char *", "uint32_t")
/*!
 * \internal
 * \brief XML formatter for the "stonith-event" message
 *
 * Creates a fence event element carrying the action, target, client and
 * origin, followed by status information, the delegate (if set), and the
 * completion time (for failed or completed events).
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments: history entry, followed by four
 *                      arguments that this formatter reads but does not use
 *
 * \return Always pcmk_rc_ok
 */
static int
stonith_event_xml(pcmk__output_t *out, va_list args)
{
    stonith_history_t *event = va_arg(args, stonith_history_t *);
    bool full_history G_GNUC_UNUSED = va_arg(args, int);
    bool completed_only G_GNUC_UNUSED = va_arg(args, int);
    const char *succeeded G_GNUC_UNUSED = va_arg(args, const char *);
    uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t);

    const bool finished = (event->state == st_failed)
                          || (event->state == st_done);
    xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_FENCE_EVENT,
                                                   PCMK_XA_ACTION, event->action,
                                                   PCMK_XA_TARGET, event->target,
                                                   PCMK_XA_CLIENT, event->client,
                                                   PCMK_XA_ORIGIN, event->origin,
                                                   NULL);

    if (event->state == st_failed) {
        pcmk__xe_set_props(node,
                           PCMK_XA_STATUS, PCMK_VALUE_FAILED,
                           PCMK_XA_EXIT_REASON, event->exit_reason,
                           NULL);

    } else if (event->state == st_done) {
        crm_xml_add(node, PCMK_XA_STATUS, PCMK_VALUE_SUCCESS);

    } else {
        // Any other state is pending; expose the numeric state as well
        char *state_s = pcmk__itoa(event->state);

        pcmk__xe_set_props(node,
                           PCMK_XA_STATUS, PCMK_VALUE_PENDING,
                           PCMK_XA_EXTENDED_STATUS, state_s,
                           NULL);
        free(state_s);
    }

    if (event->delegate != NULL) {
        crm_xml_add(node, PCMK_XA_DELEGATE, event->delegate);
    }

    // Only finished events have a completion time
    if (finished) {
        char *completed_s = timespec_string(event->completed,
                                            event->completed_nsec, true);

        crm_xml_add(node, PCMK_XA_COMPLETED, completed_s);
        free(completed_s);
    }
    return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("validate", "const char *", "const char *", "const char *",
                  "const char *", "int")
/*!
 * \internal
 * \brief HTML formatter for the "validate" message
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments: agent name, device name (or NULL),
 *                      agent standard output, agent standard error, and the
 *                      validation result code (nonzero means failure)
 *
 * \return The validation result code that was passed in
 */
static int
validate_agent_html(pcmk__output_t *out, va_list args)
{
    const char *agent = va_arg(args, const char *);
    const char *device = va_arg(args, const char *);
    const char *output = va_arg(args, const char *);
    const char *error_output = va_arg(args, const char *);
    int rc = va_arg(args, int);

    const char *outcome = (rc != 0)? "failed" : "succeeded";
    char *summary = NULL;

    // The device name is mentioned only when one was given
    if (device != NULL) {
        summary = crm_strdup_printf("Validation of %s on %s %s", agent, device,
                                    outcome);
    } else {
        summary = crm_strdup_printf("Validation of %s %s", agent, outcome);
    }

    pcmk__output_create_html_node(out, PCMK__XE_DIV, NULL, NULL, summary);
    free(summary);

    out->subprocess_output(out, rc, output, error_output);
    return rc;
}
PCMK__OUTPUT_ARGS("validate", "const char *", "const char *", "const char *",
                  "const char *", "int")
/*!
 * \internal
 * \brief Text/log formatter for the "validate" message
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments: agent name, device name (or NULL),
 *                      agent standard output, agent standard error, and the
 *                      validation result code (nonzero means failure)
 *
 * \return The validation result code that was passed in
 */
static int
validate_agent_text(pcmk__output_t *out, va_list args)
{
    const char *agent = va_arg(args, const char *);
    const char *device = va_arg(args, const char *);
    const char *output = va_arg(args, const char *);
    const char *error_output = va_arg(args, const char *);
    int rc = va_arg(args, int);

    const char *outcome = (rc != 0)? "failed" : "succeeded";

    if (device == NULL) {
        pcmk__indented_printf(out, "Validation of %s %s\n", agent, outcome);
    } else {
        pcmk__indented_printf(out, "Validation of %s on %s %s\n", agent, device,
                              outcome);
    }

    out->subprocess_output(out, rc, output, error_output);
    return rc;
}
PCMK__OUTPUT_ARGS("validate", "const char *", "const char *", "const char *",
                  "const char *", "int")
/*!
 * \internal
 * \brief XML formatter for the "validate" message
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments: agent name, device name (or NULL),
 *                      agent standard output, agent standard error, and the
 *                      validation result code
 *
 * \return The validation result code that was passed in
 */
static int
validate_agent_xml(pcmk__output_t *out, va_list args)
{
    const char *agent = va_arg(args, const char *);
    const char *device = va_arg(args, const char *);
    const char *output = va_arg(args, const char *);
    const char *error_output = va_arg(args, const char *);
    int rc = va_arg(args, int);

    xmlNodePtr validate_xml = NULL;

    validate_xml = pcmk__output_create_xml_node(out, PCMK_XE_VALIDATE,
                                                PCMK_XA_AGENT, agent,
                                                PCMK_XA_VALID,
                                                pcmk__btoa(rc == pcmk_ok),
                                                NULL);
    if (device != NULL) {
        crm_xml_add(validate_xml, PCMK_XA_DEVICE, device);
    }

    // Make the agent's output a child of the validate element
    pcmk__output_xml_push_parent(out, validate_xml);
    out->subprocess_output(out, rc, output, error_output);
    pcmk__output_xml_pop_parent(out);

    return rc;
}
/* Table of fencing message formatters handed to pcmk__register_messages().
 * Each entry is { message name, output format name, handler function }; the
 * table ends with an all-NULL sentinel entry.
 * NOTE(review): "default" presumably applies to every format without a more
 * specific entry -- confirm against pcmk__register_messages().
 */
static pcmk__message_entry_t fmt_functions[] = {
{ "failed-fencing-list", "default", failed_history },
{ "fencing-list", "default", stonith_history },
{ "full-fencing-list", "default", full_history },
{ "full-fencing-list", "xml", full_history_xml },
{ "last-fenced", "html", last_fenced_html },
{ "last-fenced", "log", last_fenced_text },
{ "last-fenced", "text", last_fenced_text },
{ "last-fenced", "xml", last_fenced_xml },
{ "pending-fencing-list", "default", pending_actions },
{ "stonith-event", "html", stonith_event_html },
{ "stonith-event", "log", stonith_event_text },
{ "stonith-event", "text", stonith_event_text },
{ "stonith-event", "xml", stonith_event_xml },
{ "validate", "html", validate_agent_html },
{ "validate", "log", validate_agent_text },
{ "validate", "text", validate_agent_text },
{ "validate", "xml", validate_agent_xml },
{ NULL, NULL, NULL }
};
/*!
 * \internal
 * \brief Register the fencing library's message formatters with an output
 *        object
 *
 * \param[in,out] out  Output object to register the formatters with
 */
void
stonith__register_messages(pcmk__output_t *out)
{
    pcmk__register_messages(out, fmt_functions);
}
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
index d091ea1153..04b1c79ab1 100644
--- a/lib/fencing/st_rhcs.c
+++ b/lib/fencing/st_rhcs.c
@@ -1,330 +1,324 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <glib.h>
#include <libxml/xpath.h> // xmlXPathObject, etc.
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include "fencing_private.h"
/*!
 * \internal
 * \brief \c scandir() filter for RHCS fence agents
 *
 * \param[in] entry  Directory entry
 *
 * \retval 1 if \p entry is a regular file whose name begins with \c "fence_"
 * \retval 0 otherwise
 */
static int
rhcs_agent_filter(const struct dirent *entry)
{
    struct stat info;
    char *path = NULL;
    int keep = 0;

    if (pcmk__starts_with(entry->d_name, "fence_")) {
        // glibc doesn't enforce PATH_MAX, so don't limit the path size
        path = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s", entry->d_name);

        if ((stat(path, &info) == 0) && S_ISREG(info.st_mode)) {
            keep = 1;
        }
        free(path);
    }
    return keep;
}
/*!
* \internal
* \brief Add available RHCS-compatible agents to a list
*
* Scans PCMK__FENCE_BINDIR for entries accepted by rhcs_agent_filter() and
* appends each entry's name to the list, in alphasort() order.
*
* \param[in,out] devices  List to add to
*
* \return Number of agents added (0 if the directory could not be scanned)
*/
int
stonith__list_rhcs_agents(stonith_key_value_t **devices)
{
struct dirent **namelist = NULL;
const int file_num = scandir(PCMK__FENCE_BINDIR, &namelist,
rhcs_agent_filter, alphasort);
if (file_num < 0) {
// Capture errno immediately, before another call can overwrite it
int rc = errno;
crm_err("Could not list " PCMK__FENCE_BINDIR ": %s", pcmk_rc_str(rc));
free(namelist);
return 0;
}
// Each entry returned by scandir() is released here once its name is added
for (int i = 0; i < file_num; i++) {
- *devices = stonith_key_value_add(*devices, NULL, namelist[i]->d_name);
+ *devices = stonith__key_value_add(*devices, NULL, namelist[i]->d_name);
free(namelist[i]);
}
free(namelist);
return file_num;
}
/*!
 * \internal
 * \brief Mark a parameter as not required in agent metadata
 *
 * Sets \c required="0" on the first parameter element in the metadata's
 * document whose name matches \p parameter, if there is one.
 *
 * \param[in,out] metadata   Agent metadata XML
 * \param[in]     parameter  Name of parameter to mark as not required
 */
static void
stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter)
{
    char *query = NULL;
    xmlXPathObject *matches = NULL;
    xmlNode *param_xml = NULL;

    CRM_CHECK(metadata != NULL, return);
    CRM_CHECK(parameter != NULL, return);

    query = crm_strdup_printf("//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='%s']",
                              parameter);

    /* Fudge metadata so that the parameter isn't required in config
     * Pacemaker handles and adds it */
    matches = pcmk__xpath_search(metadata->doc, query);
    if (pcmk__xpath_num_results(matches) > 0) {
        param_xml = pcmk__xpath_result(matches, 0);
    }
    if (param_xml != NULL) {
        crm_xml_add(param_xml, "required", "0");
    }

    xmlXPathFreeObject(matches);
    free(query);
}
/*!
 * \brief Execute RHCS-compatible agent's metadata action
 *
 * The agent's output is parsed as XML and then adjusted: start and stop
 * actions are added to the actions element if the agent does not advertise
 * a stop action, and the action, plug, and port parameters are marked as not
 * required.
 *
 * \param[in]  agent        Agent to execute
 * \param[in]  timeout_sec  Action timeout
 * \param[out] metadata     Where to store output xmlNode (or NULL to ignore)
 *
 * \return pcmk_ok on success, or an error code otherwise (\c -ENODATA if the
 *         agent produced no output, \c -pcmk_err_schema_validation if the
 *         output could not be parsed)
 * \note On success with a non-NULL \p metadata, the caller owns the returned
 *       XML and must free it with pcmk__xml_free().
 */
static int
stonith__rhcs_get_metadata(const char *agent, int timeout_sec,
                           xmlNode **metadata)
{
    xmlNode *xml = NULL;
    xmlNode *actions = NULL;
    xmlXPathObject *xpathObj = NULL;
    stonith_action_t *action = stonith__action_create(agent,
                                                      PCMK_ACTION_METADATA,
                                                      NULL, timeout_sec, NULL,
                                                      NULL, NULL);
    int rc = stonith__execute(action);
    pcmk__action_result_t *result = stonith__action_result(action);

    if (result == NULL) {
        if (rc < 0) {
            crm_warn("Could not execute metadata action for %s: %s "
                     QB_XS " rc=%d", agent, pcmk_strerror(rc), rc);
        }
        stonith__destroy_action(action);
        return rc;
    }

    if (result->execution_status != PCMK_EXEC_DONE) {
        crm_warn("Could not execute metadata action for %s: %s",
                 agent, pcmk_exec_status_str(result->execution_status));
        rc = pcmk_rc2legacy(stonith__result2rc(result));
        stonith__destroy_action(action);
        return rc;
    }

    if (!pcmk__result_ok(result)) {
        crm_warn("Metadata action for %s returned error code %d",
                 agent, result->exit_status);
        rc = pcmk_rc2legacy(stonith__result2rc(result));
        stonith__destroy_action(action);
        return rc;
    }

    if (result->action_stdout == NULL) {
        crm_warn("Metadata action for %s returned no data", agent);
        stonith__destroy_action(action);
        return -ENODATA;
    }

    // Parse before destroying the action, which owns the output buffer
    xml = pcmk__xml_parse(result->action_stdout);
    stonith__destroy_action(action);

    if (xml == NULL) {
        crm_warn("Metadata for %s is invalid", agent);
        return -pcmk_err_schema_validation;
    }

    xpathObj = pcmk__xpath_search(xml->doc, "//" PCMK_XE_ACTIONS);
    if (pcmk__xpath_num_results(xpathObj) > 0) {
        actions = pcmk__xpath_result(xpathObj, 0);
    }
    xmlXPathFreeObject(xpathObj);

    // Add start and stop (implemented by pacemaker, not agent) to meta-data
    xpathObj = pcmk__xpath_search(xml->doc,
                                  "//" PCMK_XE_ACTION
                                  "[@" PCMK_XA_NAME "='" PCMK_ACTION_STOP "']");

    /* Without an actions element there is nothing to attach the new actions
     * to; creating them with a NULL parent would produce free-standing
     * elements that are never freed.
     */
    if ((actions != NULL) && (pcmk__xpath_num_results(xpathObj) == 0)) {
        xmlNode *tmp = NULL;
        const char *timeout_str = NULL;

        timeout_str = pcmk__readable_interval(PCMK_DEFAULT_ACTION_TIMEOUT_MS);

        tmp = pcmk__xe_create(actions, PCMK_XE_ACTION);
        crm_xml_add(tmp, PCMK_XA_NAME, PCMK_ACTION_STOP);
        crm_xml_add(tmp, PCMK_META_TIMEOUT, timeout_str);

        tmp = pcmk__xe_create(actions, PCMK_XE_ACTION);
        crm_xml_add(tmp, PCMK_XA_NAME, PCMK_ACTION_START);
        crm_xml_add(tmp, PCMK_META_TIMEOUT, timeout_str);
    }
    xmlXPathFreeObject(xpathObj);

    // Fudge metadata so parameters are not required in config (pacemaker adds them)
    stonith_rhcs_parameter_not_required(xml, STONITH_ATTR_ACTION_OP);
    stonith_rhcs_parameter_not_required(xml, "plug");
    stonith_rhcs_parameter_not_required(xml, "port");

    if (metadata) {
        *metadata = xml;

    } else {
        pcmk__xml_free(xml);
    }

    return pcmk_ok;
}
/*!
 * \brief Retrieve metadata for RHCS-compatible fence agent
 *
 * \param[in]  agent        Agent to execute
 * \param[in]  timeout_sec  Action timeout
 * \param[out] output       Where to store action output (or NULL to ignore)
 *
 * \return pcmk_ok on success, \c -pcmk_err_schema_validation if the metadata
 *         serializes to an empty string, or the error returned while
 *         retrieving the metadata
 */
int
stonith__rhcs_metadata(const char *agent, int timeout_sec, char **output)
{
    xmlNode *metadata_xml = NULL;
    int rc = stonith__rhcs_get_metadata(agent, timeout_sec, &metadata_xml);

    if (rc == pcmk_ok) {
        GString *text = g_string_sized_new(1024);

        pcmk__xml_string(metadata_xml, pcmk__xml_fmt_pretty|pcmk__xml_fmt_text,
                         text, 0);

        if (pcmk__str_empty(text->str)) {
            rc = -pcmk_err_schema_validation;

        } else if (output != NULL) {
            pcmk__str_update(output, text->str);
        }
        g_string_free(text, TRUE);
    }

    pcmk__xml_free(metadata_xml);
    return rc;
}
/*!
 * \internal
 * \brief Check whether a fence agent exists as an RHCS-compatible agent
 *
 * \param[in] agent  Agent name to look for under PCMK__FENCE_BINDIR
 *
 * \return true if a regular file with that name can be stat()ed there,
 *         otherwise false
 */
bool
stonith__agent_is_rhcs(const char *agent)
{
    struct stat info;
    char *path = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s", agent);
    const bool found = (stat(path, &info) >= 0);

    free(path);

    // info is only meaningful (and only examined) when stat() succeeded
    return found && S_ISREG(info.st_mode);
}
/*!
* \internal
* \brief Execute an RHCS-compatible agent's validate-all action
*
* \param[in,out] st            Not used by this function
* \param[in]     call_options  Not used by this function
* \param[in]     target        Target passed to the action
* \param[in]     agent         Agent to execute
* \param[in]     params        Parameters passed to the action
* \param[in]     host_arg      Name of the agent's host argument; if NULL, a
*                              default is looked up in the agent's metadata;
*                              if PCMK_VALUE_NONE (case-insensitive), no host
*                              argument is used
* \param[in]     timeout       Total time allowed, in seconds, including any
*                              metadata lookup
* \param[out]    output        If not NULL, where to store the action's
*                              standard output (caller takes ownership)
* \param[out]    error_output  If not NULL, where to store the action's
*                              standard error (caller takes ownership)
*
* \return Legacy return code; \c -ETIME if the metadata lookup timed out or
*         used up the whole timeout
*/
int
stonith__rhcs_validate(stonith_t *st, int call_options, const char *target,
const char *agent, GHashTable *params,
const char * host_arg, int timeout,
char **output, char **error_output)
{
int rc = pcmk_ok;
int remaining_timeout = timeout;
xmlNode *metadata = NULL;
stonith_action_t *action = NULL;
pcmk__action_result_t *result = NULL;
if (host_arg == NULL) {
// Time the metadata lookup so it can be charged against the timeout
time_t start_time = time(NULL);
rc = stonith__rhcs_get_metadata(agent, remaining_timeout, &metadata);
if (rc == pcmk_ok) {
- uint32_t device_flags = 0;
-
- stonith__device_parameter_flags(&device_flags, agent, metadata);
- if (pcmk_is_set(device_flags, st_device_supports_parameter_port)) {
- host_arg = "port";
-
- } else if (pcmk_is_set(device_flags,
- st_device_supports_parameter_plug)) {
- host_arg = "plug";
- }
+ host_arg = stonith__default_host_arg(metadata);
+ crm_trace("Using '%s' as default " PCMK_STONITH_HOST_ARGUMENT
+ " for %s",
+ pcmk__s(host_arg, PCMK_VALUE_NONE), agent);
}
pcmk__xml_free(metadata);
remaining_timeout -= time(NULL) - start_time;
// Other metadata errors are not fatal: validation proceeds without host_arg
if (rc == -ETIME || remaining_timeout <= 0 ) {
return -ETIME;
}
} else if (pcmk__str_eq(host_arg, PCMK_VALUE_NONE, pcmk__str_casei)) {
host_arg = NULL;
}
action = stonith__action_create(agent, PCMK_ACTION_VALIDATE_ALL, target,
remaining_timeout, params, NULL, host_arg);
rc = stonith__execute(action);
result = stonith__action_result(action);
// When a result is available, it overrides the rc from stonith__execute()
if (result != NULL) {
rc = pcmk_rc2legacy(stonith__result2rc(result));
// Take ownership of output so stonith__destroy_action() doesn't free it
if (output != NULL) {
*output = result->action_stdout;
result->action_stdout = NULL;
}
if (error_output != NULL) {
*error_output = result->action_stderr;
result->action_stderr = NULL;
}
}
stonith__destroy_action(action);
return rc;
}
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index 23879814f5..6a37ab5bf4 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -1,2681 +1,2683 @@
/*
* Copyright 2012-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h> // uint32_t, uint64_t
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <glib.h>
#include <dirent.h>
#include <crm/crm.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/remote_internal.h>
#include <crm/common/tls_internal.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h> // stonith__*
#include <gnutls/gnutls.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <netdb.h>
#define MAX_TLS_RECV_WAIT 10000
CRM_TRACE_INIT_DATA(lrmd);
static int lrmd_api_disconnect(lrmd_t * lrmd);
static int lrmd_api_is_connected(lrmd_t * lrmd);
/* IPC proxy functions */
int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg);
void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
// GnuTLS client handshake timeout in seconds
#define TLS_HANDSHAKE_TIMEOUT 5
static void lrmd_tls_disconnect(lrmd_t * lrmd);
static int global_remote_msg_id = 0;
static void lrmd_tls_connection_destroy(gpointer userdata);
static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_api_handshake);
// Private state behind an lrmd_t executor API connection (lrmd->lrmd_private)
typedef struct lrmd_private_s {
// Connection type; compared against pcmk__client_ipc and pcmk__client_tls
uint64_t type;
char *token;
mainloop_io_t *source;
/* IPC parameters */
crm_ipc_t *ipc;
pcmk__remote_t *remote;
/* Extra TLS parameters */
char *remote_nodename;
char *server;
int port;
pcmk__tls_t *tls;
/* while the async connection is occurring, this is the id
* of the connection timeout timer. */
int async_timer;
// TLS socket file descriptor; reset to -1 when the connection is destroyed
int sock;
/* since tls requires a round trip across the network for a
* request/reply, there are times where we just want to be able
* to send a request from the client and not wait around (or even care
* about) what the reply is. */
int expected_late_replies;
// Notification messages (xmlNode *) queued for later dispatch
GList *pending_notify;
crm_trigger_t *process_notify;
crm_trigger_t *handshake_trigger;
// Client's event callback; if unset, notifications are not dispatched
lrmd_event_callback callback;
/* Internal IPC proxy msg passing for remote guests */
void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg);
void *proxy_callback_userdata;
char *peer_version;
} lrmd_private_t;
static int process_lrmd_handshake_reply(xmlNode *reply, lrmd_private_t *native);
static void report_async_connection_result(lrmd_t * lrmd, int rc);
/*!
 * \internal
 * \brief Append a copy of a string to a list
 *
 * \param[in,out] head   Head of list to append to (or NULL for a new list)
 * \param[in]     value  String to copy into the new list item (may be NULL)
 *
 * \return Head of the list (the new item, if \p head was NULL)
 * \note The list should be freed with lrmd_list_freeall().
 */
static lrmd_list_t *
lrmd_list_add(lrmd_list_t * head, const char *value)
{
    lrmd_list_t *item = pcmk__assert_alloc(1, sizeof(lrmd_list_t));
    lrmd_list_t *tail = head;

    /* pcmk__str_copy() accepts NULL and asserts on allocation failure, unlike
     * a bare strdup(), whose result would have to be checked
     */
    item->val = pcmk__str_copy(value);

    if (head == NULL) {
        return item;
    }

    // Append at the end so that insertion order is preserved
    while (tail->next != NULL) {
        tail = tail->next;
    }
    tail->next = item;
    return head;
}
/*!
 * \brief Free a list and the strings it holds
 *
 * \param[in,out] head  Head of list to free (may be NULL)
 */
void
lrmd_list_freeall(lrmd_list_t * head)
{
    lrmd_list_t *next = NULL;

    for (lrmd_list_t *item = head; item != NULL; item = next) {
        // Remember the successor before the current item is released
        next = item->next;

        free((char *) item->val);
        free(item);
    }
}
/*!
 * \brief Append a copy of a key/value pair to a list
 *
 * \param[in,out] head   Head of list to append to (or NULL for a new list)
 * \param[in]     key    Key to copy into the new list item
 * \param[in]     value  Value to copy into the new list item
 *
 * \return Head of the list (the new item, if \p head was NULL)
 * \note The list should be freed with lrmd_key_value_freeall().
 */
lrmd_key_value_t *
lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value)
{
    lrmd_key_value_t *item = pcmk__assert_alloc(1, sizeof(lrmd_key_value_t));
    lrmd_key_value_t *tail = head;

    /* pcmk__str_copy() accepts NULL and asserts on allocation failure, unlike
     * a bare strdup(), whose result would have to be checked
     */
    item->key = pcmk__str_copy(key);
    item->value = pcmk__str_copy(value);

    if (head == NULL) {
        return item;
    }

    // Append at the end so that insertion order is preserved
    while (tail->next != NULL) {
        tail = tail->next;
    }
    tail->next = item;
    return head;
}
/*!
 * \brief Free a key/value list along with its keys and values
 *
 * \param[in,out] head  Head of list to free (may be NULL)
 */
void
lrmd_key_value_freeall(lrmd_key_value_t * head)
{
    lrmd_key_value_t *next = NULL;

    for (lrmd_key_value_t *item = head; item != NULL; item = next) {
        // Remember the successor before the current item is released
        next = item->next;

        free(item->key);
        free(item->value);
        free(item);
    }
}
/*!
 * \brief Create a new lrmd_event_data_t object
 *
 * \param[in] rsc_id       ID of resource involved in event
 * \param[in] task         Action name
 * \param[in] interval_ms  Action interval
 *
 * \return Newly allocated and initialized lrmd_event_data_t
 * \note This function asserts on memory errors, so the return value is
 *       guaranteed to be non-NULL. The caller is responsible for freeing the
 *       result with lrmd_free_event().
 */
lrmd_event_data_t *
lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms)
{
    lrmd_event_data_t *new_event = NULL;

    new_event = pcmk__assert_alloc(1, sizeof(lrmd_event_data_t));
    new_event->interval_ms = interval_ms;

    // These members are (const char *) but are freed by lrmd_free_event()
    new_event->rsc_id = pcmk__str_copy(rsc_id);
    new_event->op_type = pcmk__str_copy(task);

    return new_event;
}
/*!
 * \brief Create a deep copy of an executor event
 *
 * \param[in] event  Event to copy (must not be NULL)
 *
 * \return Newly allocated copy of \p event, with its own copies of the
 *         string members and parameter table
 * \note The caller is responsible for freeing the result with
 *       lrmd_free_event().
 */
lrmd_event_data_t *
lrmd_copy_event(lrmd_event_data_t * event)
{
    lrmd_event_data_t *dup = pcmk__assert_alloc(1, sizeof(lrmd_event_data_t));

    // Plain values
    dup->type = event->type;
    dup->call_id = event->call_id;
    dup->timeout = event->timeout;
    dup->interval_ms = event->interval_ms;
    dup->start_delay = event->start_delay;
    dup->rsc_deleted = event->rsc_deleted;
    dup->rc = event->rc;
    dup->op_status = event->op_status;
    dup->t_run = event->t_run;
    dup->t_rcchange = event->t_rcchange;
    dup->exec_time = event->exec_time;
    dup->queue_time = event->queue_time;
    dup->connection_rc = event->connection_rc;

    // These members are (const char *) but are freed by lrmd_free_event()
    dup->rsc_id = pcmk__str_copy(event->rsc_id);
    dup->op_type = pcmk__str_copy(event->op_type);
    dup->user_data = pcmk__str_copy(event->user_data);
    dup->output = pcmk__str_copy(event->output);
    dup->remote_nodename = pcmk__str_copy(event->remote_nodename);
    dup->exit_reason = pcmk__str_copy(event->exit_reason);

    dup->params = pcmk__str_table_dup(event->params);

    return dup;
}
/*!
 * \brief Free an executor event
 *
 * \param[in,out] event  Executor event object to free (may be NULL)
 */
void
lrmd_free_event(lrmd_event_data_t *event)
{
    if (event != NULL) {
        // @TODO Why are these const char *?
        free((void *) event->rsc_id);
        free((void *) event->op_type);
        free((void *) event->user_data);
        free((void *) event->remote_nodename);

        lrmd__reset_result(event);

        if (event->params != NULL) {
            g_hash_table_destroy(event->params);
        }
        free(event);
    }
}
/*!
* \internal
* \brief Turn an executor notification message into an event for the client
*
* Messages carrying an IPC proxy session ID are handed to
* lrmd_internal_proxy_dispatch() instead. Otherwise, if the client has set a
* callback, an event is built from the message according to its operation
* type and passed to that callback. Messages with an unrecognized operation
* type are ignored.
*
* The signature allows use as a g_list_foreach() function.
*
* \param[in]     data       Notification message (xmlNode *)
* \param[in,out] user_data  API connection (lrmd_t *)
*/
static void
lrmd_dispatch_internal(gpointer data, gpointer user_data)
{
xmlNode *msg = data;
lrmd_t *lrmd = user_data;
const char *type;
const char *proxy_session = crm_element_value(msg,
PCMK__XA_LRMD_IPC_SESSION);
lrmd_private_t *native = lrmd->lrmd_private;
// The event lives on the stack; most of its strings are borrowed from msg
lrmd_event_data_t event = { 0, };
if (proxy_session != NULL) {
/* this is proxy business */
lrmd_internal_proxy_dispatch(lrmd, msg);
return;
} else if (!native->callback) {
/* no callback set */
crm_trace("notify event received but client has not set callback");
return;
}
event.remote_nodename = native->remote_nodename;
type = crm_element_value(msg, PCMK__XA_LRMD_OP);
crm_element_value_int(msg, PCMK__XA_LRMD_CALLID, &event.call_id);
event.rsc_id = crm_element_value(msg, PCMK__XA_LRMD_RSC_ID);
if (pcmk__str_eq(type, LRMD_OP_RSC_REG, pcmk__str_none)) {
event.type = lrmd_event_register;
} else if (pcmk__str_eq(type, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
event.type = lrmd_event_unregister;
} else if (pcmk__str_eq(type, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
int rc = 0;
int exec_time = 0;
int queue_time = 0;
time_t epoch = 0;
crm_element_value_int(msg, PCMK__XA_LRMD_TIMEOUT, &event.timeout);
crm_element_value_ms(msg, PCMK__XA_LRMD_RSC_INTERVAL,
&event.interval_ms);
crm_element_value_int(msg, PCMK__XA_LRMD_RSC_START_DELAY,
&event.start_delay);
crm_element_value_int(msg, PCMK__XA_LRMD_EXEC_RC, &rc);
event.rc = (enum ocf_exitcode) rc;
crm_element_value_int(msg, PCMK__XA_LRMD_EXEC_OP_STATUS,
&event.op_status);
crm_element_value_int(msg, PCMK__XA_LRMD_RSC_DELETED,
&event.rsc_deleted);
// NOTE(review): epoch is reused for both reads below; confirm what
// crm_element_value_epoch() stores when the second attribute is missing
crm_element_value_epoch(msg, PCMK__XA_LRMD_RUN_TIME, &epoch);
event.t_run = epoch;
crm_element_value_epoch(msg, PCMK__XA_LRMD_RCCHANGE_TIME, &epoch);
event.t_rcchange = epoch;
// Negative times are logged as assertion failures and clamped to 0
crm_element_value_int(msg, PCMK__XA_LRMD_EXEC_TIME, &exec_time);
CRM_LOG_ASSERT(exec_time >= 0);
event.exec_time = QB_MAX(0, exec_time);
crm_element_value_int(msg, PCMK__XA_LRMD_QUEUE_TIME, &queue_time);
CRM_LOG_ASSERT(queue_time >= 0);
event.queue_time = QB_MAX(0, queue_time);
event.op_type = crm_element_value(msg, PCMK__XA_LRMD_RSC_ACTION);
event.user_data = crm_element_value(msg,
PCMK__XA_LRMD_RSC_USERDATA_STR);
event.type = lrmd_event_exec_complete;
/* output and exit_reason may be freed by a callback */
event.output = crm_element_value_copy(msg, PCMK__XA_LRMD_RSC_OUTPUT);
lrmd__set_result(&event, event.rc, event.op_status,
crm_element_value(msg, PCMK__XA_LRMD_RSC_EXIT_REASON));
event.params = xml2list(msg);
} else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) {
event.type = lrmd_event_new_client;
} else if (pcmk__str_eq(type, LRMD_OP_POKE, pcmk__str_none)) {
event.type = lrmd_event_poke;
} else {
// Unrecognized operation type: nothing to report to the client
return;
}
crm_trace("op %s notify event received", type);
native->callback(&event);
// Release only what this function allocated for the event
if (event.params) {
g_hash_table_destroy(event.params);
}
lrmd__reset_result(&event);
}
/*!
 * \internal
 * \brief IPC dispatch function: parse a received message and dispatch it
 *
 * Nothing is parsed or dispatched unless the client has set a callback.
 *
 * \param[in]     buffer    Message text received via IPC
 * \param[in]     length    Ignored
 * \param[in,out] userdata  API connection
 *
 * \return Always 0, to indicate that IPC mainloop source should be kept
 */
static int
lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
    lrmd_t *lrmd = userdata;
    lrmd_private_t *native = lrmd->lrmd_private;
    xmlNode *msg = NULL;

    if (native->callback == NULL) {
        return 0;
    }

    msg = pcmk__xml_parse(buffer);
    lrmd_dispatch_internal(msg, lrmd);
    pcmk__xml_free(msg);
    return 0;
}
/*!
 * \internal
 * \brief Free an XML node passed as a generic pointer
 *
 * \param[in,out] userdata  XML node (xmlNode *) to free
 */
static void
lrmd_free_xml(gpointer userdata)
{
    xmlNode *xml = userdata;

    pcmk__xml_free(xml);
}
/*!
 * \internal
 * \brief Check whether a remote executor connection has a TLS session
 *
 * \param[in] lrmd  API connection
 *
 * \return true if the connection's remote object has a TLS session,
 *         otherwise false
 */
static bool
remote_executor_connected(lrmd_t * lrmd)
{
    const lrmd_private_t *native = lrmd->lrmd_private;

    if (native->remote->tls_session == NULL) {
        return false;
    }
    return true;
}
/*!
 * \internal
 * \brief Process a message received over a remote executor connection
 *
 * Notifications are dispatched to the client. Replies are accepted only while
 * late replies are expected; any other reply is logged as outdated. Other
 * message types are ignored.
 *
 * \param[in]     xml   Message received
 * \param[in,out] lrmd  API connection
 */
static void
handle_remote_msg(xmlNode *xml, lrmd_t *lrmd)
{
    lrmd_private_t *native = lrmd->lrmd_private;
    const char *msg_type = crm_element_value(xml,
                                             PCMK__XA_LRMD_REMOTE_MSG_TYPE);
    const char *op = NULL;

    if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) {
        lrmd_dispatch_internal(xml, lrmd);
        return;
    }

    if (!pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) {
        return;
    }

    op = crm_element_value(xml, PCMK__XA_LRMD_OP);

    if (native->expected_late_replies <= 0) {
        int reply_id = 0;

        crm_element_value_int(xml, PCMK__XA_LRMD_CALLID, &reply_id);
        /* if this happens, we want to know about it */
        crm_err("Got outdated Pacemaker Remote reply %d", reply_id);
        return;
    }

    native->expected_late_replies--;

    /* The register op message we get as a response to lrmd_handshake_async
     * is a reply, so we have to handle that here.
     */
    if (pcmk__str_eq(op, "register", pcmk__str_casei)) {
        int rc = process_lrmd_handshake_reply(xml, native);

        report_async_connection_result(lrmd, pcmk_rc2legacy(rc));
    }
}
/*!
 * \internal
 * \brief Notify trigger handler
 *
 * Dispatches every queued notification and then empties the queue.
 *
 * \param[in,out] userdata API connection
 *
 * \return Always return G_SOURCE_CONTINUE to leave this trigger handler in the
 *         mainloop
 */
static int
process_pending_notifies(gpointer userdata)
{
    lrmd_t *lrmd = userdata;
    lrmd_private_t *native = lrmd->lrmd_private;

    if (native->pending_notify != NULL) {
        crm_trace("Processing pending notifies");
        g_list_foreach(native->pending_notify, lrmd_dispatch_internal, lrmd);
        g_list_free_full(native->pending_notify, lrmd_free_xml);
        native->pending_notify = NULL;
    }
    return G_SOURCE_CONTINUE;
}
/*!
 * \internal
 * \brief TLS dispatch function for file descriptor sources
 *
 * \param[in,out] userdata API connection
 *
 * \return -1 on error to remove the source from the mainloop, or 0 otherwise
 *         to leave it in the mainloop
 */
static int
lrmd_tls_dispatch(gpointer userdata)
{
    lrmd_t *lrmd = userdata;
    lrmd_private_t *native = lrmd->lrmd_private;
    xmlNode *msg = NULL;
    int rc = pcmk_rc_ok;

    if (!remote_executor_connected(lrmd)) {
        crm_trace("TLS dispatch triggered after disconnect");
        return -1;
    }

    crm_trace("TLS dispatch triggered");

    // Read only if data is available right now
    rc = pcmk__remote_ready(native->remote, 0);
    if (rc == pcmk_rc_ok) {
        rc = pcmk__read_remote_message(native->remote, -1);
    }

    if (!((rc == pcmk_rc_ok) || (rc == ETIME))) {
        const char *node = "local";

        if (native->remote_nodename) {
            node = native->remote_nodename;
        }
        crm_info("Lost %s executor connection while reading data", node);
        lrmd_tls_disconnect(lrmd);
        return -1;
    }

    /* If rc is ETIME, there was nothing to read but we may already have a
     * full message in the buffer
     */
    msg = pcmk__remote_message_xml(native->remote);
    if (msg != NULL) {
        handle_remote_msg(msg, lrmd);
        pcmk__xml_free(msg);
    }
    return 0;
}
/* Not used with mainloop */
/*!
 * \brief Check whether an executor connection has data ready to dispatch
 *
 * \param[in,out] lrmd     API connection
 * \param[in]     timeout  Currently unused
 *
 * \return For IPC connections, the result of crm_ipc_ready(); for TLS
 *         connections, 1 if a notification is queued or data is ready, 0 if
 *         nothing is ready, or a negative legacy error code; for any other
 *         connection type, \c -EPROTONOSUPPORT
 */
int
lrmd_poll(lrmd_t * lrmd, int timeout)
{
    lrmd_private_t *native = lrmd->lrmd_private;

    switch (native->type) {
        case pcmk__client_ipc:
            return crm_ipc_ready(native->ipc);

        case pcmk__client_tls:
            if (native->pending_notify) {
                return 1;
            } else {
                int rc = pcmk__remote_ready(native->remote, 0);

                switch (rc) {
                    case pcmk_rc_ok:
                        return 1;
                    case ETIME:
                        return 0;
                    default:
                        return pcmk_rc2legacy(rc);
                }
            }

        default:
            // type is a uint64_t, so it must not be passed for a %d conversion
            crm_err("Unsupported executor connection type (bug?): %llu",
                    (unsigned long long) native->type);
            return -EPROTONOSUPPORT;
    }
}
/* Not used with mainloop */
/*!
 * \brief Read and dispatch any messages available on an executor connection
 *
 * \param[in,out] lrmd  API connection (must not be NULL)
 *
 * \return TRUE if the connection is still connected afterward, otherwise FALSE
 */
bool
lrmd_dispatch(lrmd_t * lrmd)
{
    lrmd_private_t *private = NULL;

    pcmk__assert(lrmd != NULL);

    private = lrmd->lrmd_private;
    switch (private->type) {
        case pcmk__client_ipc:
            // Drain everything that is currently readable
            while (crm_ipc_ready(private->ipc)) {
                if (crm_ipc_read(private->ipc) > 0) {
                    const char *msg = crm_ipc_buffer(private->ipc);

                    lrmd_ipc_dispatch(msg, strlen(msg), lrmd);
                }
            }
            break;

        case pcmk__client_tls:
            lrmd_tls_dispatch(lrmd);
            break;

        default:
            // type is a uint64_t, so it must not be passed for a %d conversion
            crm_err("Unsupported executor connection type (bug?): %llu",
                    (unsigned long long) private->type);
    }

    if (lrmd_api_is_connected(lrmd) == FALSE) {
        crm_err("Connection closed");
        return FALSE;
    }

    return TRUE;
}
/*!
 * \internal
 * \brief Create an executor command message
 *
 * \param[in] token    Client token (only checked for being non-NULL here)
 * \param[in] op       Operation name to put in the message
 * \param[in] data     If not NULL, XML to copy into the message's call data
 * \param[in] timeout  Timeout to put in the message
 * \param[in] options  Call options to put in the message
 *
 * \return Newly created message, or NULL if \p token is NULL
 */
static xmlNode *
lrmd_create_op(const char *token, const char *op, xmlNode *data, int timeout,
               enum lrmd_call_options options)
{
    xmlNode *request = NULL;

    CRM_CHECK(token != NULL, return NULL);

    request = pcmk__xe_create(NULL, PCMK__XE_LRMD_COMMAND);
    crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_LRMD);
    crm_xml_add(request, PCMK__XA_LRMD_OP, op);
    crm_xml_add_int(request, PCMK__XA_LRMD_TIMEOUT, timeout);
    crm_xml_add_int(request, PCMK__XA_LRMD_CALLOPT, options);

    if (data != NULL) {
        // The payload is copied, so the caller keeps ownership of data
        pcmk__xml_copy(pcmk__xe_create(request, PCMK__XE_LRMD_CALLDATA), data);
    }

    crm_trace("Created executor %s command with call options %.8lx (%d)",
              op, (long)options, options);
    return request;
}
/*!
 * \internal
 * \brief Handle destruction of an executor connection's mainloop source
 *
 * Logs the disconnection, forgets the IPC connection and mainloop source, and
 * notifies the client (if it has set a callback) with a disconnect event.
 *
 * \param[in,out] userdata  API connection
 */
static void
lrmd_ipc_connection_destroy(gpointer userdata)
{
    lrmd_t *lrmd = userdata;
    lrmd_private_t *native = lrmd->lrmd_private;

    switch (native->type) {
        case pcmk__client_ipc:
            crm_info("Disconnected from local executor");
            break;

        case pcmk__client_tls:
            crm_info("Disconnected from remote executor on %s",
                     native->remote_nodename);
            break;

        default:
            // type is a uint64_t, so it must not be passed for a %d conversion
            crm_err("Unsupported executor connection type %llu (bug?)",
                    (unsigned long long) native->type);
    }

    /* Prevent these from being cleaned up in lrmd_api_disconnect() */
    native->ipc = NULL;
    native->source = NULL;

    if (native->callback) {
        lrmd_event_data_t event = { 0, };

        event.type = lrmd_event_disconnect;
        event.remote_nodename = native->remote_nodename;
        native->callback(&event);
    }
}
/*!
 * \internal
 * \brief Tear down the TLS side of an executor connection
 *
 * Closes the TLS session and socket, releases the TLS object, triggers,
 * queued notifications and receive buffers, and then notifies the client (if
 * it has set a callback) with a disconnect event.
 *
 * \param[in,out] userdata  API connection
 */
static void
lrmd_tls_connection_destroy(gpointer userdata)
{
    lrmd_t *lrmd = userdata;
    lrmd_private_t *native = lrmd->lrmd_private;
    pcmk__remote_t *remote = native->remote;

    crm_info("TLS connection destroyed");

    if (remote->tls_session != NULL) {
        gnutls_bye(remote->tls_session, GNUTLS_SHUT_RDWR);
        gnutls_deinit(remote->tls_session);
        remote->tls_session = NULL;
    }

    if (native->tls != NULL) {
        pcmk__free_tls(native->tls);
        native->tls = NULL;
    }

    if (native->sock >= 0) {
        close(native->sock);
    }

    if (native->process_notify != NULL) {
        mainloop_destroy_trigger(native->process_notify);
        native->process_notify = NULL;
    }

    if (native->pending_notify != NULL) {
        g_list_free_full(native->pending_notify, lrmd_free_xml);
        native->pending_notify = NULL;
    }

    if (native->handshake_trigger != NULL) {
        mainloop_destroy_trigger(native->handshake_trigger);
        native->handshake_trigger = NULL;
    }

    free(remote->buffer);
    remote->buffer = NULL;

    free(remote->start_state);
    remote->start_state = NULL;

    native->source = NULL;
    native->sock = -1;

    // Tell the client last, once the connection state has been reset
    if (native->callback != NULL) {
        lrmd_event_data_t event = {
            .type = lrmd_event_disconnect,
            .remote_nodename = native->remote_nodename,
        };

        native->callback(&event);
    }
}
/*!
 * \internal
 * \brief Tag a message with a remote message ID and type, then send it
 *
 * \param[in,out] session   Remote session to send the message over
 * \param[in,out] msg       Message to tag and send
 * \param[in]     id        Remote message ID to set in \p msg
 * \param[in]     msg_type  Remote message type to set in \p msg
 *
 * \return Standard Pacemaker return code
 */
int
lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id,
                      const char *msg_type)
{
    int rc = pcmk_rc_ok;

    crm_xml_add_int(msg, PCMK__XA_LRMD_REMOTE_MSG_ID, id);
    crm_xml_add(msg, PCMK__XA_LRMD_REMOTE_MSG_TYPE, msg_type);

    rc = pcmk__remote_send_xml(session, msg);
    return rc;
}
/*!
 * \internal
 * \brief Wait for the reply to a request sent over a remote (TLS) connection
 *
 * Messages that arrive while waiting but are not the expected reply are
 * handled here: notifications are queued on native->pending_notify for later
 * processing, and anything else is discarded (with an error logged unless it
 * is an expected late reply).
 *
 * \param[in,out] lrmd               Executor connection to read from
 * \param[in]     total_timeout      Maximum time to wait, in milliseconds
 *                                   (values that are not positive or that
 *                                   exceed MAX_TLS_RECV_WAIT are replaced by
 *                                   MAX_TLS_RECV_WAIT)
 * \param[in]     expected_reply_id  Message ID that the reply must carry
 * \param[out]    reply              Where to store the reply XML (left as
 *                                   NULL whenever an error is returned)
 *
 * \return Standard Pacemaker return code (ETIME if time ran out, ENOMSG if a
 *         successful read produced no message, or the failure code from
 *         pcmk__read_remote_message())
 */
static int
read_remote_reply(lrmd_t *lrmd, int total_timeout, int expected_reply_id,
xmlNode **reply)
{
lrmd_private_t *native = lrmd->lrmd_private;
time_t start = time(NULL);
const char *msg_type = NULL;
int reply_id = 0;
int remaining_timeout = 0;
int rc = pcmk_rc_ok;
/* A timeout of 0 here makes no sense. We have to wait a period of time
 * for the response to come back. Non-positive or oversized timeouts fall
 * back to MAX_TLS_RECV_WAIT.
 */
if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) {
total_timeout = MAX_TLS_RECV_WAIT;
}
// Keep going until a message passes every check below (or an error returns)
for (*reply = NULL; *reply == NULL; ) {
// First see whether a complete message is already available
*reply = pcmk__remote_message_xml(native->remote);
if (*reply == NULL) {
/* read some more off the tls buffer if we still have time left. */
if (remaining_timeout) {
// start is in seconds, so convert the elapsed time to milliseconds
remaining_timeout = total_timeout - ((time(NULL) - start) * 1000);
} else {
// remaining_timeout is still 0 on the first read: use the full timeout
remaining_timeout = total_timeout;
}
if (remaining_timeout <= 0) {
return ETIME;
}
rc = pcmk__read_remote_message(native->remote, remaining_timeout);
if (rc != pcmk_rc_ok) {
return rc;
}
*reply = pcmk__remote_message_xml(native->remote);
if (*reply == NULL) {
return ENOMSG;
}
}
crm_element_value_int(*reply, PCMK__XA_LRMD_REMOTE_MSG_ID, &reply_id);
msg_type = crm_element_value(*reply, PCMK__XA_LRMD_REMOTE_MSG_TYPE);
if (!msg_type) {
crm_err("Empty msg type received while waiting for reply");
pcmk__xml_free(*reply);
*reply = NULL;
} else if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) {
/* got a notify while waiting for reply, trigger the notify to be processed later */
crm_info("queueing notify");
// The pending list now holds the XML, so it is not freed here
native->pending_notify = g_list_append(native->pending_notify, *reply);
if (native->process_notify) {
crm_info("notify trigger set.");
mainloop_set_trigger(native->process_notify);
}
*reply = NULL;
} else if (!pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) {
/* msg isn't a reply, make some noise */
crm_err("Expected a reply, got %s", msg_type);
pcmk__xml_free(*reply);
*reply = NULL;
} else if (reply_id != expected_reply_id) {
// A reply to some other request: silently consume it if one was expected
if (native->expected_late_replies > 0) {
native->expected_late_replies--;
} else {
crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id);
}
pcmk__xml_free(*reply);
*reply = NULL;
}
}
/* If the remote buffer is still allocated, set the notify trigger
 * (presumably so that any leftover data gets processed from the mainloop --
 * TODO confirm)
 */
if (native->remote->buffer && native->process_notify) {
mainloop_set_trigger(native->process_notify);
}
return rc;
}
/*!
 * \internal
 * \brief Send a request to a Pacemaker Remote executor over TLS
 *
 * The request is tagged with the next value of the global remote message ID.
 * If sending fails, the TLS connection is disconnected.
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in,out] msg   Request XML to send
 *
 * \return Standard Pacemaker return code
 */
static int
send_remote_message(lrmd_t *lrmd, xmlNode *msg)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    int rc = pcmk_rc_ok;

    // Never use an ID that is zero or negative
    if (++global_remote_msg_id <= 0) {
        global_remote_msg_id = 1;
    }

    rc = lrmd__remote_send_xml(priv->remote, msg, global_remote_msg_id,
                               "request");
    if (rc == pcmk_rc_ok) {
        return rc;
    }

    crm_err("Disconnecting because TLS message could not be sent to "
            "Pacemaker Remote: %s", pcmk_rc_str(rc));
    lrmd_tls_disconnect(lrmd);
    return rc;
}
/*!
 * \internal
 * \brief Send a request over TLS and wait for its reply
 *
 * The connection is disconnected if no reply could be read.
 *
 * \param[in,out] lrmd     Executor connection
 * \param[in,out] msg      Request XML to send
 * \param[in]     timeout  How long to wait for the reply (passed to
 *                         read_remote_reply())
 * \param[out]    reply    If not NULL, where to store the reply XML; if NULL,
 *                         any reply is freed
 *
 * \return Legacy Pacemaker return code
 */
static int
lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
    xmlNode *response = NULL;
    int rc = pcmk_rc_ok;

    if (!remote_executor_connected(lrmd)) {
        return -ENOTCONN;
    }

    rc = send_remote_message(lrmd, msg);
    if (rc != pcmk_rc_ok) {
        return pcmk_rc2legacy(rc);
    }

    rc = read_remote_reply(lrmd, timeout, global_remote_msg_id, &response);
    if (rc != pcmk_rc_ok) {
        crm_err("Disconnecting remote after request %d reply not received: %s "
                QB_XS " rc=%d timeout=%dms",
                global_remote_msg_id, pcmk_rc_str(rc), rc, timeout);
        lrmd_tls_disconnect(lrmd);
    }

    if (reply == NULL) {
        pcmk__xml_free(response);
    } else {
        *reply = response;
    }
    return pcmk_rc2legacy(rc);
}
/*!
 * \internal
 * \brief Send XML to the executor and wait for a reply
 *
 * Dispatches to the IPC or TLS implementation according to the connection
 * type.
 *
 * \param[in,out] lrmd     Executor connection
 * \param[in,out] msg      XML to send
 * \param[in]     timeout  Timeout passed to the underlying send call
 * \param[out]    reply    Where to store the reply XML
 *
 * \return Result of the underlying send call, or -EPROTONOSUPPORT for an
 *         unsupported connection type
 */
static int
lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply)
{
    lrmd_private_t *priv = lrmd->lrmd_private;

    switch (priv->type) {
        case pcmk__client_ipc:
            return crm_ipc_send(priv->ipc, msg, crm_ipc_client_response,
                                timeout, reply);

        case pcmk__client_tls:
            return lrmd_tls_send_recv(lrmd, msg, timeout, reply);

        default:
            crm_err("Unsupported executor connection type (bug?): %d",
                    priv->type);
            return -EPROTONOSUPPORT;
    }
}
/*!
 * \internal
 * \brief Send XML to the executor without waiting for a reply
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in,out] msg   XML to send
 *
 * \return Result of the underlying send call (converted to a legacy return
 *         code for TLS), or -EPROTONOSUPPORT for an unsupported connection
 *         type
 */
static int
lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg)
{
    lrmd_private_t *priv = lrmd->lrmd_private;

    if (priv->type == pcmk__client_ipc) {
        return crm_ipc_send(priv->ipc, msg, crm_ipc_flags_none, 0, NULL);
    }

    if (priv->type == pcmk__client_tls) {
        int rc = send_remote_message(lrmd, msg);

        if (rc == pcmk_rc_ok) {
            /* We don't wait around for the reply, but since the
             * request/reply protocol needs to behave the same as libqb, a
             * reply will eventually come later anyway.
             */
            priv->expected_late_replies++;
        }
        return pcmk_rc2legacy(rc);
    }

    crm_err("Unsupported executor connection type (bug?): %d",
            priv->type);
    return -EPROTONOSUPPORT;
}
/*!
 * \internal
 * \brief Check whether an executor connection is currently connected
 *
 * \param[in] lrmd  Executor connection to check
 *
 * \return Result of the connection-type-specific check (0 for an unsupported
 *         connection type)
 */
static int
lrmd_api_is_connected(lrmd_t * lrmd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    int connected = 0;

    switch (priv->type) {
        case pcmk__client_ipc:
            connected = crm_ipc_connected(priv->ipc);
            break;

        case pcmk__client_tls:
            connected = remote_executor_connected(lrmd);
            break;

        default:
            crm_err("Unsupported executor connection type (bug?): %d",
                    priv->type);
            break;
    }
    return connected;
}
/*!
 * \internal
 * \brief Send a prepared API command to the executor
 *
 * \param[in,out] lrmd          Existing connection to the executor
 * \param[in]     op            Name of API command to send
 * \param[in]     data          Command data XML to add to the sent command
 * \param[out]    output_data   If not NULL and a reply was received and
 *                              parsed, the reply is stored here (the caller
 *                              then owns it)
 * \param[in]     timeout       Timeout in milliseconds (if 0, defaults to
 *                              a sensible value per the type of connection,
 *                              standard vs. pacemaker remote);
 *                              also propagated to the command XML
 * \param[in]     options       Call options to pass to server when sending
 * \param[in]     expect_reply  If true, wait for a reply from the server;
 *                              must be true for IPC (as opposed to TLS)
 *                              clients
 *
 * \return Legacy Pacemaker return code: the result code found in the reply
 *         when one was received and parsed, the result of the send when no
 *         reply is expected, otherwise a negative errno
 */
static int
lrmd_send_command(lrmd_t *lrmd, const char *op, xmlNode *data,
                  xmlNode **output_data, int timeout,
                  enum lrmd_call_options options, bool expect_reply)
{
    lrmd_private_t *native = lrmd->lrmd_private;
    xmlNode *request = NULL;
    xmlNode *response = NULL;
    int rc = pcmk_ok;

    if (!lrmd_api_is_connected(lrmd)) {
        return -ENOTCONN;
    }
    if (op == NULL) {
        crm_err("No operation specified");
        return -EINVAL;
    }

    CRM_LOG_ASSERT(native->token != NULL);
    crm_trace("Sending %s op to executor", op);

    request = lrmd_create_op(native->token, op, data, timeout, options);
    if (request == NULL) {
        return -EINVAL;
    }

    if (!expect_reply) {
        rc = lrmd_send_xml_no_reply(lrmd, request);
        goto done;
    }

    rc = lrmd_send_xml(lrmd, request, timeout, &response);
    if (rc < 0) {
        crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc);
        goto done;
    }
    if (response == NULL) {
        rc = -ENOMSG;
        goto done;
    }

    // The command's result is whatever return code the reply carries
    rc = pcmk_ok;
    crm_trace("%s op reply received", op);
    if (crm_element_value_int(response, PCMK__XA_LRMD_RC, &rc) != 0) {
        rc = -ENOMSG;
        goto done;
    }

    crm_log_xml_trace(response, "Reply");
    if (output_data != NULL) {
        *output_data = response;
        response = NULL;    // The caller owns the reply now, so don't free it
    }

done:
    if (!lrmd_api_is_connected(lrmd)) {
        crm_err("Executor disconnected");
    }
    pcmk__xml_free(request);
    pcmk__xml_free(response);
    return rc;
}
/*!
 * \internal
 * \brief Send a poke command to the executor
 *
 * A reply is waited for only on IPC connections.
 *
 * \param[in,out] lrmd  Executor connection
 *
 * \return pcmk_ok unless sending the command returned a negative code, in
 *         which case that code is returned
 */
static int
lrmd_api_poke_connection(lrmd_t * lrmd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    xmlNode *request_data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC);
    int rc = pcmk_ok;

    crm_xml_add(request_data, PCMK__XA_LRMD_ORIGIN, __func__);
    rc = lrmd_send_command(lrmd, LRMD_OP_POKE, request_data, NULL, 0, 0,
                           (priv->type == pcmk__client_ipc));
    pcmk__xml_free(request_data);

    if (rc < 0) {
        return rc;
    }
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Send a check command to the executor
 *
 * The value of PCMK_OPT_STONITH_WATCHDOG_TIMEOUT from \p hash is included in
 * the command when it is present and watchdog fencing is enabled for the
 * connection's remote node. A reply is waited for only on IPC connections.
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in]     hash  Table in which to look up the option value
 *
 * \return Standard Pacemaker return code
 */
int
lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    xmlNode *request_data = pcmk__xe_create(NULL, PCMK__XA_LRMD_OP);
    const char *watchdog_timeout = NULL;
    int rc = pcmk_rc_ok;

    crm_xml_add(request_data, PCMK__XA_LRMD_ORIGIN, __func__);

    watchdog_timeout = g_hash_table_lookup(hash,
                                           PCMK_OPT_STONITH_WATCHDOG_TIMEOUT);
    if ((watchdog_timeout != NULL)
        && stonith__watchdog_fencing_enabled_for_node(priv->remote_nodename)) {

        crm_xml_add(request_data, PCMK__XA_LRMD_WATCHDOG, watchdog_timeout);
    }

    rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, request_data, NULL, 0, 0,
                           (priv->type == pcmk__client_ipc));
    pcmk__xml_free(request_data);

    if (rc < 0) {
        return pcmk_legacy2rc(rc);
    }
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Create the registration request sent at the start of a handshake
 *
 * \param[in] name      Client name to put in the request
 * \param[in] is_proxy  Whether to advertise this client as an IPC provider
 *
 * \return Newly created registration request XML
 */
static xmlNode *
lrmd_handshake_hello_msg(const char *name, bool is_proxy)
{
    xmlNode *request = pcmk__xe_create(NULL, PCMK__XE_LRMD_COMMAND);

    crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_LRMD);
    crm_xml_add(request, PCMK__XA_LRMD_OP, CRM_OP_REGISTER);
    crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, name);
    crm_xml_add(request, PCMK__XA_LRMD_PROTOCOL_VERSION,
                LRMD_PROTOCOL_VERSION);

    if (!is_proxy) {
        return request;
    }

    // Advertise that we are a proxy provider
    pcmk__xe_set_bool_attr(request, PCMK__XA_LRMD_IS_IPC_PROVIDER, true);
    return request;
}
/*!
 * \internal
 * \brief Process the executor's reply to a registration (handshake) request
 *
 * Record the uptime and start state reported by the executor and, if the
 * reply is a valid registration reply, the registration token and the peer's
 * protocol version.
 *
 * \param[in]     reply   Registration reply XML
 * \param[in,out] native  Private connection data to update
 *
 * \return pcmk_rc_ok if the registration was accepted, otherwise EPROTO
 */
static int
process_lrmd_handshake_reply(xmlNode *reply, lrmd_private_t *native)
{
    int rc = pcmk_rc_ok;
    const char *version = crm_element_value(reply, PCMK__XA_LRMD_PROTOCOL_VERSION);
    const char *msg_type = crm_element_value(reply, PCMK__XA_LRMD_OP);
    const char *tmp_ticket = crm_element_value(reply, PCMK__XA_LRMD_CLIENTID);
    const char *start_state = crm_element_value(reply, PCMK__XA_NODE_START_STATE);
    long long uptime = -1;

    crm_element_value_int(reply, PCMK__XA_LRMD_RC, &rc);
    rc = pcmk_legacy2rc(rc);

    /* The remote executor may add its uptime to the XML reply, which is useful
     * in handling transient attributes when the connection to the remote node
     * unexpectedly drops. If no parameter is given, just default to -1.
     */
    crm_element_value_ll(reply, PCMK__XA_UPTIME, &uptime);
    native->remote->uptime = uptime;

    if (start_state) {
        /* A start state may be left over from an earlier handshake on this
         * connection (lrmd_tls_disconnect() does not free it), so release it
         * before storing the new copy.
         */
        free(native->remote->start_state);
        native->remote->start_state = strdup(start_state);
    }

    if (rc == EPROTO) {
        crm_err("Executor protocol version mismatch between client (%s) and server (%s)",
                LRMD_PROTOCOL_VERSION, version);
        crm_log_xml_err(reply, "Protocol Error");

    } else if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) {
        crm_err("Invalid registration message: %s", msg_type);
        crm_log_xml_err(reply, "Bad reply");
        rc = EPROTO;

    } else if (tmp_ticket == NULL) {
        crm_err("No registration token provided");
        crm_log_xml_err(reply, "Bad reply");
        rc = EPROTO;

    } else {
        crm_trace("Obtained registration token: %s", tmp_ticket);
        native->token = strdup(tmp_ticket);
        native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */
        rc = pcmk_rc_ok;
    }

    return rc;
}
/*!
 * \internal
 * \brief Register with the executor and wait for its reply
 *
 * The connection is disconnected if registration fails.
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in]     name  Client name to register with
 *
 * \return Standard Pacemaker return code
 */
static int
lrmd_handshake(lrmd_t * lrmd, const char *name)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    xmlNode *response = NULL;
    xmlNode *request = lrmd_handshake_hello_msg(name,
                                                priv->proxy_callback != NULL);
    int rc = lrmd_send_xml(lrmd, request, -1, &response);

    if (rc < 0) {
        crm_perror(LOG_DEBUG, "Couldn't complete registration with the executor API: %d", rc);
        rc = ECOMM;

    } else if (response != NULL) {
        rc = process_lrmd_handshake_reply(response, priv);

    } else {
        crm_err("Did not receive registration reply");
        rc = EPROTO;
    }

    pcmk__xml_free(response);
    pcmk__xml_free(request);

    if (rc != pcmk_rc_ok) {
        lrmd_api_disconnect(lrmd);
    }
    return rc;
}
/*!
 * \internal
 * \brief Send a registration request without waiting for the reply
 *
 * On success, one more late reply is expected on the connection; on failure,
 * the connection is disconnected.
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in]     name  Client name to register with
 *
 * \return Standard Pacemaker return code
 */
static int
lrmd_handshake_async(lrmd_t * lrmd, const char *name)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    xmlNode *request = lrmd_handshake_hello_msg(name,
                                                priv->proxy_callback != NULL);
    int rc = send_remote_message(lrmd, request);

    if (rc != pcmk_rc_ok) {
        lrmd_api_disconnect(lrmd);
    } else {
        priv->expected_late_replies++;
    }

    pcmk__xml_free(request);
    return rc;
}
/*!
 * \internal
 * \brief Connect to the local executor via IPC
 *
 * \param[in,out] lrmd  Executor connection
 * \param[out]    fd    If not NULL, connect without the mainloop and store
 *                      the IPC file descriptor here; if NULL, add the
 *                      connection to the mainloop
 *
 * \return pcmk_ok on success, -ENOTCONN otherwise
 */
static int
lrmd_ipc_connect(lrmd_t * lrmd, int *fd)
{
    int rc = pcmk_ok;
    lrmd_private_t *priv = lrmd->lrmd_private;
    struct ipc_client_callbacks lrmd_callbacks = {
        .dispatch = lrmd_ipc_dispatch,
        .destroy = lrmd_ipc_connection_destroy
    };

    crm_info("Connecting to executor");

    if (fd == NULL) {
        // Mainloop
        priv->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks);
        priv->ipc = mainloop_get_ipc_client(priv->source);

    } else {
        // No mainloop
        priv->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0);
        if (priv->ipc != NULL) {
            rc = pcmk__connect_generic_ipc(priv->ipc);
            if (rc == pcmk_rc_ok) {
                rc = pcmk__ipc_fd(priv->ipc, fd);
            }
            if (rc != pcmk_rc_ok) {
                crm_err("Connection to executor failed: %s", pcmk_rc_str(rc));
                rc = -ENOTCONN;
            }
        }
    }

    if (priv->ipc == NULL) {
        crm_debug("Could not connect to the executor API");
        rc = -ENOTCONN;
    }
    return rc;
}
/*!
 * \internal
 * \brief Copy a GnuTLS datum into newly allocated memory
 *
 * \param[out] dest    Where to store the copy (any previous contents are
 *                     overwritten without being freed)
 * \param[in]  source  Datum to copy (must have non-NULL data)
 */
static void
copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source)
{
    pcmk__assert((dest != NULL) && (source != NULL) && (source->data != NULL));

    dest->size = source->size;
    dest->data = gnutls_malloc(source->size);
    pcmk__mem_assert(dest->data);

    memcpy(dest->data, source->data, source->size);
}
/*!
 * \internal
 * \brief Free a GnuTLS datum's data and reset the datum to empty
 *
 * \param[in,out] datum  Datum to clear
 */
static void
clear_gnutls_datum(gnutls_datum_t *datum)
{
    gnutls_free(datum->data);

    // Leave the datum in a well-defined empty state
    datum->size = 0;
    datum->data = NULL;
}
#define KEY_READ_LEN 256    // Chunk size for reading key from file

/*!
 * \internal
 * \brief Read a Pacemaker Remote authentication key from a file
 *
 * The file is read one byte at a time into a buffer that grows in chunks of
 * KEY_READ_LEN bytes. If a read error interrupts the file, a warning is
 * logged and whatever was read so far is kept.
 *
 * \param[in]  location  Path of the key file
 * \param[out] key       Where to store the key (left with NULL data and zero
 *                       size if the file is empty)
 *
 * \return Standard Pacemaker return code (EINVAL if an argument is NULL, the
 *         errno set by fopen() if the file cannot be opened, ENOKEY if no
 *         bytes could be read)
 */
static int
read_gnutls_key(const char *location, gnutls_datum_t *key)
{
    FILE *stream = NULL;
    size_t buf_len = KEY_READ_LEN;

    if ((location == NULL) || (key == NULL)) {
        return EINVAL;
    }

    stream = fopen(location, "r");
    if (stream == NULL) {
        return errno;
    }

    key->data = gnutls_malloc(buf_len);

    /* The buffer is written to below, so a failed allocation must not go
     * unnoticed (the reallocation further down is checked the same way)
     */
    pcmk__assert(key->data != NULL);
    key->size = 0;

    while (!feof(stream)) {
        int next = fgetc(stream);

        if (next == EOF) {
            // EOF without the end-of-file indicator means the read failed
            if (!feof(stream)) {
                crm_warn("Pacemaker Remote key read was partially successful "
                         "(copy in memory may be corrupted)");
            }
            break;
        }

        // Grow the buffer by another chunk once it is full
        if (key->size == buf_len) {
            buf_len = key->size + KEY_READ_LEN;
            key->data = gnutls_realloc(key->data, buf_len);
            pcmk__assert(key->data);
        }
        key->data[key->size++] = (unsigned char) next;
    }
    fclose(stream);

    if (key->size == 0) {
        clear_gnutls_datum(key);
        return ENOKEY;
    }
    return pcmk_rc_ok;
}
/* Cache of the most recently used Pacemaker Remote authentication key
 *
 * A zero "updated" time means nothing is cached (see key_is_cached()).
 */
struct key_cache_s {
// When cached key was read (the cache expires 60 seconds later)
time_t updated;
// Where cached key was read from (the pointer is stored as given, not copied)
const char *location;
// Cached key (a private copy, released by clear_key_cache())
gnutls_datum_t key;
};
/*!
 * \internal
 * \brief Check whether a key cache currently holds a key
 *
 * \param[in] key_cache  Key cache to check
 *
 * \return true if the cache's update time is nonzero, otherwise false
 */
static bool
key_is_cached(struct key_cache_s *key_cache)
{
    if (key_cache->updated == 0) {
        return false;
    }
    return true;
}
/*!
 * \internal
 * \brief Check whether a cached key is too old to use
 *
 * \param[in] key_cache  Key cache to check
 *
 * \return true if at least 60 seconds have passed since the cache was
 *         updated, otherwise false
 */
static bool
key_cache_expired(struct key_cache_s *key_cache)
{
    const time_t now = time(NULL);

    return (now - key_cache->updated) >= 60;
}
/*!
 * \internal
 * \brief Empty a key cache
 *
 * \param[in,out] key_cache  Key cache to clear
 */
static void
clear_key_cache(struct key_cache_s *key_cache)
{
    clear_gnutls_datum(&(key_cache->key));

    // Nothing else to reset (or log) if the cache was already empty
    if ((key_cache->updated == 0) && (key_cache->location == NULL)) {
        return;
    }

    key_cache->updated = 0;
    key_cache->location = NULL;
    crm_debug("Cleared Pacemaker Remote key cache");
}
/*!
 * \internal
 * \brief Copy the cached key for a caller
 *
 * \param[in]  key_cache  Key cache to copy the key from
 * \param[out] key        Where to store the newly allocated copy
 */
static void
get_cached_key(struct key_cache_s *key_cache, gnutls_datum_t *key)
{
    gnutls_datum_t *cached = &(key_cache->key);

    copy_gnutls_datum(key, cached);
    crm_debug("Using cached Pacemaker Remote key from %s",
              pcmk__s(key_cache->location, "unknown location"));
}
/*!
 * \internal
 * \brief Store a copy of a key in a key cache
 *
 * \param[in,out] key_cache  Key cache to update
 * \param[in]     key        Key to cache (copied)
 * \param[in]     location   Where \p key was read from (the pointer itself
 *                           is stored, not a copy of the string)
 */
static void
cache_key(struct key_cache_s *key_cache, gnutls_datum_t *key,
          const char *location)
{
    gnutls_datum_t *cached = &(key_cache->key);

    key_cache->location = location;
    key_cache->updated = time(NULL);
    copy_gnutls_datum(cached, key);

    crm_debug("Using (and cacheing) Pacemaker Remote key from %s",
              pcmk__s(location, "unknown location"));
}
/*!
 * \internal
 * \brief Get Pacemaker Remote authentication key from file or cache
 *
 * An unexpired cached key is returned without looking at \p location.
 * Otherwise the key is read from \p location and cached.
 *
 * \param[in]  location  Path to key file to try (this memory must
 *                       persist across all calls of this function)
 * \param[out] key       Key from location or cache
 *
 * \return Standard Pacemaker return code
 */
static int
get_remote_key(const char *location, gnutls_datum_t *key)
{
    static struct key_cache_s key_cache = { 0, };
    int rc = pcmk_rc_ok;

    if ((location == NULL) || (key == NULL)) {
        return EINVAL;
    }

    if (key_is_cached(&key_cache)) {
        if (!key_cache_expired(&key_cache)) {
            get_cached_key(&key_cache, key);
            return pcmk_rc_ok;
        }
        clear_key_cache(&key_cache);
    }

    rc = read_gnutls_key(location, key);
    if (rc == pcmk_rc_ok) {
        cache_key(&key_cache, key, location);
    }
    return rc;
}
/*!
 * \internal
 * \brief Initialize the Pacemaker Remote authentication key
 *
 * Load the Pacemaker Remote authentication key (from cache if available)
 * using exactly one location:
 *
 * - The value of the PCMK__ENV_AUTHKEY_LOCATION environment option, if set
 *   (it is looked up only on the first call)
 * - Otherwise, the Pacemaker default key file location
 *
 * The default location is not tried when the environment option is set, even
 * if the key cannot be read from there.
 *
 * \param[out] key  Where to store key
 *
 * \return Standard Pacemaker return code (ENOKEY if the key could not be
 *         obtained)
 */
int
lrmd__init_remote_key(gnutls_datum_t *key)
{
    static const char *env_location = NULL;
    static bool need_env = true;

    int rc = pcmk_rc_ok;

    if (need_env) {
        env_location = pcmk__env_option(PCMK__ENV_AUTHKEY_LOCATION);
        need_env = false;
    }

    if (env_location == NULL) {
        // Nothing set in the environment, so use the default location
        rc = get_remote_key(DEFAULT_REMOTE_KEY_LOCATION, key);
        if (rc != pcmk_rc_ok) {
            crm_warn("Could not read Pacemaker Remote key from default location %s: %s",
                     DEFAULT_REMOTE_KEY_LOCATION, pcmk_rc_str(rc));
            return ENOKEY;
        }
        return pcmk_rc_ok;
    }

    // Use the location given in the environment
    rc = get_remote_key(env_location, key);
    if (rc != pcmk_rc_ok) {
        crm_warn("Could not read Pacemaker Remote key from %s: %s",
                 env_location, pcmk_rc_str(rc));
        return ENOKEY;
    }
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Deliver a connect event to the registered event callback, if any
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in]     rc    Connection result to report in the event
 */
static void
report_async_connection_result(lrmd_t * lrmd, int rc)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    lrmd_event_data_t connect_event = { 0, };

    if (priv->callback == NULL) {
        return;
    }

    connect_event.type = lrmd_event_connect;
    connect_event.remote_nodename = priv->remote_nodename;
    connect_event.connection_rc = rc;
    priv->callback(&connect_event);
}
/*!
 * \internal
 * \brief Handle a failed TLS handshake with a Pacemaker Remote server
 *
 * Log the failure, report it via the connect event, and tear down the TLS
 * session and connection.
 *
 * \param[in,out] lrmd    Executor connection
 * \param[in]     tls_rc  GnuTLS result (used for the log message when \p rc
 *                        is EPROTO)
 * \param[in]     rc      Standard Pacemaker return code for the failure
 */
static void
tls_handshake_failed(lrmd_t *lrmd, int tls_rc, int rc)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    const char *reason = NULL;

    if (rc == EPROTO) {
        reason = gnutls_strerror(tls_rc);
    } else {
        reason = pcmk_rc_str(rc);
    }

    crm_warn("Disconnecting after TLS handshake with "
             "Pacemaker Remote server %s:%d failed: %s",
             priv->server, priv->port, reason);
    report_async_connection_result(lrmd, pcmk_rc2legacy(rc));

    // Free the session here so that the destroy function skips gnutls_bye()
    gnutls_deinit(priv->remote->tls_session);
    priv->remote->tls_session = NULL;
    lrmd_tls_connection_destroy(lrmd);
}
/*!
 * \internal
 * \brief Handle a successful TLS handshake with a Pacemaker Remote server
 *
 * \param[in,out] lrmd  Executor connection
 */
static void
tls_handshake_succeeded(lrmd_t *lrmd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    int rc = pcmk_rc_ok;

    /* Now that the handshake is done, see if any client TLS certificate is
     * close to its expiration date and log if so. If a TLS certificate is not
     * in use, this function will just return so we don't need to check for the
     * session type here.
     */
    pcmk__tls_check_cert_expiration(priv->remote->tls_session);

    crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded",
             priv->server, priv->port);

    /* If add_tls_to_mainloop() succeeds, nothing can be reported until the
     * async reply has been read, so only a failure is reported right now.
     */
    rc = add_tls_to_mainloop(lrmd, true);
    if (rc == pcmk_rc_ok) {
        return;
    }
    report_async_connection_result(lrmd, pcmk_rc2legacy(rc));
}
/*!
 * \internal
 * \brief Perform a TLS client handshake with a Pacemaker Remote server
 *
 * If the handshake fails, the failure is handled by tls_handshake_failed()
 * before returning.
 *
 * \param[in] lrmd  Newly established Pacemaker Remote executor connection
 *
 * \return Standard Pacemaker return code
 */
static int
tls_client_handshake(lrmd_t *lrmd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    int gnutls_result = GNUTLS_E_SUCCESS;
    int rc = pcmk__tls_client_handshake(priv->remote, TLS_HANDSHAKE_TIMEOUT,
                                        &gnutls_result);

    if (rc == pcmk_rc_ok) {
        return rc;
    }

    tls_handshake_failed(lrmd, gnutls_result, rc);
    return rc;
}
/*!
 * \internal
 * \brief Add trigger and file descriptor mainloop sources for TLS
 *
 * \param[in,out] lrmd              API connection with established TLS session
 * \param[in]     do_api_handshake  Whether to perform executor handshake
 *
 * \return Standard Pacemaker return code
 */
static int
add_tls_to_mainloop(lrmd_t *lrmd, bool do_api_handshake)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    int rc = pcmk_rc_ok;
    char *source_name = crm_strdup_printf("pacemaker-remote-%s:%d",
                                          priv->server, priv->port);
    struct mainloop_fd_callbacks tls_fd_callbacks = {
        .dispatch = lrmd_tls_dispatch,
        .destroy = lrmd_tls_connection_destroy,
    };

    priv->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH,
                                                process_pending_notifies,
                                                lrmd);
    priv->source = mainloop_add_fd(source_name, G_PRIORITY_HIGH, priv->sock,
                                   lrmd, &tls_fd_callbacks);

    /* Async connections lose the client name provided by the API caller, so we
     * have to use our generated name here to perform the executor handshake.
     *
     * @TODO Keep track of the caller-provided name. Perhaps we should be using
     * that name in this function instead of generating one anyway.
     */
    if (do_api_handshake) {
        rc = lrmd_handshake_async(lrmd, source_name);
    }

    free(source_name);
    return rc;
}
// State passed to try_handshake_cb() while a TLS handshake is being retried
struct handshake_data_s {
// Executor connection whose TLS handshake is in progress
lrmd_t *lrmd;
// When the handshake retries began
time_t start_time;
// Number of seconds after start_time at which to give up
int timeout_sec;
};
/*!
 * \internal
 * \brief Trigger callback that makes another TLS client handshake attempt
 *
 * \param[in,out] user_data  Handshake state (struct handshake_data_s *); it
 *                           is freed once the handshake has succeeded,
 *                           failed, or timed out
 *
 * \return TRUE after setting the trigger again to retry, FALSE once the
 *         handshake has succeeded, failed, or timed out
 */
static gboolean
try_handshake_cb(gpointer user_data)
{
    struct handshake_data_s *hs = user_data;
    lrmd_t *lrmd = hs->lrmd;
    lrmd_private_t *priv = lrmd->lrmd_private;
    int gnutls_result = GNUTLS_E_SUCCESS;

    if (time(NULL) >= hs->start_time + hs->timeout_sec) {
        tls_handshake_failed(lrmd, GNUTLS_E_TIMEDOUT, ETIME);
        free(hs);
        return FALSE;
    }

    switch (pcmk__tls_client_try_handshake(priv->remote, &gnutls_result)) {
        case pcmk_rc_ok:
            tls_handshake_succeeded(lrmd);
            break;

        case EAGAIN:
            // Not finished yet: keep the state and ask to be called again
            mainloop_set_trigger(priv->handshake_trigger);
            return TRUE;

        default:
            tls_handshake_failed(lrmd, gnutls_result, EKEYREJECTED);
            break;
    }

    free(hs);
    return FALSE;
}
/*!
 * \internal
 * \brief Continue an asynchronous connection once the TCP connection attempt
 *        has completed
 *
 * On TCP success, set up TLS credentials and a TLS session on the socket and
 * start the TLS client handshake. If the handshake cannot complete right away,
 * a mainloop trigger is added to keep retrying it. Failures before the
 * handshake are reported via report_async_connection_result() after the
 * connection state has been destroyed.
 *
 * \param[in,out] userdata  Executor connection (lrmd_t *)
 * \param[in]     rc        Standard Pacemaker return code for the TCP
 *                          connection attempt
 * \param[in]     sock      Connected socket (used only if \p rc is pcmk_rc_ok)
 */
static void
lrmd_tcp_connect_cb(void *userdata, int rc, int sock)
{
lrmd_t *lrmd = userdata;
lrmd_private_t *native = lrmd->lrmd_private;
int tls_rc = GNUTLS_E_SUCCESS;
bool use_cert = pcmk__x509_enabled();
// The connection attempt has completed, so forget its timer ID
native->async_timer = 0;
if (rc != pcmk_rc_ok) {
lrmd_tls_connection_destroy(lrmd);
crm_info("Could not connect to Pacemaker Remote at %s:%d: %s "
QB_XS " rc=%d",
native->server, native->port, pcmk_rc_str(rc), rc);
report_async_connection_result(lrmd, pcmk_rc2legacy(rc));
return;
}
/* The TCP connection was successful, so establish the TLS connection. */
native->sock = sock;
// Create the TLS object only if one does not already exist
if (native->tls == NULL) {
rc = pcmk__init_tls(&native->tls, false, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_PSK);
if (rc != pcmk_rc_ok) {
lrmd_tls_connection_destroy(lrmd);
report_async_connection_result(lrmd, pcmk_rc2legacy(rc));
return;
}
}
// Without certificates, authenticate with the pre-shared key
if (!use_cert) {
gnutls_datum_t psk_key = { NULL, 0 };
rc = lrmd__init_remote_key(&psk_key);
if (rc != pcmk_rc_ok) {
crm_info("Could not connect to Pacemaker Remote at %s:%d: %s "
QB_XS " rc=%d",
native->server, native->port, pcmk_rc_str(rc), rc);
lrmd_tls_connection_destroy(lrmd);
report_async_connection_result(lrmd, pcmk_rc2legacy(rc));
return;
}
pcmk__tls_add_psk_key(native->tls, &psk_key);
// The local copy of the key is released once it has been added
gnutls_free(psk_key.data);
}
native->remote->tls_session = pcmk__new_tls_session(native->tls, sock);
if (native->remote->tls_session == NULL) {
lrmd_tls_connection_destroy(lrmd);
report_async_connection_result(lrmd, -EPROTO);
return;
}
/* If the TLS handshake immediately succeeds or fails, we can handle that
 * now without having to deal with mainloops and retries. Otherwise, add a
 * trigger to keep trying until we get a result (or it times out).
 */
rc = pcmk__tls_client_try_handshake(native->remote, &tls_rc);
if (rc == EAGAIN) {
struct handshake_data_s *hs = NULL;
// A handshake trigger already exists, so don't add another one
if (native->handshake_trigger != NULL) {
return;
}
// try_handshake_cb() frees this once the handshake has a final result
hs = pcmk__assert_alloc(1, sizeof(struct handshake_data_s));
hs->lrmd = lrmd;
hs->start_time = time(NULL);
hs->timeout_sec = TLS_HANDSHAKE_TIMEOUT;
native->handshake_trigger = mainloop_add_trigger(G_PRIORITY_LOW, try_handshake_cb, hs);
mainloop_set_trigger(native->handshake_trigger);
} else if (rc == pcmk_rc_ok) {
tls_handshake_succeeded(lrmd);
} else {
tls_handshake_failed(lrmd, tls_rc, rc);
}
}
/*!
 * \internal
 * \brief Start an asynchronous connection to a Pacemaker Remote server
 *
 * lrmd_tcp_connect_cb() is passed as the callback for the TCP connection
 * attempt.
 *
 * \param[in,out] lrmd     Executor connection
 * \param[in]     timeout  Connection timeout in milliseconds
 *
 * \return Standard Pacemaker return code
 */
static int
lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ )
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    int connect_timer = 0;
    int rc = pcmk_rc_ok;

    priv->sock = -1;
    rc = pcmk__connect_remote(priv->server, priv->port, timeout,
                              &connect_timer, &(priv->sock), lrmd,
                              lrmd_tcp_connect_cb);
    if (rc == pcmk_rc_ok) {
        priv->async_timer = connect_timer;
        return rc;
    }

    crm_warn("Pacemaker Remote connection to %s:%d failed: %s "
             QB_XS " rc=%d",
             priv->server, priv->port, pcmk_rc_str(rc), rc);
    return rc;
}
/*!
 * \internal
 * \brief Synchronously connect to a Pacemaker Remote server over TLS
 *
 * \param[in,out] lrmd  Executor connection
 * \param[out]    fd    If not NULL, where to store the connected socket; if
 *                      NULL, the connection is added to the mainloop instead
 *
 * \return Standard Pacemaker return code
 */
static int
lrmd_tls_connect(lrmd_t * lrmd, int *fd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    const bool use_cert = pcmk__x509_enabled();
    int rc = pcmk_rc_ok;

    priv->sock = -1;
    rc = pcmk__connect_remote(priv->server, priv->port, 0, NULL,
                              &(priv->sock), NULL, NULL);
    if (rc != pcmk_rc_ok) {
        crm_warn("Pacemaker Remote connection to %s:%d failed: %s "
                 QB_XS " rc=%d",
                 priv->server, priv->port, pcmk_rc_str(rc), rc);
        lrmd_tls_connection_destroy(lrmd);
        return ENOTCONN;
    }

    // Create the TLS object only if one does not already exist
    if (priv->tls == NULL) {
        rc = pcmk__init_tls(&priv->tls, false, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_PSK);
        if (rc != pcmk_rc_ok) {
            lrmd_tls_connection_destroy(lrmd);
            return rc;
        }
    }

    // Without certificates, authenticate with the pre-shared key
    if (!use_cert) {
        gnutls_datum_t psk_key = { NULL, 0 };

        rc = lrmd__init_remote_key(&psk_key);
        if (rc != pcmk_rc_ok) {
            lrmd_tls_connection_destroy(lrmd);
            return rc;
        }
        pcmk__tls_add_psk_key(priv->tls, &psk_key);
        gnutls_free(psk_key.data);
    }

    priv->remote->tls_session = pcmk__new_tls_session(priv->tls, priv->sock);
    if (priv->remote->tls_session == NULL) {
        lrmd_tls_connection_destroy(lrmd);
        return EPROTO;
    }

    // tls_client_handshake() does its own cleanup on failure
    if (tls_client_handshake(lrmd) != pcmk_rc_ok) {
        return EKEYREJECTED;
    }

    crm_info("Client TLS connection established with Pacemaker Remote server %s:%d", priv->server,
             priv->port);

    if (fd == NULL) {
        rc = add_tls_to_mainloop(lrmd, false);
    } else {
        *fd = priv->sock;
    }
    return rc;
}
/*!
 * \internal
 * \brief Connect to the executor and register with it
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in]     name  Client name to register with
 * \param[out]    fd    Passed to the connection-type-specific connect
 *                      function
 *
 * \return Legacy Pacemaker return code
 */
static int
lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    int rc = -ENOTCONN;

    switch (priv->type) {
        case pcmk__client_ipc:
            rc = lrmd_ipc_connect(lrmd, fd);
            break;

        case pcmk__client_tls:
            rc = pcmk_rc2legacy(lrmd_tls_connect(lrmd, fd));
            break;

        default:
            crm_err("Unsupported executor connection type (bug?): %d",
                    priv->type);
            rc = -EPROTONOSUPPORT;
            break;
    }

    if (rc != pcmk_ok) {
        return rc;
    }

    // Connected, so register with the executor
    rc = lrmd_handshake(lrmd, name);
    return pcmk_rc2legacy(rc);
}
/*!
 * \internal
 * \brief Connect to the executor, reporting the result via the event callback
 *
 * An event callback must already be set. IPC connections are made
 * synchronously, and only a successful result is reported via the callback.
 *
 * \param[in,out] lrmd     Executor connection
 * \param[in]     name     Client name to register with (used for IPC only)
 * \param[in]     timeout  Connection timeout (used for TLS only)
 *
 * \return Legacy Pacemaker return code
 */
static int
lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout)
{
    lrmd_private_t *native = lrmd->lrmd_private;
    int rc = pcmk_ok;

    CRM_CHECK(native && native->callback, return -EINVAL);

    if (native->type == pcmk__client_ipc) {
        /* Fake an async connection with IPC. It should be fast enough that
         * we gain very little from async.
         */
        rc = lrmd_api_connect(lrmd, name, NULL);
        if (rc == 0) {
            report_async_connection_result(lrmd, rc);
        }

    } else if (native->type == pcmk__client_tls) {
        rc = pcmk_rc2legacy(lrmd_tls_connect_async(lrmd, timeout));

    } else {
        crm_err("Unsupported executor connection type (bug?): %d",
                native->type);
        rc = -EPROTONOSUPPORT;
    }
    return rc;
}
/*!
 * \internal
 * \brief Disconnect an IPC executor connection
 *
 * \param[in,out] lrmd  Executor connection
 */
static void
lrmd_ipc_disconnect(lrmd_t * lrmd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;

    if (priv->source != NULL) {
        // Attached to mainloop
        mainloop_del_ipc_client(priv->source);
        priv->source = NULL;
        priv->ipc = NULL;
        return;
    }

    if (priv->ipc != NULL) {
        // Not attached to mainloop
        crm_ipc_t *old_ipc = priv->ipc;

        priv->ipc = NULL;
        crm_ipc_close(old_ipc);
        crm_ipc_destroy(old_ipc);
    }
}
/*!
 * \internal
 * \brief Disconnect a TLS executor connection
 *
 * Shut down the TLS session, cancel any pending asynchronous connection
 * timer, remove the connection from the mainloop (or close the socket if it
 * is not attached to the mainloop), and drop pending notifications.
 *
 * \param[in,out] lrmd  Executor connection
 */
static void
lrmd_tls_disconnect(lrmd_t * lrmd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;

    if (priv->remote->tls_session != NULL) {
        gnutls_bye(priv->remote->tls_session, GNUTLS_SHUT_RDWR);
        gnutls_deinit(priv->remote->tls_session);
        priv->remote->tls_session = NULL;
    }

    if (priv->async_timer != 0) {
        g_source_remove(priv->async_timer);
        priv->async_timer = 0;
    }

    if (priv->source != NULL) {
        // Attached to mainloop
        mainloop_del_ipc_client(priv->source);
        priv->source = NULL;

    } else if (priv->sock >= 0) {
        close(priv->sock);
        priv->sock = -1;
    }

    if (priv->pending_notify != NULL) {
        g_list_free_full(priv->pending_notify, lrmd_free_xml);
        priv->pending_notify = NULL;
    }
}
/*!
 * \internal
 * \brief Disconnect from the executor and forget registration details
 *
 * The registration token and peer version are freed whatever the connection
 * type is.
 *
 * \param[in,out] lrmd  Executor connection
 *
 * \return pcmk_ok, or -EPROTONOSUPPORT for an unsupported connection type
 */
static int
lrmd_api_disconnect(lrmd_t * lrmd)
{
    lrmd_private_t *priv = lrmd->lrmd_private;
    int rc = pcmk_ok;

    if (priv->type == pcmk__client_ipc) {
        crm_debug("Disconnecting from local executor");
        lrmd_ipc_disconnect(lrmd);

    } else if (priv->type == pcmk__client_tls) {
        crm_debug("Disconnecting from remote executor on %s",
                  priv->remote_nodename);
        lrmd_tls_disconnect(lrmd);

    } else {
        crm_err("Unsupported executor connection type (bug?): %d",
                priv->type);
        rc = -EPROTONOSUPPORT;
    }

    free(priv->token);
    priv->token = NULL;

    free(priv->peer_version);
    priv->peer_version = NULL;

    return rc;
}
/*!
 * \internal
 * \brief Register a resource with the executor
 *
 * \param[in,out] lrmd      Executor connection
 * \param[in]     rsc_id    ID of resource to register
 * \param[in]     class     Resource agent class
 * \param[in]     provider  Resource agent provider (required if \p class
 *                          supports providers)
 * \param[in]     type      Resource agent type
 * \param[in]     options   Call options to pass to the executor
 *
 * \return -EINVAL for missing arguments, otherwise the result of sending the
 *         command
 */
static int
lrmd_api_register_rsc(lrmd_t * lrmd,
                      const char *rsc_id,
                      const char *class,
                      const char *provider, const char *type, enum lrmd_call_options options)
{
    xmlNode *request_data = NULL;
    int rc = pcmk_ok;

    if ((class == NULL) || (type == NULL) || (rsc_id == NULL)) {
        return -EINVAL;
    }
    if ((provider == NULL)
        && pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
        return -EINVAL;
    }

    request_data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC);
    crm_xml_add(request_data, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add(request_data, PCMK__XA_LRMD_RSC_ID, rsc_id);
    crm_xml_add(request_data, PCMK__XA_LRMD_CLASS, class);
    crm_xml_add(request_data, PCMK__XA_LRMD_PROVIDER, provider);
    crm_xml_add(request_data, PCMK__XA_LRMD_TYPE, type);

    rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, request_data, NULL, 0,
                           options, true);
    pcmk__xml_free(request_data);
    return rc;
}
/*!
 * \internal
 * \brief Unregister a resource from the executor
 *
 * \param[in,out] lrmd     Executor connection
 * \param[in]     rsc_id   ID of resource to unregister
 * \param[in]     options  Call options to pass to the executor
 *
 * \return Result of sending the command
 */
static int
lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
{
    xmlNode *request_data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC);
    int rc = pcmk_ok;

    crm_xml_add(request_data, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add(request_data, PCMK__XA_LRMD_RSC_ID, rsc_id);

    rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, request_data, NULL, 0,
                           options, true);
    pcmk__xml_free(request_data);
    return rc;
}
/*!
 * \brief Create a new resource information object
 *
 * \param[in] rsc_id    Resource ID
 * \param[in] standard  Resource agent standard
 * \param[in] provider  Resource agent provider
 * \param[in] type      Resource agent type
 *
 * \return Newly allocated object holding copies of the given strings (to be
 *         freed with lrmd_free_rsc_info())
 */
lrmd_rsc_info_t *
lrmd_new_rsc_info(const char *rsc_id, const char *standard,
                  const char *provider, const char *type)
{
    lrmd_rsc_info_t *info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t));

    info->id = pcmk__str_copy(rsc_id);
    info->standard = pcmk__str_copy(standard);
    info->provider = pcmk__str_copy(provider);
    info->type = pcmk__str_copy(type);

    return info;
}
/*!
 * \brief Create a copy of a resource information object
 *
 * \param[in] rsc_info  Object to copy (may be NULL)
 *
 * \return Newly allocated copy of \p rsc_info (to be freed with
 *         lrmd_free_rsc_info()), or NULL if \p rsc_info is NULL
 */
lrmd_rsc_info_t *
lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info)
{
    // Accept NULL just as lrmd_free_rsc_info() does, rather than crashing
    if (rsc_info == NULL) {
        return NULL;
    }
    return lrmd_new_rsc_info(rsc_info->id, rsc_info->standard,
                             rsc_info->provider, rsc_info->type);
}
/*!
 * \brief Free a resource information object and the strings it holds
 *
 * \param[in,out] rsc_info  Object to free (may be NULL)
 */
void
lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info)
{
    if (rsc_info != NULL) {
        free(rsc_info->id);
        free(rsc_info->type);
        free(rsc_info->standard);
        free(rsc_info->provider);
        free(rsc_info);
    }
}
/*!
 * \internal
 * \brief Ask the executor for information about a registered resource
 *
 * \param[in,out] lrmd     Executor connection
 * \param[in]     rsc_id   ID of resource to look up
 * \param[in]     options  Call options to pass to the executor
 *
 * \return Newly allocated resource information, or NULL if no reply was
 *         obtained or the reply lacks a class, a type, or a provider that
 *         the class requires
 */
static lrmd_rsc_info_t *
lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options)
{
    lrmd_rsc_info_t *result = NULL;
    xmlNode *request_data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC);
    xmlNode *reply = NULL;
    const char *class = NULL;
    const char *provider = NULL;
    const char *type = NULL;

    crm_xml_add(request_data, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add(request_data, PCMK__XA_LRMD_RSC_ID, rsc_id);
    lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, request_data, &reply, 0,
                      options, true);
    pcmk__xml_free(request_data);

    if (reply == NULL) {
        return NULL;
    }

    class = crm_element_value(reply, PCMK__XA_LRMD_CLASS);
    provider = crm_element_value(reply, PCMK__XA_LRMD_PROVIDER);
    type = crm_element_value(reply, PCMK__XA_LRMD_TYPE);

    if ((class != NULL) && (type != NULL)
        && ((provider != NULL)
            || !pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider))) {

        result = lrmd_new_rsc_info(rsc_id, class, provider, type);
    }

    pcmk__xml_free(reply);
    return result;
}
/*!
 * \brief Free a recurring operation information object and its strings
 *
 * \param[in,out] op_info  Object to free (may be NULL)
 */
void
lrmd_free_op_info(lrmd_op_info_t *op_info)
{
    if (op_info == NULL) {
        return;
    }

    free(op_info->rsc_id);
    free(op_info->action);
    free(op_info->interval_ms_s);
    free(op_info->timeout_ms_s);
    free(op_info);
}
/*!
 * \internal
 * \brief Ask the executor for its recurring operations
 *
 * \param[in,out] lrmd        Executor connection
 * \param[in]     rsc_id      If not NULL, ID of the resource to ask about
 * \param[in]     timeout_ms  Timeout for the request
 * \param[in]     options     Call options to pass to the executor
 * \param[out]    output      Where to store a list of lrmd_op_info_t objects
 *                            built from the reply (may be partially filled
 *                            if memory allocation fails part-way)
 *
 * \return -EINVAL if \p output is NULL, -ENOMEM if an entry could not be
 *         allocated, otherwise the result of sending the command
 */
static int
lrmd_api_get_recurring_ops(lrmd_t *lrmd, const char *rsc_id, int timeout_ms,
                           enum lrmd_call_options options, GList **output)
{
    xmlNode *request_data = NULL;
    xmlNode *reply = NULL;
    const xmlNode *rsc_xml = NULL;
    int rc = pcmk_ok;

    if (output == NULL) {
        return -EINVAL;
    }
    *output = NULL;

    // Send request
    if (rsc_id != NULL) {
        request_data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC);
        crm_xml_add(request_data, PCMK__XA_LRMD_ORIGIN, __func__);
        crm_xml_add(request_data, PCMK__XA_LRMD_RSC_ID, rsc_id);
    }
    rc = lrmd_send_command(lrmd, LRMD_OP_GET_RECURRING, request_data, &reply,
                           timeout_ms, options, true);
    if (request_data != NULL) {
        pcmk__xml_free(request_data);
    }

    // Process reply
    if ((rc != pcmk_ok) || (reply == NULL)) {
        return rc;
    }

    for (rsc_xml = pcmk__xe_first_child(reply, PCMK__XE_LRMD_RSC, NULL, NULL);
         (rsc_xml != NULL) && (rc == pcmk_ok);
         rsc_xml = pcmk__xe_next(rsc_xml, PCMK__XE_LRMD_RSC)) {

        const xmlNode *op_xml = NULL;
        const char *reply_rsc_id = crm_element_value(rsc_xml,
                                                     PCMK__XA_LRMD_RSC_ID);

        if (reply_rsc_id == NULL) {
            crm_err("Could not parse recurring operation information from executor");
            continue;
        }

        for (op_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC_OP,
                                           NULL, NULL);
             op_xml != NULL;
             op_xml = pcmk__xe_next(op_xml, PCMK__XE_LRMD_RSC_OP)) {

            lrmd_op_info_t *entry = calloc(1, sizeof(lrmd_op_info_t));

            if (entry == NULL) {
                // The outer loop's condition stops the scan as well
                rc = -ENOMEM;
                break;
            }
            entry->rsc_id = strdup(reply_rsc_id);
            entry->action = crm_element_value_copy(op_xml,
                                                   PCMK__XA_LRMD_RSC_ACTION);
            entry->interval_ms_s =
                crm_element_value_copy(op_xml, PCMK__XA_LRMD_RSC_INTERVAL);
            entry->timeout_ms_s =
                crm_element_value_copy(op_xml, PCMK__XA_LRMD_TIMEOUT);
            *output = g_list_prepend(*output, entry);
        }
    }

    pcmk__xml_free(reply);
    return rc;
}
/*!
 * \internal
 * \brief Store the event callback in an executor connection's private data
 *
 * \param[in,out] lrmd      Executor connection
 * \param[in]     callback  Callback to store
 */
static void
lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback)
{
    lrmd_private_t *pvt = lrmd->lrmd_private;

    pvt->callback = callback;
}
/*!
 * \internal
 * \brief Store a proxy callback and its user data in an executor connection
 *
 * \param[in,out] lrmd      Executor connection
 * \param[in]     userdata  Data handed to \p callback when it is invoked
 * \param[in]     callback  Proxy callback to store
 */
void
lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata,
                                 void (*callback)(lrmd_t *lrmd, void *userdata,
                                                  xmlNode *msg))
{
    lrmd_private_t *pvt = lrmd->lrmd_private;

    pvt->proxy_callback_userdata = userdata;
    pvt->proxy_callback = callback;
}
/*!
 * \internal
 * \brief Pass a message to the stored proxy callback, if one is set
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in,out] msg   Message to trace-log and hand to the callback
 */
void
lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg)
{
    lrmd_private_t *pvt = lrmd->lrmd_private;

    if (pvt->proxy_callback == NULL) {
        return;
    }
    crm_log_xml_trace(msg, "PROXY_INBOUND");
    pvt->proxy_callback(lrmd, pvt->proxy_callback_userdata, msg);
}
/*!
 * \internal
 * \brief Tag a message as an IPC forward and send it without awaiting a reply
 *
 * \param[in,out] lrmd  Executor connection
 * \param[in,out] msg   Message to send (its operation attribute is set here)
 *
 * \return -ENOTCONN if \p lrmd is NULL, otherwise the result of
 *         lrmd_send_xml_no_reply()
 */
int
lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg)
{
    int rc = -ENOTCONN;

    if (lrmd != NULL) {
        crm_xml_add(msg, PCMK__XA_LRMD_OP, CRM_OP_IPC_FWD);
        crm_log_xml_trace(msg, "PROXY_OUTBOUND");
        rc = lrmd_send_xml_no_reply(lrmd, msg);
    }
    return rc;
}
/*!
 * \internal
 * \brief Get a fence agent's metadata using a temporary fencer API object
 *
 * \param[in]  type    Fence agent name, passed to the API's metadata method
 * \param[out] output  Where the metadata method stores its result
 *
 * \return pcmk_ok on success, -ENOMEM if the API object could not be created,
 *         -EIO if the call succeeded but left \p *output NULL, otherwise the
 *         metadata method's return code
 */
static int
-stonith_get_metadata(const char *provider, const char *type, char **output)
+stonith_get_metadata(const char *type, char **output)
{
int rc = pcmk_ok;
- stonith_t *stonith_api = stonith_api_new();
+ stonith_t *stonith_api = stonith__api_new();
if (stonith_api == NULL) {
crm_err("Could not get fence agent meta-data: API memory allocation failed");
return -ENOMEM;
}
- rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type,
- provider, output, 0);
+ rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, NULL,
+ output, 0);
// Treat "success" without any output as an I/O error
if ((rc == pcmk_ok) && (*output == NULL)) {
rc = -EIO;
}
// The API object was created only for this call
stonith_api->cmds->free(stonith_api);
return rc;
}
/*!
 * \internal
 * \brief Get agent metadata without passing any agent parameters
 *
 * Forwards to the connection's get_metadata_params method with a NULL
 * parameter list.
 *
 * \param[in,out] lrmd      Executor connection
 * \param[in]     standard  Resource agent class
 * \param[in]     provider  Resource agent provider
 * \param[in]     type      Resource agent type
 * \param[out]    output    Where the metadata is stored
 * \param[in]     options   Call options passed through
 *
 * \return Result of the get_metadata_params method
 */
static int
lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider,
                      const char *type, char **output,
                      enum lrmd_call_options options)
{
    lrmd_key_value_t *no_params = NULL;

    return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type,
                                           output, options, no_params);
}
/*!
 * \internal
 * \brief Get agent metadata by running the agent's meta-data action directly
 *
 * \param[in,out] lrmd      Executor connection (not used in this function body)
 * \param[in]     standard  Resource agent class
 * \param[in]     provider  Resource agent provider
 * \param[in]     type      Resource agent type
 * \param[out]    output    Set to a copy of the action's standard output
 * \param[in]     options   Call options (not used in this function body)
 * \param[in,out] params    Agent parameters; freed by this function on every
 *                          path
 *
 * \return pcmk_ok on success; -EINVAL if \p standard or \p type is NULL or the
 *         created action's rc is not PCMK_OCF_UNKNOWN; -ENOMEM if the action
 *         could not be created; -EIO if execution fails or yields no output;
 *         for the stonith class, the result of stonith_get_metadata()
 */
static int
lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard,
const char *provider, const char *type,
char **output, enum lrmd_call_options options,
lrmd_key_value_t *params)
{
svc_action_t *action = NULL;
GHashTable *params_table = NULL;
if (!standard || !type) {
lrmd_key_value_freeall(params);
return -EINVAL;
}
// Fence agent metadata is obtained through the fencer API instead
if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
lrmd_key_value_freeall(params);
- return stonith_get_metadata(provider, type, output);
+
+ // stonith-class resources don't support a provider
+ return stonith_get_metadata(type, output);
}
// Copy the key/value list into a hash table for the action
params_table = pcmk__strkey_table(free, free);
for (const lrmd_key_value_t *param = params; param; param = param->next) {
pcmk__insert_dup(params_table, param->key, param->value);
}
action = services__create_resource_action(type, standard, provider, type,
PCMK_ACTION_META_DATA, 0,
PCMK_DEFAULT_ACTION_TIMEOUT_MS,
params_table, 0);
lrmd_key_value_freeall(params);
if (action == NULL) {
return -ENOMEM;
}
// Any rc other than PCMK_OCF_UNKNOWN at this point is treated as invalid
if (action->rc != PCMK_OCF_UNKNOWN) {
services_action_free(action);
return -EINVAL;
}
if (!services_action_sync(action)) {
crm_err("Failed to retrieve meta-data for %s:%s:%s",
standard, provider, type);
services_action_free(action);
return -EIO;
}
if (!action->stdout_data) {
crm_err("Failed to receive meta-data for %s:%s:%s",
standard, provider, type);
services_action_free(action);
return -EIO;
}
// NOTE(review): the strdup() result is not checked before returning pcmk_ok
*output = strdup(action->stdout_data);
services_action_free(action);
return pcmk_ok;
}
/*!
 * \internal
 * \brief Ask the executor to run a resource action
 *
 * \param[in,out] lrmd         Executor connection
 * \param[in]     rsc_id       ID of the resource to act on
 * \param[in]     action       Name of the action to run
 * \param[in]     userdata     String stored in the request as user data
 * \param[in]     interval_ms  Action interval in milliseconds
 * \param[in]     timeout      Action timeout in milliseconds
 * \param[in]     start_delay  Start delay in milliseconds
 * \param[in]     options      Call options passed to lrmd_send_command()
 * \param[in,out] params       Action parameters; freed by this function
 *
 * \return Result of lrmd_send_command()
 */
static int
lrmd_api_exec(lrmd_t *lrmd, const char *rsc_id, const char *action,
              const char *userdata, guint interval_ms,
              int timeout,      /* ms */
              int start_delay,  /* ms */
              enum lrmd_call_options options, lrmd_key_value_t * params)
{
    int rc = pcmk_ok;
    xmlNode *request = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC);
    xmlNode *attrs = pcmk__xe_create(request, PCMK__XE_ATTRIBUTES);
    lrmd_key_value_t *param = params;

    crm_xml_add(request, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add(request, PCMK__XA_LRMD_RSC_ID, rsc_id);
    crm_xml_add(request, PCMK__XA_LRMD_RSC_ACTION, action);
    crm_xml_add(request, PCMK__XA_LRMD_RSC_USERDATA_STR, userdata);
    crm_xml_add_ms(request, PCMK__XA_LRMD_RSC_INTERVAL, interval_ms);
    crm_xml_add_int(request, PCMK__XA_LRMD_TIMEOUT, timeout);
    crm_xml_add_int(request, PCMK__XA_LRMD_RSC_START_DELAY, start_delay);

    // Copy every parameter into the attributes child element
    while (param != NULL) {
        hash2smartfield((gpointer) param->key, (gpointer) param->value, attrs);
        param = param->next;
    }

    rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, request, NULL, timeout,
                           options, true);
    pcmk__xml_free(request);
    lrmd_key_value_freeall(params);
    return rc;
}
/*!
 * \internal
 * \brief Ask the executor to run an alert agent
 *
 * \param[in,out] lrmd        Executor connection
 * \param[in]     alert_id    ID of the alert
 * \param[in]     alert_path  Path of the alert agent
 * \param[in]     timeout     Timeout in milliseconds
 * \param[in,out] params      Alert parameters; freed by this function
 *
 * \return Result of lrmd_send_command()
 */
static int
lrmd_api_exec_alert(lrmd_t *lrmd, const char *alert_id, const char *alert_path,
                    int timeout, lrmd_key_value_t *params)
{
    int rc = pcmk_ok;
    xmlNode *request = pcmk__xe_create(NULL, PCMK__XE_LRMD_ALERT);
    xmlNode *attrs = pcmk__xe_create(request, PCMK__XE_ATTRIBUTES);
    lrmd_key_value_t *param = params;

    crm_xml_add(request, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add(request, PCMK__XA_LRMD_ALERT_ID, alert_id);
    crm_xml_add(request, PCMK__XA_LRMD_ALERT_PATH, alert_path);
    crm_xml_add_int(request, PCMK__XA_LRMD_TIMEOUT, timeout);

    // Copy every parameter into the attributes child element
    while (param != NULL) {
        hash2smartfield((gpointer) param->key, (gpointer) param->value, attrs);
        param = param->next;
    }

    rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, request, NULL, timeout,
                           lrmd_opt_notify_orig_only, true);
    pcmk__xml_free(request);
    lrmd_key_value_freeall(params);
    return rc;
}
/*!
 * \internal
 * \brief Ask the executor to cancel a resource action
 *
 * \param[in,out] lrmd         Executor connection
 * \param[in]     rsc_id       ID of the resource the action belongs to
 * \param[in]     action       Name of the action to cancel
 * \param[in]     interval_ms  Interval of the action in milliseconds
 *
 * \return Result of lrmd_send_command()
 */
static int
lrmd_api_cancel(lrmd_t *lrmd, const char *rsc_id, const char *action,
                guint interval_ms)
{
    xmlNode *request = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC);
    int rc = pcmk_ok;

    crm_xml_add(request, PCMK__XA_LRMD_ORIGIN, __func__);
    crm_xml_add(request, PCMK__XA_LRMD_RSC_ACTION, action);
    crm_xml_add(request, PCMK__XA_LRMD_RSC_ID, rsc_id);
    crm_xml_add_ms(request, PCMK__XA_LRMD_RSC_INTERVAL, interval_ms);

    rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, request, NULL, 0, 0,
                           true);
    pcmk__xml_free(request);
    return rc;
}
/*!
 * \internal
 * \brief Count fence agents, optionally appending their names to a list
 *
 * \param[in,out] resources  If not NULL, list to which each agent name is
 *                           added with lrmd_list_add()
 *
 * \return Number of agents returned by the fencer API's list_agents method,
 *         or -ENOMEM if the API object could not be created
 */
static int
list_stonith_agents(lrmd_list_t ** resources)
{
int rc = 0;
- stonith_t *stonith_api = stonith_api_new();
+ stonith_t *stonith_api = stonith__api_new();
stonith_key_value_t *stonith_resources = NULL;
stonith_key_value_t *dIter = NULL;
if (stonith_api == NULL) {
crm_err("Could not list fence agents: API memory allocation failed");
return -ENOMEM;
}
// The method's own return value is ignored; entries are counted below
stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL,
&stonith_resources, 0);
stonith_api->cmds->free(stonith_api);
for (dIter = stonith_resources; dIter; dIter = dIter->next) {
rc++;
if (resources) {
*resources = lrmd_list_add(*resources, dIter->value);
}
}
- stonith_key_value_freeall(stonith_resources, 1, 0);
+ stonith__key_value_freeall(stonith_resources, true, false);
return rc;
}
/*!
 * \internal
 * \brief List installed resource agents, optionally limited by class/provider
 *
 * \param[in,out] lrmd       Executor connection (not used in this function)
 * \param[in,out] resources  List to which agent names are appended
 * \param[in]     class      Agent class to list, or NULL for every class
 *                           (fence agents are then included as well)
 * \param[in]     provider   Provider passed to resources_list_agents()
 *
 * \return Number of agents added, or -EPROTONOSUPPORT if none were found
 */
static int
lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class,
                     const char *provider)
{
    int rc = 0;
    bool include_stonith = false;

    if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
        include_stonith = true;

    } else {
        GList *agents = resources_list_agents(class, provider);

        for (GList *iter = agents; iter != NULL; iter = iter->next) {
            *resources = lrmd_list_add(*resources, (const char *) iter->data);
            rc++;
        }
        g_list_free_full(agents, free);

        // With no class given, fence agents are part of the answer too
        include_stonith = (class == NULL);
    }

    if (include_stonith) {
        int stonith_count = list_stonith_agents(resources);

        // A negative value is an error code, not a count
        if (stonith_count > 0) {
            rc += stonith_count;
        }
    }

    if (rc == 0) {
        // class may be NULL here, which must not be passed to %s
        crm_notice("No agents found for class %s", pcmk__s(class, "(any)"));
        rc = -EPROTONOSUPPORT;
    }
    return rc;
}
/*!
 * \internal
 * \brief Check whether a provider supplies a given agent
 *
 * \param[in] agent     Agent name to look for (compared case-insensitively)
 * \param[in] provider  Provider whose agents are listed
 * \param[in] class     Agent class passed to resources_list_agents()
 *
 * \return true if \p agent is among the provider's agents, otherwise false
 */
static bool
does_provider_have_agent(const char *agent, const char *provider, const char *class)
{
    bool found = false;
    GList *agents = resources_list_agents(class, provider);

    for (const GList *iter = agents; iter != NULL; iter = iter->next) {
        if (pcmk__str_eq(agent, iter->data, pcmk__str_casei)) {
            // One match is enough; no need to scan the rest of the list
            found = true;
            break;
        }
    }
    g_list_free_full(agents, free);
    return found;
}
/*!
 * \internal
 * \brief List OCF providers, optionally only those supplying a given agent
 *
 * \param[in,out] lrmd       Executor connection (not used in this function)
 * \param[in]     agent      If not NULL, only providers that have this agent
 *                           are listed
 * \param[in,out] providers  List to which provider names are appended
 *
 * \return Number of providers added to \p providers
 */
static int
lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers)
{
    int count = pcmk_ok;
    GList *all_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF);

    for (GList *iter = all_providers; iter != NULL; iter = iter->next) {
        char *name = iter->data;

        if ((agent != NULL)
            && !does_provider_have_agent(agent, name,
                                         PCMK_RESOURCE_CLASS_OCF)) {
            continue;
        }
        *providers = lrmd_list_add(*providers, (const char *) iter->data);
        count++;
    }
    g_list_free_full(all_providers, free);
    return count;
}
/*!
 * \internal
 * \brief List supported resource agent standards
 *
 * The stonith class is appended when at least one fence agent is found.
 *
 * \param[in,out] lrmd       Executor connection (not used in this function)
 * \param[in,out] supported  List to which standard names are appended
 *
 * \return Number of standards added to \p supported
 */
static int
lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported)
{
    int count = 0;
    GList *standards = resources_list_standards();
    GList *iter = standards;

    while (iter != NULL) {
        *supported = lrmd_list_add(*supported, (const char *) iter->data);
        count++;
        iter = iter->next;
    }

    if (list_stonith_agents(NULL) > 0) {
        *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH);
        count++;
    }

    g_list_free_full(standards, free);
    return count;
}
/*!
 * \internal
 * \brief Allocate and initialize an executor API object
 *
 * \param[out] api       Receives the new object on success and NULL on
 *                       failure (free it with lrmd_api_delete())
 * \param[in]  nodename  For a remote connection, node name to use in the
 *                       cluster for the remote executor
 * \param[in]  server    For a remote connection, resolvable host name to
 *                       connect to
 * \param[in]  port      For a remote connection, port on \p server (0 selects
 *                       crm_default_remote_port())
 *
 * \return Standard Pacemaker return code
 * \note When only one of \p nodename and \p server is given, it is used for
 *       both. When both are NULL, the object is set up for a local (IPC)
 *       executor connection.
 */
int
lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port)
{
    lrmd_t *new_api = NULL;
    lrmd_private_t *pvt = NULL;

    if (api == NULL) {
        return EINVAL;
    }
    *api = NULL;

    // Allocate all memory needed

    new_api = calloc(1, sizeof(lrmd_t));
    if (new_api == NULL) {
        return ENOMEM;
    }

    pvt = calloc(1, sizeof(lrmd_private_t));
    if (pvt == NULL) {
        lrmd_api_delete(new_api);
        return ENOMEM;
    }
    new_api->lrmd_private = pvt;

    // @TODO Do we need to do this for local connections?
    pvt->remote = calloc(1, sizeof(pcmk__remote_t));

    new_api->cmds = calloc(1, sizeof(lrmd_api_operations_t));

    if ((pvt->remote == NULL) || (new_api->cmds == NULL)) {
        lrmd_api_delete(new_api);
        return ENOMEM;
    }

    // Set methods
    new_api->cmds->connect = lrmd_api_connect;
    new_api->cmds->connect_async = lrmd_api_connect_async;
    new_api->cmds->is_connected = lrmd_api_is_connected;
    new_api->cmds->poke_connection = lrmd_api_poke_connection;
    new_api->cmds->disconnect = lrmd_api_disconnect;
    new_api->cmds->register_rsc = lrmd_api_register_rsc;
    new_api->cmds->unregister_rsc = lrmd_api_unregister_rsc;
    new_api->cmds->get_rsc_info = lrmd_api_get_rsc_info;
    new_api->cmds->get_recurring_ops = lrmd_api_get_recurring_ops;
    new_api->cmds->set_callback = lrmd_api_set_callback;
    new_api->cmds->get_metadata = lrmd_api_get_metadata;
    new_api->cmds->exec = lrmd_api_exec;
    new_api->cmds->cancel = lrmd_api_cancel;
    new_api->cmds->list_agents = lrmd_api_list_agents;
    new_api->cmds->list_ocf_providers = lrmd_api_list_ocf_providers;
    new_api->cmds->list_standards = lrmd_api_list_standards;
    new_api->cmds->exec_alert = lrmd_api_exec_alert;
    new_api->cmds->get_metadata_params = lrmd_api_get_metadata_params;

    if ((nodename != NULL) || (server != NULL)) {
        // Remote connection: fill in whichever name the caller left out
        if (nodename == NULL) {
            nodename = server;
        } else if (server == NULL) {
            server = nodename;
        }
        pvt->type = pcmk__client_tls;
        pvt->remote_nodename = strdup(nodename);
        pvt->server = strdup(server);
        if ((pvt->remote_nodename == NULL) || (pvt->server == NULL)) {
            lrmd_api_delete(new_api);
            return ENOMEM;
        }
        pvt->port = port;
        if (pvt->port == 0) {
            pvt->port = crm_default_remote_port();
        }

    } else {
        pvt->type = pcmk__client_ipc;
    }

    *api = new_api;
    return pcmk_rc_ok;
}
/*!
 * \brief Create an executor API object for a local executor connection
 *
 * \return Newly created object (creation failure trips pcmk__assert())
 */
lrmd_t *
lrmd_api_new(void)
{
    lrmd_t *new_api = NULL;
    int rc = lrmd__new(&new_api, NULL, NULL, 0);

    pcmk__assert(rc == pcmk_rc_ok);
    return new_api;
}
/*!
 * \brief Create an executor API object for a remote executor connection
 *
 * \param[in] nodename  Node name passed to lrmd__new()
 * \param[in] server    Host name passed to lrmd__new()
 * \param[in] port      Port passed to lrmd__new()
 *
 * \return Newly created object (creation failure trips pcmk__assert())
 */
lrmd_t *
lrmd_remote_api_new(const char *nodename, const char *server, int port)
{
    lrmd_t *new_api = NULL;
    int rc = lrmd__new(&new_api, nodename, server, port);

    pcmk__assert(rc == pcmk_rc_ok);
    return new_api;
}
/*!
 * \brief Disconnect and free an executor API object
 *
 * \param[in,out] lrmd  Object to free (nothing is done if NULL)
 */
void
lrmd_api_delete(lrmd_t * lrmd)
{
    lrmd_private_t *pvt = NULL;

    if (lrmd == NULL) {
        return;
    }

    if (lrmd->cmds != NULL) { // Never NULL, but make static analysis happy
        if (lrmd->cmds->disconnect != NULL) { // Also never really NULL
            lrmd->cmds->disconnect(lrmd); // No-op if already disconnected
        }
        free(lrmd->cmds);
    }

    pvt = lrmd->lrmd_private;
    if (pvt != NULL) {
        free(pvt->server);
        free(pvt->remote_nodename);
        free(pvt->remote);
        free(pvt->token);
        free(pvt->peer_version);
        free(pvt);
    }

    free(lrmd);
}
// Caller-supplied callback and data carried through an async metadata action
struct metadata_cb {
// Function invoked with the action's result once it is available
void (*callback)(int pid, const pcmk__action_result_t *result,
void *user_data);
// Opaque pointer passed back to callback unchanged
void *user_data;
};
/*!
 * \internal
 * \brief Hand the result of a finished metadata action to the user's callback
 *
 * \param[in,out] action  Metadata action that completed; its cb_data (a
 *                        struct metadata_cb) is freed here
 */
static void
metadata_complete(svc_action_t *action)
{
    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
    struct metadata_cb *cb_info = (struct metadata_cb *) action->cb_data;

    services__copy_result(action, &result);
    pcmk__set_result_output(&result, action->stdout_data, action->stderr_data);

    cb_info->callback(0, &result, cb_info->user_data);

    // The action still owns both output buffers, so detach before resetting
    result.action_stdout = NULL;
    result.action_stderr = NULL;
    pcmk__reset_result(&result);

    free(cb_info);
}
/*!
* \internal
* \brief Retrieve agent metadata asynchronously
*
* \param[in] rsc Resource agent specification
* \param[in] callback Function to call with result (this will always be
* called, whether by this function directly or later
* via the main loop, and on success the metadata will
* be in its result argument's action_stdout)
* \param[in,out] user_data User data to pass to callback
*
* \return Standard Pacemaker return code
* \note This function is not a lrmd_api_operations_t method because it does not
* need an lrmd_t object and does not go through the executor, but
* executes the agent directly.
*/
int
lrmd__metadata_async(const lrmd_rsc_info_t *rsc,
void (*callback)(int pid,
const pcmk__action_result_t *result,
void *user_data),
void *user_data)
{
svc_action_t *action = NULL;
struct metadata_cb *metadata_cb = NULL;
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
CRM_CHECK(callback != NULL, return EINVAL);
if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) {
pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED,
PCMK_EXEC_ERROR_FATAL,
"Invalid resource specification");
callback(0, &result, user_data);
pcmk__reset_result(&result);
return EINVAL;
}
if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) {
return stonith__metadata_async(rsc->type,
pcmk__timeout_ms2s(PCMK_DEFAULT_ACTION_TIMEOUT_MS),
callback, user_data);
}
action = services__create_resource_action(pcmk__s(rsc->id, rsc->type),
rsc->standard, rsc->provider,
rsc->type,
PCMK_ACTION_META_DATA, 0,
PCMK_DEFAULT_ACTION_TIMEOUT_MS,
NULL, 0);
if (action == NULL) {
pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Out of memory");
callback(0, &result, user_data);
pcmk__reset_result(&result);
return ENOMEM;
}
if (action->rc != PCMK_OCF_UNKNOWN) {
services__copy_result(action, &result);
callback(0, &result, user_data);
pcmk__reset_result(&result);
services_action_free(action);
return EINVAL;
}
action->cb_data = calloc(1, sizeof(struct metadata_cb));
if (action->cb_data == NULL) {
services_action_free(action);
pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Out of memory");
callback(0, &result, user_data);
pcmk__reset_result(&result);
return ENOMEM;
}
metadata_cb = (struct metadata_cb *) action->cb_data;
metadata_cb->callback = callback;
metadata_cb->user_data = user_data;
if (!services_action_async(action, metadata_complete)) {
services_action_free(action);
return pcmk_rc_error; // @TODO Derive from action->rc and ->status
}
// The services library has taken responsibility for action
return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Store an exit status, execution status and exit reason in an event
 *
 * \param[in,out] event        Executor event to update (NULL is a no-op)
 * \param[in]     rc           OCF exit status to store
 * \param[in]     op_status    Executor status to store
 * \param[in]     exit_reason  Human-friendly description to store
 */
void
lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, int op_status,
                 const char *exit_reason)
{
    if (event != NULL) {
        event->rc = rc;
        event->op_status = op_status;

        // lrmd_event_data_t has (const char *) members that lrmd_free_event() frees
        pcmk__str_update((char **) &event->exit_reason, exit_reason);
    }
}
/*!
 * \internal
 * \brief Free and clear an executor event's exit reason and output
 *
 * \param[in,out] event  Executor event to reset (NULL is a no-op)
 */
void
lrmd__reset_result(lrmd_event_data_t *event)
{
    if (event != NULL) {
        free((void *) event->exit_reason);
        event->exit_reason = NULL;

        free((void *) event->output);
        event->output = NULL;
    }
}
/*!
 * \internal
 * \brief Get the uptime stored for a remote resource connection
 *
 * When the cluster connects to a remote resource, part of that resource's
 * handshake includes the uptime of the remote resource's connection. This
 * uptime is stored in the lrmd_t object.
 *
 * \param[in] lrmd  Executor connection to query
 *
 * \return The connection's uptime, or -1 if unknown
 */
time_t
lrmd__uptime(lrmd_t *lrmd)
{
    const lrmd_private_t *pvt = lrmd->lrmd_private;

    if (pvt->remote != NULL) {
        return pvt->remote->uptime;
    }
    return -1;
}
/*!
 * \internal
 * \brief Get the start state stored for a remote connection
 *
 * \param[in] lrmd  Executor connection to query
 *
 * \return The remote object's start_state, or NULL if the connection has no
 *         remote object
 */
const char *
lrmd__node_start_state(lrmd_t *lrmd)
{
    const lrmd_private_t *pvt = lrmd->lrmd_private;

    if (pvt->remote != NULL) {
        return pvt->remote->start_state;
    }
    return NULL;
}
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
index 04e9ee8ad2..8febb37498 100644
--- a/lib/pacemaker/pcmk_fence.c
+++ b/lib/pacemaker/pcmk_fence.c
@@ -1,677 +1,676 @@
/*
- * Copyright 2009-2024 the Pacemaker project contributors
+ * Copyright 2009-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/results.h>
#include <crm/common/output.h>
#include <crm/common/output_internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h> // stonith__*
#include <glib.h>
#include <libxml/tree.h>
#include <pacemaker.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Call options used for the synchronous fencer requests in this file
static const int st_opts = st_opt_sync_call|st_opt_allow_self_fencing;
// Main loop created by pcmk__request_fencing() and quit by its callbacks
static GMainLoop *mainloop = NULL;
// State shared between pcmk__request_fencing() and its main loop callbacks
static struct {
stonith_t *st; // Fencer API object used for the request
const char *target; // Fencing target (borrowed from the caller)
const char *action; // Fencing action (borrowed from the caller)
char *name; // Copy of the client name; freed after the main loop returns
unsigned int timeout; // Converted with pcmk__timeout_ms2s() before use
unsigned int tolerance; // Converted with pcmk__timeout_ms2s() before use
int delay; // Passed to fence_with_delay(); added to the callback timeout if > 0
pcmk__action_result_t result; // Outcome recorded by the callbacks
} async_fence_data = { NULL, };
/*!
 * \internal
 * \brief Register or unregister a fencing topology level
 *
 * \param[in,out] st           Fencer API object
 * \param[in]     target       "name=value" (node attribute), "@pattern" (node
 *                             name pattern), or a plain node name
 * \param[in]     fence_level  Index of the level to add or remove
 * \param[in]     devices      Device IDs for the level (used only when adding)
 * \param[in]     added        true to register the level, false to remove it
 *
 * \return Standard Pacemaker return code
 * \note NOTE(review): when \p target contains '=', that character is
 *       overwritten with a terminator in place even though \p target is
 *       declared const.
 */
static int
handle_level(stonith_t *st, const char *target, int fence_level, GList *devices,
bool added)
{
const char *node = NULL;
const char *pattern = NULL;
const char *name = NULL;
char *value = NULL;
int rc = pcmk_rc_ok;
if (target == NULL) {
// Not really possible, but makes static analysis happy
return EINVAL;
}
/* Determine if targeting by attribute, node name pattern or node name */
value = strchr(target, '=');
if (value != NULL) {
name = target;
// Split the string at '=' so name and value are separate strings
*value++ = '\0';
} else if (*target == '@') {
pattern = target + 1;
} else {
node = target;
}
/* Register or unregister level as appropriate */
if (added) {
stonith_key_value_t *kvs = NULL;
// Build a key/value list whose values are the device IDs
for (GList *iter = devices; iter != NULL; iter = iter->next) {
- kvs = stonith_key_value_add(kvs, NULL, iter->data);
+ kvs = stonith__key_value_add(kvs, NULL, iter->data);
}
rc = st->cmds->register_level_full(st, st_opts, node, pattern, name,
value, fence_level, kvs);
- stonith_key_value_freeall(kvs, 0, 1);
+ stonith__key_value_freeall(kvs, false, true);
} else {
rc = st->cmds->remove_level_full(st, st_opts, node, pattern,
name, value, fence_level);
}
return pcmk_legacy2rc(rc);
}
/*!
 * \internal
 * \brief Drop duplicate completed entries from a fencing history list
 *
 * The first entry is always kept. Each later entry whose state is st_done or
 * st_failed is freed if an already kept entry has the same target, action and
 * state (and, for failures, the same delegate); every other entry is appended
 * to the kept list.
 *
 * \param[in,out] history  List to reduce; its entries are relinked or freed
 *
 * \return Head of the reduced list (\p history itself, or NULL if it was NULL)
 */
static stonith_history_t *
reduce_fence_history(stonith_history_t *history)
{
stonith_history_t *new, *hp, *np;
if (!history) {
return history;
}
// Start the kept list with the first entry and detach the remainder
new = history;
hp = new->next;
new->next = NULL;
while (hp) {
stonith_history_t *hp_next = hp->next;
hp->next = NULL;
// Compare hp against each kept entry; append it if nothing matches
for (np = new; ; np = np->next) {
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* action not in progress */
if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei)
&& pcmk__str_eq(hp->action, np->action, pcmk__str_none)
&& (hp->state == np->state)
&& ((hp->state == st_done)
|| pcmk__str_eq(hp->delegate, np->delegate,
pcmk__str_casei))) {
/* purge older hp */
- stonith_history_free(hp);
+ stonith__history_free(hp);
break;
}
}
if (!np->next) {
np->next = hp;
break;
}
}
hp = hp_next;
}
return new;
}
/*!
 * \internal
 * \brief Record the result of a fence notification for the pending request
 *
 * Notifications whose target or action differs from the pending request are
 * ignored; a matching one stores its result and quits the main loop.
 *
 * \param[in,out] st  Fencer API object (not used in this function)
 * \param[in]     e   Fencing event that was notified
 */
static void
notify_callback(stonith_t * st, stonith_event_t * e)
{
    if (!pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)) {
        return;
    }
    if (!pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_none)) {
        return;
    }

    pcmk__set_result(&async_fence_data.result,
                     stonith__event_exit_status(e),
                     stonith__event_execution_status(e),
                     stonith__event_exit_reason(e));
    g_main_loop_quit(mainloop);
}
/*!
 * \internal
 * \brief Record the result of the fencing request and quit the main loop
 *
 * \param[in,out] stonith  Fencer API object (not used in this function)
 * \param[in]     data     Callback data holding the request's result
 */
static void
fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    pcmk__action_result_t *result = &async_fence_data.result;

    pcmk__set_result(result, stonith__exit_status(data),
                     stonith__execution_status(data),
                     stonith__exit_reason(data));
    g_main_loop_quit(mainloop);
}
/*!
 * \internal
 * \brief Main loop trigger that connects to the fencer and starts fencing
 *
 * Uses the request stored in async_fence_data. On connection or request
 * failure it records an error result and quits the main loop; otherwise it
 * registers fence_callback() for the call.
 *
 * \param[in] user_data  Not used in this function
 *
 * \return Always TRUE
 */
static gboolean
async_fence_helper(gpointer user_data)
{
stonith_t *st = async_fence_data.st;
int call_id = 0;
- int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
+ int rc = stonith__api_connect_retry(st, async_fence_data.name, 10);
int timeout = 0;
- if (rc != pcmk_ok) {
+ if (rc != pcmk_rc_ok) {
g_main_loop_quit(mainloop);
pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
- PCMK_EXEC_NOT_CONNECTED, pcmk_strerror(rc));
+ PCMK_EXEC_NOT_CONNECTED, pcmk_rc_str(rc));
return TRUE;
}
st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE,
notify_callback);
call_id = st->cmds->fence_with_delay(st,
st_opt_allow_self_fencing,
async_fence_data.target,
async_fence_data.action,
pcmk__timeout_ms2s(async_fence_data.timeout),
pcmk__timeout_ms2s(async_fence_data.tolerance),
async_fence_data.delay);
// A negative call ID is an error code
if (call_id < 0) {
g_main_loop_quit(mainloop);
pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
PCMK_EXEC_ERROR, pcmk_strerror(call_id));
return TRUE;
}
// The callback timeout also covers any positive requested delay
timeout = pcmk__timeout_ms2s(async_fence_data.timeout);
if (async_fence_data.delay > 0) {
timeout += async_fence_data.delay;
}
st->cmds->register_callback(st, call_id, timeout, st_opt_timeout_updates,
NULL, "callback", fence_callback);
return TRUE;
}
/*!
 * \internal
 * \brief Request fencing of a target and wait in a main loop for the result
 *
 * \param[in,out] st         Fencer API object
 * \param[in]     target     Node to fence
 * \param[in]     action     Fencing action to perform
 * \param[in]     name       Client name used when connecting to the fencer
 * \param[in]     timeout    Timeout, converted with pcmk__timeout_ms2s()
 * \param[in]     tolerance  Tolerance, converted with pcmk__timeout_ms2s()
 * \param[in]     delay      Delay passed to the fencer with the request
 * \param[out]    reason     If not NULL, receives ownership of the result's
 *                           exit reason
 *
 * \return Standard Pacemaker return code derived from the recorded result
 */
int
pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
                      const char *name, unsigned int timeout,
                      unsigned int tolerance, int delay, char **reason)
{
    crm_trigger_t *helper_trigger = NULL;
    int rc = pcmk_rc_ok;

    // Stash the request where the main loop callbacks can find it
    async_fence_data.st = st;
    async_fence_data.name = strdup(name);
    async_fence_data.target = target;
    async_fence_data.action = action;
    async_fence_data.timeout = timeout;
    async_fence_data.tolerance = tolerance;
    async_fence_data.delay = delay;
    pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_UNKNOWN,
                     NULL);

    helper_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper,
                                          NULL);
    mainloop_set_trigger(helper_trigger);

    mainloop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(mainloop);

    free(async_fence_data.name);

    if (reason != NULL) {
        // Give the caller ownership of the exit reason
        *reason = async_fence_data.result.exit_reason;
        async_fence_data.result.exit_reason = NULL;
    }

    rc = stonith__result2rc(&async_fence_data.result);
    pcmk__reset_result(&async_fence_data.result);
    return rc;
}
/*!
 * \brief Request fencing of a target, with output set up around \p xml
 *
 * Obtains an output object and fencer API object from
 * pcmk__setup_output_fencing(), runs pcmk__request_fencing(), then finishes
 * the output, disconnects and frees the API object.
 *
 * \param[in,out] xml        Passed to the output setup and finish functions
 * \param[in]     target     Node to fence
 * \param[in]     action     Fencing action to perform
 * \param[in]     name       Client name used when connecting to the fencer
 * \param[in]     timeout    Passed through to pcmk__request_fencing()
 * \param[in]     tolerance  Passed through to pcmk__request_fencing()
 * \param[in]     delay      Passed through to pcmk__request_fencing()
 * \param[out]    reason     Passed through to pcmk__request_fencing()
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_request_fencing(xmlNodePtr *xml, const char *target, const char *action,
const char *name, unsigned int timeout,
unsigned int tolerance, int delay, char **reason)
{
stonith_t *st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = pcmk__request_fencing(st, target, action, name, timeout, tolerance,
delay, reason);
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*!
 * \internal
 * \brief Show, and optionally clean up or broadcast, the fencing history
 *
 * \param[in,out] out        Output object
 * \param[in,out] st         Fencer API object
 * \param[in]     target     Node whose history is wanted; NULL or "*" is
 *                           passed to the history method as NULL
 * \param[in]     timeout    Timeout, converted with pcmk__timeout_ms2s()
 * \param[in]     verbose    If nonzero (and output is not quiet), every event
 *                           is shown
 * \param[in]     broadcast  If true, add st_opt_broadcast to the call options
 * \param[in]     cleanup    If true, add st_opt_cleanup and return without
 *                           listing events
 *
 * \return Standard Pacemaker return code from the history request
 */
int
pcmk__fence_history(pcmk__output_t *out, stonith_t *st, const char *target,
unsigned int timeout, int verbose, bool broadcast,
bool cleanup)
{
stonith_history_t *history = NULL;
stonith_history_t *latest = NULL;
int rc = pcmk_rc_ok;
int opts = 0;
if (cleanup) {
out->info(out, "cleaning up fencing-history%s%s",
target ? " for node " : "", target ? target : "");
}
if (broadcast) {
out->info(out, "gather fencing-history from all nodes");
}
stonith__set_call_options(opts, target, st_opts);
if (cleanup) {
stonith__set_call_options(opts, target, st_opt_cleanup);
}
if (broadcast) {
stonith__set_call_options(opts, target, st_opt_broadcast);
}
if (pcmk__str_eq(target, "*", pcmk__str_none)) {
target = NULL;
}
rc = st->cmds->history(st, opts, target, &history, pcmk__timeout_ms2s(timeout));
if (cleanup) {
// Cleanup doesn't return a history list
- stonith_history_free(history);
+ stonith__history_free(history);
return pcmk_legacy2rc(rc);
}
out->begin_list(out, "event", "events", "Fencing history");
history = stonith__sort_history(history);
for (stonith_history_t *hp = history; hp != NULL; hp = hp->next) {
// Remember the last st_done entry seen in the sorted list
if (hp->state == st_done) {
latest = hp;
}
if (out->is_quiet(out) || !verbose) {
continue;
}
out->message(out, "stonith-event", hp, true, false,
stonith__later_succeeded(hp, history),
(uint32_t) pcmk_show_failed_detail);
out->increment_list(out);
}
if (latest) {
if (out->is_quiet(out)) {
out->message(out, "stonith-event", latest, false, true, NULL,
(uint32_t) pcmk_show_failed_detail);
} else if (!verbose) { // already printed if verbose
out->message(out, "stonith-event", latest, false, false, NULL,
(uint32_t) pcmk_show_failed_detail);
out->increment_list(out);
}
}
out->end_list(out);
- stonith_history_free(history);
+ stonith__history_free(history);
return pcmk_legacy2rc(rc);
}
/*!
 * \brief Show the fencing history, with output set up around \p xml
 *
 * Obtains an output object and fencer API object from
 * pcmk__setup_output_fencing(), runs pcmk__fence_history(), then finishes the
 * output, disconnects and frees the API object.
 *
 * \param[in,out] xml        Passed to the output setup and finish functions
 * \param[in]     target     Passed through to pcmk__fence_history()
 * \param[in]     timeout    Passed through to pcmk__fence_history()
 * \param[in]     quiet      Stored in the output object's quiet flag
 * \param[in]     verbose    Passed through to pcmk__fence_history()
 * \param[in]     broadcast  Passed through to pcmk__fence_history()
 * \param[in]     cleanup    Passed through to pcmk__fence_history()
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_fence_history(xmlNodePtr *xml, const char *target, unsigned int timeout,
bool quiet, int verbose, bool broadcast, bool cleanup)
{
stonith_t *st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
out->quiet = quiet;
rc = pcmk__fence_history(out, st, target, timeout, verbose, broadcast,
cleanup);
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*!
 * \internal
 * \brief List the fence agents reported by the fencer API's list_agents method
 *
 * \param[in,out] out  Output object
 * \param[in,out] st   Fencer API object
 *
 * \return pcmk_rc_ok after listing, or a standard return code converted from
 *         a negative list_agents result
 */
int
-pcmk__fence_installed(pcmk__output_t *out, stonith_t *st, unsigned int timeout)
+pcmk__fence_installed(pcmk__output_t *out, stonith_t *st)
{
stonith_key_value_t *devices = NULL;
int rc = pcmk_rc_ok;
- rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices,
- pcmk__timeout_ms2s(timeout));
+ rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, 0);
// rc is a negative error code or a positive number of agents
if (rc < 0) {
return pcmk_legacy2rc(rc);
}
out->begin_list(out, "fence device", "fence devices",
"Installed fence devices");
for (stonith_key_value_t *iter = devices; iter != NULL; iter = iter->next) {
out->list_item(out, "device", "%s", iter->value);
}
out->end_list(out);
- stonith_key_value_freeall(devices, 1, 1);
+ stonith__key_value_freeall(devices, true, true);
return pcmk_rc_ok;
}
/*!
 * \brief List installed fence agents, with output set up around \p xml
 *
 * Obtains an output object and fencer API object from
 * pcmk__setup_output_fencing(), runs pcmk__fence_installed(), then finishes
 * the output, disconnects and frees the API object.
 *
 * \param[in,out] xml      Passed to the output setup and finish functions
 * \param[in]     timeout  Part of the signature; whether it reaches
 *                         pcmk__fence_installed() depends on which side of
 *                         the change below is applied
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_fence_installed(xmlNodePtr *xml, unsigned int timeout)
{
stonith_t *st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
- rc = pcmk__fence_installed(out, st, timeout);
+ rc = pcmk__fence_installed(out, st);
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*!
 * \internal
 * \brief Output when a node was last fenced
 *
 * \param[in,out] out        Output object
 * \param[in]     target     Node name, or node ID as a string when
 *                           \p as_nodeid is true
 * \param[in]     as_nodeid  Whether \p target is parsed as a numeric node ID
 *
 * \return pcmk_rc_ok if \p target is NULL, otherwise the result of the
 *         "last-fenced" output message
 */
int
pcmk__fence_last(pcmk__output_t *out, const char *target, bool as_nodeid)
{
    time_t last_fenced = 0;

    if (target == NULL) {
        return pcmk_rc_ok;
    }

    last_fenced = as_nodeid? stonith_api_time(atol(target), NULL, FALSE)
                           : stonith_api_time(0, target, FALSE);

    return out->message(out, "last-fenced", target, last_fenced);
}
/*!
 * \brief Output when a node was last fenced, with output built around \p xml
 *
 * \param[in,out] xml        Passed to the XML output creation and finish
 *                           functions
 * \param[in]     target     Passed through to pcmk__fence_last()
 * \param[in]     as_nodeid  Passed through to pcmk__fence_last()
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_fence_last(xmlNodePtr *xml, const char *target, bool as_nodeid)
{
    pcmk__output_t *out = NULL;
    int rc = pcmk__xml_output_new(&out, xml);

    if (rc == pcmk_rc_ok) {
        stonith__register_messages(out);

        rc = pcmk__fence_last(out, target, as_nodeid);
        pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
    }
    return rc;
}
/*!
 * \internal
 * \brief List the targets that a fence device can fence
 *
 * \param[in,out] out        Output object
 * \param[in,out] st         Fencer API object
 * \param[in]     device_id  ID of the fence device to query
 * \param[in]     timeout    Timeout, converted with pcmk__timeout_ms2s()
 *
 * \return Standard Pacemaker return code
 */
int
pcmk__fence_list_targets(pcmk__output_t *out, stonith_t *st,
                         const char *device_id, unsigned int timeout)
{
    GList *targets = NULL;
    char *lists = NULL;
    int rc = pcmk_rc_ok;

    rc = st->cmds->list(st, st_opts, device_id, &lists,
                        pcmk__timeout_ms2s(timeout));
    if (rc != pcmk_rc_ok) {
        return pcmk_legacy2rc(rc);
    }

    targets = stonith__parse_targets(lists);

    out->begin_list(out, "fence target", "fence targets", "Fence Targets");

    // Walk with a separate iterator so the list head stays available to free
    for (const GList *iter = targets; iter != NULL; iter = iter->next) {
        out->list_item(out, NULL, "%s", (const char *) iter->data);
    }
    out->end_list(out);

    // The parsed list and its strings belong to this function
    g_list_free_full(targets, free);
    free(lists);
    return rc;
}
/*!
 * \brief List a fence device's targets, with output set up around \p xml
 *
 * Obtains an output object and fencer API object from
 * pcmk__setup_output_fencing(), runs pcmk__fence_list_targets(), then
 * finishes the output, disconnects and frees the API object.
 *
 * \param[in,out] xml        Passed to the output setup and finish functions
 * \param[in]     device_id  Passed through to pcmk__fence_list_targets()
 * \param[in]     timeout    Passed through to pcmk__fence_list_targets()
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_fence_list_targets(xmlNodePtr *xml, const char *device_id, unsigned int timeout)
{
stonith_t *st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = pcmk__fence_list_targets(out, st, device_id, timeout);
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*!
 * \internal
 * \brief Output a fence agent's metadata
 *
 * \param[in,out] out      Output object
 * \param[in,out] st       Fencer API object
 * \param[in]     agent    Fence agent whose metadata is wanted
 * \param[in]     timeout  Timeout, converted with pcmk__timeout_ms2s()
 *
 * \return Standard Pacemaker return code
 */
int
pcmk__fence_metadata(pcmk__output_t *out, stonith_t *st, const char *agent,
                     unsigned int timeout)
{
    char *metadata = NULL;
    int rc = pcmk_rc_ok;

    rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &metadata,
                            pcmk__timeout_ms2s(timeout));
    if (rc == pcmk_rc_ok) {
        out->output_xml(out, PCMK_XE_METADATA, metadata);
        free(metadata);
        return rc;
    }
    return pcmk_legacy2rc(rc);
}
/*!
 * \brief Output a fence agent's metadata, with output set up around \p xml
 *
 * Obtains an output object and fencer API object from
 * pcmk__setup_output_fencing(), runs pcmk__fence_metadata(), then finishes
 * the output, disconnects and frees the API object.
 *
 * \param[in,out] xml      Passed to the output setup and finish functions
 * \param[in]     agent    Passed through to pcmk__fence_metadata()
 * \param[in]     timeout  Passed through to pcmk__fence_metadata()
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_fence_metadata(xmlNodePtr *xml, const char *agent, unsigned int timeout)
{
stonith_t *st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = pcmk__fence_metadata(out, st, agent, timeout);
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*!
 * \internal
 * \brief List the fence devices returned by the fencer API's query method
 *
 * \param[in,out] out      Output object
 * \param[in,out] st       Fencer API object
 * \param[in]     target   Passed to the query method
 * \param[in]     timeout  Timeout, converted with pcmk__timeout_ms2s()
 *
 * \return pcmk_rc_ok after listing, or a standard return code converted from
 *         a negative query result
 */
int
pcmk__fence_registered(pcmk__output_t *out, stonith_t *st, const char *target,
unsigned int timeout)
{
stonith_key_value_t *devices = NULL;
int rc = pcmk_rc_ok;
rc = st->cmds->query(st, st_opts, target, &devices, pcmk__timeout_ms2s(timeout));
/* query returns a negative error code or a positive number of results. */
if (rc < 0) {
return pcmk_legacy2rc(rc);
}
out->begin_list(out, "fence device", "fence devices",
"Registered fence devices");
for (stonith_key_value_t *iter = devices; iter != NULL; iter = iter->next) {
out->list_item(out, "device", "%s", iter->value);
}
out->end_list(out);
- stonith_key_value_freeall(devices, 1, 1);
+ stonith__key_value_freeall(devices, true, true);
/* Return pcmk_rc_ok here, not the number of results. Callers probably
* don't care.
*/
return pcmk_rc_ok;
}
/*!
 * \brief List registered fence devices, with output set up around \p xml
 *
 * Obtains an output object and fencer API object from
 * pcmk__setup_output_fencing(), runs pcmk__fence_registered(), then finishes
 * the output, disconnects and frees the API object.
 *
 * \param[in,out] xml      Passed to the output setup and finish functions
 * \param[in]     target   Passed through to pcmk__fence_registered()
 * \param[in]     timeout  Passed through to pcmk__fence_registered()
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_fence_registered(xmlNodePtr *xml, const char *target, unsigned int timeout)
{
stonith_t *st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = pcmk__fence_registered(out, st, target, timeout);
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*!
 * \internal
 * \brief Forward a topology-level request to handle_level() with its final
 *        argument set to \c true
 *
 * \param[in,out] st           Fencer connection
 * \param[in]     target       Target passed through to handle_level()
 * \param[in]     fence_level  Level index passed through to handle_level()
 * \param[in]     devices      Device list passed through to handle_level()
 *
 * \return Whatever handle_level() returns
 *
 * \note Presumably the \c true flag selects registration (as opposed to
 *       removal); confirm against handle_level().
 */
int
pcmk__fence_register_level(stonith_t *st, const char *target, int fence_level,
                           GList *devices)
{
    int rc = handle_level(st, target, fence_level, devices, true);

    return rc;
}
/*
 * Public wrapper around pcmk__fence_register_level().
 *
 * Sets up an XML output object and fencer connection, forwards the request,
 * finishes the XML output into *xml, then disconnects and frees the fencer
 * connection.
 *
 * Returns the setup error if setup fails, otherwise the result of
 * pcmk__fence_register_level().
 */
int
pcmk_fence_register_level(xmlNodePtr *xml, const char *target, int fence_level,
GList *devices)
{
stonith_t* st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = pcmk__fence_register_level(st, target, fence_level, devices);
// The exit status recorded in the XML is derived from rc
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*!
 * \internal
 * \brief Forward a topology-level request to handle_level() with no device
 *        list and its final argument set to \c false
 *
 * \param[in,out] st           Fencer connection
 * \param[in]     target       Target passed through to handle_level()
 * \param[in]     fence_level  Level index passed through to handle_level()
 *
 * \return Whatever handle_level() returns
 *
 * \note Presumably the \c false flag selects removal of the level; confirm
 *       against handle_level().
 */
int
pcmk__fence_unregister_level(stonith_t *st, const char *target, int fence_level)
{
    int rc = handle_level(st, target, fence_level, NULL, false);

    return rc;
}
/*
 * Public wrapper around pcmk__fence_unregister_level().
 *
 * Sets up an XML output object and fencer connection, forwards the request,
 * finishes the XML output into *xml, then disconnects and frees the fencer
 * connection.
 *
 * Returns the setup error if setup fails, otherwise the result of
 * pcmk__fence_unregister_level().
 */
int
pcmk_fence_unregister_level(xmlNodePtr *xml, const char *target, int fence_level)
{
stonith_t* st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = pcmk__fence_unregister_level(st, target, fence_level);
// The exit status recorded in the XML is derived from rc
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*
 * Validate a fence device configuration and report the result.
 *
 * Calls stonith__validate() synchronously, then emits a "validate" message
 * carrying the agent, device ID, captured output, captured error output and
 * the raw return code. The message is emitted whether or not validation
 * succeeded.
 *
 * Returns the stonith__validate() result converted with pcmk_legacy2rc().
 *
 * NOTE(review): output and error_output are never freed in this function.
 * Confirm whether stonith__validate() hands ownership to the caller; if it
 * does, both buffers leak here.
 */
int
pcmk__fence_validate(pcmk__output_t *out, stonith_t *st, const char *agent,
const char *id, GHashTable *params, unsigned int timeout)
{
char *output = NULL;
char *error_output = NULL;
int rc;
- rc = stonith__validate(st, st_opt_sync_call, id, NULL, agent, params,
+ rc = stonith__validate(st, st_opt_sync_call, id, agent, params,
pcmk__timeout_ms2s(timeout), &output, &error_output);
// rc is passed unconverted to the message; only the return value is converted
out->message(out, "validate", agent, id, output, error_output, rc);
return pcmk_legacy2rc(rc);
}
/*
 * Public wrapper around pcmk__fence_validate().
 *
 * Sets up an XML output object and fencer connection, runs the validation,
 * finishes the XML output into *xml, then disconnects and frees the fencer
 * connection.
 *
 * Returns the setup error if setup fails, otherwise the result of
 * pcmk__fence_validate().
 */
int
pcmk_fence_validate(xmlNodePtr *xml, const char *agent, const char *id,
GHashTable *params, unsigned int timeout)
{
stonith_t *st = NULL;
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__setup_output_fencing(&out, &st, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
rc = pcmk__fence_validate(out, st, agent, id, params, timeout);
// The exit status recorded in the XML is derived from rc
pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml);
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
return rc;
}
/*!
 * \internal
 * \brief Fetch fencing history from the fencer, sorted and optionally reduced
 *
 * \param[in,out] st               Fencer connection
 * \param[out]    stonith_history  Where to store the history list
 * \param[in]     fence_history    How much history is wanted
 *
 * \return \c ENOTCONN if \p st is \c NULL or disconnected; \c pcmk_rc_ok
 *         without querying if \p fence_history is
 *         \c pcmk__fence_history_none; otherwise the converted result of the
 *         fencer's history() command
 */
int
pcmk__get_fencing_history(stonith_t *st, stonith_history_t **stonith_history,
                          enum pcmk__fence_history fence_history)
{
    int rc = pcmk_rc_ok;

    if ((st == NULL) || (st->state == stonith_disconnected)) {
        return ENOTCONN;
    }

    if (fence_history == pcmk__fence_history_none) {
        // Nothing requested, so leave *stonith_history untouched
        return rc;
    }

    rc = pcmk_legacy2rc(st->cmds->history(st, st_opt_sync_call, NULL,
                                          stonith_history, 120));
    if (rc != pcmk_rc_ok) {
        return rc;
    }

    *stonith_history = stonith__sort_history(*stonith_history);

    if (fence_history == pcmk__fence_history_reduced) {
        *stonith_history = reduce_fence_history(*stonith_history);
    }
    return rc;
}
diff --git a/lib/pacemaker/pcmk_setup.c b/lib/pacemaker/pcmk_setup.c
index 42dd3f7b0c..80a78c5fb6 100644
--- a/lib/pacemaker/pcmk_setup.c
+++ b/lib/pacemaker/pcmk_setup.c
@@ -1,121 +1,121 @@
/*
- * Copyright 2024 the Pacemaker project contributors
+ * Copyright 2024-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/cib/internal.h>
#include <crm/common/output.h>
#include <crm/common/results.h>
#include <crm/common/scheduler.h>
#include <crm/fencing/internal.h>
#include <pacemaker-internal.h>
#include <pacemaker.h>
#include "libpacemaker_private.h"
/*!
 * \internal
 * \brief Set up a pcmk__output_t, (optionally) cib_t, and
 *        (optionally) pcmk_scheduler_t for use in implementing
 *        public/private API function pairs
 *
 * \param[in,out] out        Where to store a \c pcmk__output_t object
 * \param[in,out] cib        Where to store a \c cib_t object
 *                           (may be \c NULL if a CIB is not needed)
 * \param[in,out] scheduler  Where to store a \c pcmk_scheduler_t object
 *                           (may be \c NULL if a scheduler is not needed)
 * \param[in,out] xml        Where to write any result XML
 *
 * \note The \p cib and \p scheduler arguments will only be valid if there
 *       are no errors in this function.  However, \p out will always be
 *       valid unless there are errors setting it up so that other errors
 *       may still be reported.
 *
 * \return Standard Pacemaker return code
 */
int
pcmk__setup_output_cib_sched(pcmk__output_t **out, cib_t **cib,
                             pcmk_scheduler_t **scheduler, xmlNode **xml)
{
    int rc = pcmk_rc_ok;

    rc = pcmk__xml_output_new(out, xml);
    if (rc != pcmk_rc_ok) {
        return rc;
    }

    if (cib != NULL) {
        *cib = cib_new();
        if (*cib == NULL) {
            return pcmk_rc_cib_corrupt;
        }

        // signon() reports a legacy code; convert before testing it
        rc = (*cib)->cmds->signon(*cib, crm_system_name, cib_command);
        rc = pcmk_legacy2rc(rc);

        if (rc != pcmk_rc_ok) {
            cib__clean_up_connection(cib);
            return rc;
        }
    }

    if (scheduler != NULL) {
        rc = pcmk__init_scheduler(*out, NULL, NULL, scheduler);
        if (rc != pcmk_rc_ok) {
            /* Bail out on any scheduler failure, not only when a CIB was
             * requested: otherwise constraints would be unpacked from a
             * scheduler that failed to initialize.
             */
            if (cib != NULL) {
                cib__clean_up_connection(cib);
            }
            return rc;
        }

        pcmk__unpack_constraints(*scheduler);
    }

    pcmk__register_lib_messages(*out);
    return rc;
}
/*!
* \internal
* \brief Set up a pcmk__output_t and stonith_t for use in implementing
* public/private API function pairs
*
* \param[in,out] out Where to store a \c pcmk__output_t object
* \param[in,out] st Where to store a \c stonith_t object
* \param[in,out] xml Where to write any result XML
*
* \note The \p st argument will only be valid if there are no errors in this
* function. However, \p out will always be valid unless there are
* errors setting it up so that other errors may still be reported.
* \note On connection failure the fencer object is freed here but \p *st is
* not reset, so callers must not use it after an error return.
*
* \return Standard Pacemaker return code
*/
int
pcmk__setup_output_fencing(pcmk__output_t **out, stonith_t **st, xmlNode **xml)
{
int rc = pcmk_rc_ok;
rc = pcmk__xml_output_new(out, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
- *st = stonith_api_new();
+ *st = stonith__api_new();
if (*st == NULL) {
return ENOMEM;
}
// connect() reports failure as a negative legacy code
rc = (*st)->cmds->connect(*st, crm_system_name, NULL);
if (rc < 0) {
rc = pcmk_legacy2rc(rc);
- stonith_api_delete(*st);
+ stonith__api_free(*st);
return rc;
}
// Message registration happens only once the connection is established
pcmk__register_lib_messages(*out);
stonith__register_messages(*out);
return rc;
}
diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c
index 6fc1af7a11..c0052e558a 100644
--- a/lib/pacemaker/pcmk_status.c
+++ b/lib/pacemaker/pcmk_status.c
@@ -1,282 +1,282 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <stdint.h>
#include <crm/cib/internal.h>
#include <crm/common/output.h>
#include <crm/common/results.h>
#include <crm/fencing/internal.h>
#include <crm/pengine/internal.h>
#include <crm/stonith-ng.h> // stonith__register_messages()
#include <pacemaker.h>
#include <pacemaker-internal.h>
/*
 * Create a fencer API object and connect it.
 *
 * Returns the connected object, or NULL if the object could not be created
 * or the connection attempt did not return pcmk_rc_ok (in which case the
 * object is freed).
 *
 * NOTE(review): connect()'s result is compared directly with pcmk_rc_ok
 * here, while pcmk__setup_output_fencing() treats it as a legacy code
 * (failure when negative) -- confirm the two checks agree.
 */
static stonith_t *
fencing_connect(void)
{
- stonith_t *st = stonith_api_new();
+ stonith_t *st = stonith__api_new();
int rc = pcmk_rc_ok;
if (st == NULL) {
return NULL;
}
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_rc_ok) {
return st;
} else {
- stonith_api_delete(st);
+ stonith__api_free(st);
return NULL;
}
}
/*!
 * \internal
 * \brief Output the cluster status given a fencer and CIB connection
 *
 * \param[in,out] scheduler            Scheduler object (will be reset)
 * \param[in,out] stonith              Fencer connection
 * \param[in,out] cib                  CIB connection
 * \param[in]     current_cib          Current CIB XML
 * \param[in]     pcmkd_state          \p pacemakerd state
 * \param[in]     fence_history        How much of the fencing history to output
 * \param[in]     show                 Group of \p pcmk_section_e flags
 * \param[in]     show_opts            Group of \p pcmk_show_opt_e flags
 * \param[in]     only_node            If a node name or tag, include only the
 *                                     matching node(s) (if any) in the output.
 *                                     If \p "*" or \p NULL, include all nodes
 *                                     in the output.
 * \param[in]     only_rsc             If a resource ID or tag, include only the
 *                                     matching resource(s) (if any) in the
 *                                     output. If \p "*" or \p NULL, include all
 *                                     resources in the output.
 * \param[in]     neg_location_prefix  Prefix denoting a ban in a constraint ID
 *
 * \return Standard Pacemaker return code (\c EINVAL if \p scheduler or its
 *         output object is \c NULL)
 *
 * \note A copy of \p current_cib is made and becomes the scheduler's input;
 *       \p current_cib itself is not modified here.
 * \note If the schema upgrade fails, \p cib is passed to
 *       cib__clean_up_connection() via a local copy of the pointer.
 *       NOTE(review): confirm callers do not use \p cib after such a failure.
 */
int
pcmk__output_cluster_status(pcmk_scheduler_t *scheduler, stonith_t *stonith,
                            cib_t *cib, xmlNode *current_cib,
                            enum pcmk_pacemakerd_state pcmkd_state,
                            enum pcmk__fence_history fence_history,
                            uint32_t show, uint32_t show_opts,
                            const char *only_node, const char *only_rsc,
                            const char *neg_location_prefix)
{
    xmlNode *cib_copy = NULL;
    stonith_history_t *stonith_history = NULL;
    int history_rc = 0;
    GList *unames = NULL;
    GList *resources = NULL;
    pcmk__output_t *out = NULL;
    int rc = pcmk_rc_ok;

    if ((scheduler == NULL) || (scheduler->priv->out == NULL)) {
        return EINVAL;
    }
    out = scheduler->priv->out;

    /* Copy the CIB only after the arguments have been validated, so the
     * EINVAL return above cannot leak the copy.
     */
    cib_copy = pcmk__xml_copy(NULL, current_cib);

    rc = pcmk__update_configured_schema(&cib_copy, false);
    if (rc != pcmk_rc_ok) {
        cib__clean_up_connection(&cib);
        pcmk__xml_free(cib_copy);
        out->err(out, "Upgrade failed: %s", pcmk_rc_str(rc));
        return rc;
    }

    /* get the stonith-history if there is evidence we need it */
    if (fence_history != pcmk__fence_history_none) {
        history_rc = pcmk__get_fencing_history(stonith, &stonith_history,
                                               fence_history);
    }

    // The scheduler takes the copied CIB as its input
    pcmk_reset_scheduler(scheduler);
    scheduler->input = cib_copy;
    cluster_status(scheduler);

    /* Unpack constraints if any section will need them
     * (tickets may be referenced in constraints but not granted yet,
     * and bans need negative location constraints) */
    if (pcmk_is_set(show, pcmk_section_bans)
        || pcmk_is_set(show, pcmk_section_tickets)) {
        pcmk__unpack_constraints(scheduler);
    }

    unames = pe__build_node_name_list(scheduler, only_node);
    resources = pe__build_rsc_list(scheduler, only_rsc);

    /* Always print DC if NULL. */
    if (scheduler->dc_node == NULL) {
        show |= pcmk_section_dc;
    }

    // A history fetch failure is reported within the message, not returned
    out->message(out, "cluster-status",
                 scheduler, pcmkd_state, pcmk_rc2exitc(history_rc),
                 stonith_history, fence_history, show, show_opts,
                 neg_location_prefix, unames, resources);

    g_list_free_full(unames, free);
    g_list_free_full(resources, free);
-    stonith_history_free(stonith_history);
+    stonith__history_free(stonith_history);
    stonith_history = NULL;
    return rc;
}
/*!
 * \brief Produce a full cluster status report as XML
 *
 * Creates a CIB API object and an XML output object, registers the library,
 * scheduler and fencing message sets, and runs pcmk__status() with full
 * fence history, all sections, and the pending / inactive-resources / timing
 * display options.
 *
 * \param[in,out] xml  Where to write the result XML
 *
 * \return \c pcmk_rc_cib_corrupt if the CIB object cannot be created, the
 *         output setup error if that fails, otherwise the result of
 *         pcmk__status()
 */
int
pcmk_status(xmlNodePtr *xml)
{
    const uint32_t display_opts = pcmk_show_pending
                                  |pcmk_show_inactive_rscs
                                  |pcmk_show_timing;
    pcmk__output_t *output = NULL;
    int rc = pcmk_rc_ok;
    cib_t *conn = cib_new();

    if (conn == NULL) {
        return pcmk_rc_cib_corrupt;
    }

    rc = pcmk__xml_output_new(&output, xml);
    if (rc == pcmk_rc_ok) {
        pcmk__register_lib_messages(output);
        pe__register_messages(output);
        stonith__register_messages(output);

        rc = pcmk__status(output, conn, pcmk__fence_history_full,
                          pcmk_section_all, display_opts, NULL, NULL, NULL, 0);
        pcmk__xml_output_finish(output, pcmk_rc2exitc(rc), xml);
    }

    // The CIB object is released on both the success and failure paths
    cib_delete(conn);
    return rc;
}
/*!
* \internal
* \brief Query and output the cluster status
*
* The operation is considered a success if we're able to get the \p pacemakerd
* state. If possible, we'll also try to connect to the fencer and CIB and
* output their respective status information.
*
* \param[in,out] out Output object
* \param[in,out] cib CIB connection
* \param[in] fence_history How much of the fencing history to output
* \param[in] show Group of \p pcmk_section_e flags
* \param[in] show_opts Group of \p pcmk_show_opt_e flags
* \param[in] only_node If a node name or tag, include only the
* matching node(s) (if any) in the output.
* If \p "*" or \p NULL, include all nodes
* in the output.
* \param[in] only_rsc If a resource ID or tag, include only the
* matching resource(s) (if any) in the
* output. If \p "*" or \p NULL, include all
* resources in the output.
* \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID
* \param[in] timeout_ms How long to wait for a reply from the
* \p pacemakerd API. If 0,
* \p pcmk_ipc_dispatch_sync will be used.
* If positive, \p pcmk_ipc_dispatch_main
* will be used, and a new mainloop will be
* created for this purpose (freed before
* return).
*
* \return Standard Pacemaker return code (\c ENOTCONN if \p cib is \c NULL,
* the \p pacemakerd query error if that query fails, otherwise
* \c pcmk_rc_ok even when the CIB query or status output fails)
*/
int
pcmk__status(pcmk__output_t *out, cib_t *cib,
enum pcmk__fence_history fence_history, uint32_t show,
uint32_t show_opts, const char *only_node, const char *only_rsc,
const char *neg_location_prefix, unsigned int timeout_ms)
{
xmlNode *current_cib = NULL;
int rc = pcmk_rc_ok;
stonith_t *stonith = NULL;
enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid;
time_t last_updated = 0;
pcmk_scheduler_t *scheduler = NULL;
if (cib == NULL) {
return ENOTCONN;
}
// pacemakerd and the fencer are only contacted for a native CIB connection
if (cib->variant == cib_native) {
rc = pcmk__pacemakerd_status(out, crm_system_name, timeout_ms, false,
&pcmkd_state);
if (rc != pcmk_rc_ok) {
return rc;
}
last_updated = time(NULL);
switch (pcmkd_state) {
case pcmk_pacemakerd_state_running:
case pcmk_pacemakerd_state_shutting_down:
case pcmk_pacemakerd_state_remote:
/* Fencer and CIB may still be available while shutting down or
* running on a Pacemaker Remote node
*/
break;
default:
// Fencer and CIB are definitely unavailable
out->message(out, "pacemakerd-health",
NULL, pcmkd_state, NULL, last_updated);
return rc;
}
// A failed fencer connection leaves stonith NULL; status continues anyway
if (fence_history != pcmk__fence_history_none) {
stonith = fencing_connect();
}
}
rc = cib__signon_query(out, &cib, &current_cib);
if (rc != pcmk_rc_ok) {
if (pcmkd_state != pcmk_pacemakerd_state_invalid) {
// Invalid at this point means we didn't query the pcmkd state
out->message(out, "pacemakerd-health",
NULL, pcmkd_state, NULL, last_updated);
}
goto done;
}
scheduler = pcmk_new_scheduler();
pcmk__mem_assert(scheduler);
scheduler->priv->out = out;
if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) {
// Currently used only in the times section
pcmk__query_node_name(out, 0, &(scheduler->priv->local_node_name), 0);
}
rc = pcmk__output_cluster_status(scheduler, stonith, cib, current_cib,
pcmkd_state, fence_history, show,
show_opts, only_node, only_rsc,
neg_location_prefix);
if (rc != pcmk_rc_ok) {
out->err(out, "Error outputting status info from the fencer or CIB");
}
done:
// scheduler and stonith may still be NULL when reached via the goto above
pcmk_free_scheduler(scheduler);
- stonith_api_delete(stonith);
+ stonith__api_free(stonith);
pcmk__xml_free(current_cib);
// rc is deliberately not propagated: reaching here counts as success
return pcmk_rc_ok;
}
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index c0ad6c47b9..ba3cc415de 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -1,2190 +1,2190 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <signal.h>
#include <sys/utsname.h>
#include <crm/services.h>
#include <crm/lrmd.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/internal.h> // pcmk__ends_with_ext()
#include <crm/common/ipc.h>
#include <crm/common/mainloop.h>
#include <crm/common/output.h>
#include <crm/common/output_internal.h>
#include <crm/common/results.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/cib/internal.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <pacemaker-internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h> // stonith__*
#include "crm_mon.h"
// One-paragraph tool description
#define SUMMARY "Provides a summary of cluster's current state.\n\n" \
"Outputs varying levels of detail in a number of different formats."
/*
* Definitions indicating which items to print
*
* show holds pcmk_section_* flags and show_opts holds pcmk_show_* flags; both
* are adjusted by the option callbacks in this file.
*/
static uint32_t show;
static uint32_t show_opts = pcmk_show_pending;
/*
* Definitions indicating how to output
*/
static mon_output_format_t output_format = mon_output_unset;
/* other globals */
static GIOChannel *io_channel = NULL;
static GMainLoop *mainloop = NULL;
static guint reconnect_timer = 0;
static mainloop_timer_t *refresh_timer = NULL;
static enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid;
static cib_t *cib = NULL;
static stonith_t *st = NULL;
static xmlNode *current_cib = NULL;
static GError *error = NULL;
static pcmk__common_args_t *args = NULL;
static pcmk__output_t *out = NULL;
static GOptionContext *context = NULL;
static gchar **processed_args = NULL;
static time_t last_refresh = 0;
// NOTE(review): unlike its neighbours this has external linkage -- confirm intended
volatile crm_trigger_t *refresh_trigger = NULL;
static pcmk_scheduler_t *scheduler = NULL;
static enum pcmk__fence_history fence_history = pcmk__fence_history_none;
// Set by fence_history_cb(); values outside 0-3 are rejected there
int interactive_fence_level = 0;
// Supported output formats; the all-NULL entry terminates the table
static pcmk__supported_format_t formats[] = {
#if PCMK__ENABLE_CURSES
CRM_MON_SUPPORTED_FORMAT_CURSES,
#endif
PCMK__SUPPORTED_FORMAT_HTML,
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/*!
 * \internal
 * \brief Fallback "crm-mon-disconnected" handler that produces no output
 *
 * \param[in] out   Output object (unused)
 * \param[in] args  Message arguments (unused)
 *
 * \return \c pcmk_rc_no_output always
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
                  "enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_default(pcmk__output_t *out, va_list args)
{
    const int rc = pcmk_rc_no_output;

    return rc;
}
/*!
 * \internal
 * \brief HTML handler for the "crm-mon-disconnected" message
 *
 * Emits a series of span text nodes reading "Not connected to CIB", followed
 * by the description (if any) and the friendly pacemakerd state (if valid),
 * then finishes the output with \c CRM_EX_DISCONNECT.
 *
 * \param[in,out] out   Output object (reset first unless writing to stdout)
 * \param[in]     args  Description string, then pacemakerd state
 *
 * \return \c pcmk_rc_ok always
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
                  "enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_html(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);

    if (out->dest != stdout) {
        out->reset(out);
    }

    pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN,
                                      "Not connected to CIB");

    if (desc != NULL) {
        pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ": ");
        pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, desc);
    }

    if (state != pcmk_pacemakerd_state_invalid) {
        // Parenthesized state, emitted as three consecutive spans
        const char *pieces[3] = {
            " (", pcmk__pcmkd_state_enum2friendly(state), ")"
        };

        for (int i = 0; i < 3; i++) {
            pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, pieces[i]);
        }
    }

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Text handler for the "crm-mon-disconnected" message
 *
 * Prints "Not connected to CIB", followed by ": <desc>" when a description
 * is given and " (<state>)" when the pacemakerd state is valid, then finishes
 * the output with \c CRM_EX_DISCONNECT.
 *
 * \param[in,out] out   Output object (reset first unless writing to stdout)
 * \param[in]     args  Description string, then pacemakerd state
 *
 * \return Result of the output object's info() call
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
                  "enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_text(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);
    const char *separator = NULL;
    const char *detail = NULL;
    int rc = pcmk_rc_ok;

    if (out->dest != stdout) {
        out->reset(out);
    }

    separator = (desc != NULL)? ": " : "";
    detail = pcmk__s(desc, "");

    if (state == pcmk_pacemakerd_state_invalid) {
        rc = out->info(out, "Not connected to CIB%s%s", separator, detail);
    } else {
        rc = out->info(out, "Not connected to CIB%s%s (%s)",
                       separator, detail,
                       pcmk__pcmkd_state_enum2friendly(state));
    }

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return rc;
}
/*!
 * \internal
 * \brief XML handler for the "crm-mon-disconnected" message
 *
 * Creates a \c PCMK_XE_CRM_MON_DISCONNECTED element carrying the description
 * and, when the state is valid, the pacemakerd state text; then finishes the
 * output with \c CRM_EX_DISCONNECT.
 *
 * \param[in,out] out   Output object (reset first unless writing to stdout)
 * \param[in]     args  Description string, then pacemakerd state
 *
 * \return \c pcmk_rc_ok always
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
                  "enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_xml(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);
    const char *state_text = NULL;

    if (out->dest != stdout) {
        out->reset(out);
    }

    // An invalid state leaves the attribute value NULL
    state_text = (state == pcmk_pacemakerd_state_invalid)
                 ? NULL
                 : pcmk_pacemakerd_api_daemon_state_enum2text(state);

    pcmk__output_create_xml_node(out, PCMK_XE_CRM_MON_DISCONNECTED,
                                 PCMK_XA_DESCRIPTION, desc,
                                 PCMK_XA_PACEMAKERD_STATE, state_text,
                                 NULL);

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return pcmk_rc_ok;
}
/* Handlers for the "crm-mon-disconnected" message, one per format name
 * ("default", "html", "text", "xml"). The all-NULL entry terminates the table.
 */
static pcmk__message_entry_t fmt_functions[] = {
{ "crm-mon-disconnected", "default", crm_mon_disconnected_default },
{ "crm-mon-disconnected", "html", crm_mon_disconnected_html },
{ "crm-mon-disconnected", "text", crm_mon_disconnected_text },
{ "crm-mon-disconnected", "xml", crm_mon_disconnected_xml },
{ NULL, NULL, NULL },
};
// Default for options.reconnect_ms (the --interval help text says 5 seconds)
#define RECONNECT_MSECS 5000
/* Values filled in by command-line parsing. Fields without an explicit
 * initializer below start out zeroed (NULL / FALSE / 0).
 */
struct {
guint reconnect_ms; // set by reconnect_cb() from --interval
enum mon_exec_mode exec_mode; // one-shot, daemonized or update; see callbacks
gboolean fence_connect; // cleared by fence_history_cb() for level 0
gboolean print_pending;
gboolean show_bans;
gboolean watch_fencing; // --watch-fencing
char *pid_file; // --pid-file
char *external_agent; // --external-agent
char *external_recipient; // --external-recipient
char *neg_location_prefix; // PREFIX from an "--include bans:PREFIX" request
char *only_node; // --node
char *only_rsc; // --resource
GSList *user_includes_excludes; // "OPTION=VALUE" strings queued by user_include_exclude_cb()
GSList *includes_excludes; // "OPTION=VALUE" strings queued by include_exclude_cb()
} options = {
.reconnect_ms = RECONNECT_MSECS,
.exec_mode = mon_exec_unset,
.fence_connect = TRUE,
};
// Forward declarations for static functions defined later in this file
static crm_exit_t clean_up(crm_exit_t exit_code);
static void crm_diff_update(const char *event, xmlNode * msg);
static void clean_up_on_connection_failure(int rc);
static int mon_refresh_display(gpointer user_data);
static int setup_cib_connection(void);
static int setup_fencer_connection(void);
static int setup_api_connections(void);
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
static void refresh_after_event(gboolean data_updated, gboolean enforce);
/*!
 * \internal
 * \brief Get the section flags meant by "all" for a given output format
 *
 * \param[in] fmt  Output format
 *
 * \return Every bit except \c pcmk_section_options for the plain and console
 *         formats, otherwise \c pcmk_section_all
 */
static uint32_t
all_includes(mon_output_format_t fmt) {
    switch (fmt) {
        case mon_output_plain:
        case mon_output_console:
            return ~pcmk_section_options;

        default:
            return pcmk_section_all;
    }
}
/*!
 * \internal
 * \brief Get the default section flags for a given output format
 *
 * \param[in] fmt  Output format
 *
 * \return Summary, nodes, resources and failures for the plain, console and
 *         HTML formats; the result of all_includes() for XML; 0 otherwise
 */
static uint32_t
default_includes(mon_output_format_t fmt) {
    if (fmt == mon_output_xml) {
        return all_includes(fmt);
    }

    if ((fmt == mon_output_plain) || (fmt == mon_output_console)
        || (fmt == mon_output_html)) {

        return pcmk_section_summary
               |pcmk_section_nodes
               |pcmk_section_resources
               |pcmk_section_failures;
    }

    return 0;
}
/* Section names and their pcmk_section_* flag values, searched by
 * find_section_bit(). The entry with a NULL name terminates the table.
 */
struct {
const char *name;
uint32_t bit;
} sections[] = {
{ "attributes", pcmk_section_attributes },
{ "bans", pcmk_section_bans },
{ "counts", pcmk_section_counts },
{ "dc", pcmk_section_dc },
{ "failcounts", pcmk_section_failcounts },
{ "failures", pcmk_section_failures },
{ PCMK_VALUE_FENCING, pcmk_section_fencing_all },
{ "fencing-failed", pcmk_section_fence_failed },
{ "fencing-pending", pcmk_section_fence_pending },
{ "fencing-succeeded", pcmk_section_fence_worked },
{ "maint-mode", pcmk_section_maint_mode },
{ "nodes", pcmk_section_nodes },
{ "operations", pcmk_section_operations },
{ "options", pcmk_section_options },
{ "resources", pcmk_section_resources },
{ "stack", pcmk_section_stack },
{ "summary", pcmk_section_summary },
{ "tickets", pcmk_section_tickets },
{ "times", pcmk_section_times },
{ NULL }
};
/*!
 * \internal
 * \brief Look up the flag for a section name in the \c sections table
 *
 * \param[in] name  Section name (compared with \c pcmk__str_casei)
 *
 * \return The matching entry's flag value, or 0 if no entry matches
 */
static uint32_t
find_section_bit(const char *name) {
    int idx = 0;

    while (sections[idx].name != NULL) {
        if (pcmk__str_eq(sections[idx].name, name, pcmk__str_casei)) {
            return sections[idx].bit;
        }
        idx++;
    }
    return 0;
}
/*!
 * \internal
 * \brief Apply a comma-separated list of sections to exclude to \c show
 *
 * "all" clears every section, \c PCMK_VALUE_NONE restores everything
 * all_includes() gives for the current output format, and any other known
 * section name clears that section's bit(s). Processing stops at the first
 * unrecognized name.
 *
 * \param[in]  excludes  Comma-separated section names
 * \param[out] error     Where to store a usage error for an unknown name
 *
 * \return \c TRUE if every name was recognized, otherwise \c FALSE
 */
static gboolean
apply_exclude(const gchar *excludes, GError **error) {
    char **tokens = g_strsplit(excludes, ",", 0);
    gboolean ok = TRUE;

    for (int i = 0; ok && (tokens[i] != NULL); i++) {
        const char *token = tokens[i];
        uint32_t bit = find_section_bit(token);

        if (pcmk__str_eq(token, "all", pcmk__str_none)) {
            show = 0;

        } else if (pcmk__str_eq(token, PCMK_VALUE_NONE, pcmk__str_none)) {
            show = all_includes(output_format);

        } else if (bit != 0) {
            show &= ~bit;

        } else {
            g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                        "--exclude options: all, attributes, bans, counts, dc, "
                        "failcounts, failures, fencing, fencing-failed, "
                        "fencing-pending, fencing-succeeded, maint-mode, nodes, "
                        PCMK_VALUE_NONE ", operations, options, resources, "
                        "stack, summary, tickets, times");
            ok = FALSE;
        }
    }

    g_strfreev(tokens);
    return ok;
}
/*!
 * \internal
 * \brief Apply a comma-separated list of sections to include to \c show
 *
 * Recognized entries, checked in this order: "all"; anything starting with
 * "bans" (optionally "bans:PREFIX", which replaces
 * \c options.neg_location_prefix); \c PCMK_VALUE_DEFAULT or "defaults";
 * \c PCMK_VALUE_NONE; any name known to find_section_bit(). Processing stops
 * at the first unrecognized name.
 *
 * \param[in]  includes  Comma-separated section names
 * \param[out] error     Where to store a usage error for an unknown name
 *
 * \return \c TRUE if every name was recognized, otherwise \c FALSE
 */
static gboolean
apply_include(const gchar *includes, GError **error) {
    char **tokens = g_strsplit(includes, ",", 0);
    gboolean ok = TRUE;

    for (int i = 0; ok && (tokens[i] != NULL); i++) {
        const char *token = tokens[i];
        uint32_t bit = find_section_bit(token);

        if (pcmk__str_eq(token, "all", pcmk__str_none)) {
            show = all_includes(output_format);

        } else if (pcmk__starts_with(token, "bans")) {
            show |= pcmk_section_bans;

            // Any earlier prefix is dropped, even when no new one is given
            free(options.neg_location_prefix);
            options.neg_location_prefix = NULL;

            if ((strlen(token) > 4) && (token[4] == ':')) {
                options.neg_location_prefix = strdup(token + 5);
            }

        } else if (pcmk__str_any_of(token, PCMK_VALUE_DEFAULT, "defaults",
                                    NULL)) {
            show |= default_includes(output_format);

        } else if (pcmk__str_eq(token, PCMK_VALUE_NONE, pcmk__str_none)) {
            show = 0;

        } else if (bit != 0) {
            show |= bit;

        } else {
            g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                        "--include options: all, attributes, bans[:PREFIX], counts, dc, "
                        PCMK_VALUE_DEFAULT ", failcounts, failures, fencing, "
                        "fencing-failed, fencing-pending, fencing-succeeded, "
                        "maint-mode, nodes, " PCMK_VALUE_NONE ", operations, "
                        "options, resources, stack, summary, tickets, times");
            ok = FALSE;
        }
    }

    g_strfreev(tokens);
    return ok;
}
/*!
 * \internal
 * \brief Apply a queued list of include/exclude requests in order
 *
 * Each list element is a string beginning with "--include=", "-I=",
 * "--exclude=" or "-U="; the remainder is passed to apply_include() or
 * apply_exclude(). Elements with any other prefix are skipped.
 *
 * \param[in]  lst    List of request strings
 * \param[out] error  Where apply_include()/apply_exclude() store any error
 *
 * \return \c TRUE if every request applied, otherwise the failing result
 */
static gboolean
apply_include_exclude(GSList *lst, GError **error) {
    for (GSList *iter = lst; iter != NULL; iter = iter->next) {
        char *request = iter->data;
        gboolean applied = TRUE;

        if (pcmk__starts_with(request, "--include=")) {
            applied = apply_include(request + 10, error);

        } else if (pcmk__starts_with(request, "-I=")) {
            applied = apply_include(request + 3, error);

        } else if (pcmk__starts_with(request, "--exclude=")) {
            applied = apply_exclude(request + 10, error);

        } else if (pcmk__starts_with(request, "-U=")) {
            applied = apply_exclude(request + 3, error);
        }

        // Stop at the first failure; later requests are not examined
        if (applied != TRUE) {
            return applied;
        }
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Queue an include/exclude request on the user-supplied list
 *
 * Appends a newly allocated "OPTION=VALUE" string to
 * \c options.user_includes_excludes.
 *
 * \param[in] option_name  Option name as given
 * \param[in] optarg       Option value
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
user_include_exclude_cb(const gchar *option_name, const gchar *optarg,
                        gpointer data, GError **err)
{
    char *request = crm_strdup_printf("%s=%s", option_name, optarg);
    GSList *queue = options.user_includes_excludes;

    options.user_includes_excludes = g_slist_append(queue, request);
    return TRUE;
}
/*!
 * \internal
 * \brief Queue an include/exclude request on the non-user list
 *
 * Appends a newly allocated "OPTION=VALUE" string to
 * \c options.includes_excludes.
 *
 * \param[in] option_name  Option name to record
 * \param[in] optarg       Option value
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
include_exclude_cb(const gchar *option_name, const gchar *optarg,
                   gpointer data, GError **err)
{
    char *request = crm_strdup_printf("%s=%s", option_name, optarg);
    GSList *queue = options.includes_excludes;

    options.includes_excludes = g_slist_append(queue, request);
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback selecting the legacy XML output format
 *
 * Sets \c output_format to \c mon_output_legacy_xml and the requested output
 * type in \c args to "xml".
 *
 * \param[in] option_name  Name of option being parsed (unused)
 * \param[in] optarg       Value to be parsed (unused)
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data,
          GError **err)
{
    output_format = mon_output_legacy_xml;
    pcmk__str_update(&args->output_ty, "xml");
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback for the fence history level
 *
 * Stores the level in \c interactive_fence_level (2 when no value is given),
 * then sets \c options.fence_connect and \c fence_history and queues the
 * matching include/exclude request.
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Requested level, or \c NULL
 * \param[in]  data         User data, passed through to include_exclude_cb()
 * \param[out] err          Where to store error
 *
 * \return Result of include_exclude_cb() for levels 0-3, \c FALSE otherwise
 *
 * \note NOTE(review): levels 1 and 2 select \c pcmk__fence_history_full just
 *       like level 3, although the option's help text implies only level 3
 *       shows history without reduction -- confirm this is intended.
 */
static gboolean
fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                 GError **err)
{
    const char *included = NULL;

    if (optarg == NULL) {
        interactive_fence_level = 2;
    } else {
        pcmk__scan_min_int(optarg, &interactive_fence_level, 0);
    }

    switch (interactive_fence_level) {
        case 3:
        case 2:
            included = PCMK_VALUE_FENCING;
            break;

        case 1:
            included = "fencing-failed,fencing-pending";
            break;

        case 0:
            options.fence_connect = FALSE;
            fence_history = pcmk__fence_history_none;
            return include_exclude_cb("--exclude", PCMK_VALUE_FENCING, data,
                                      err);

        default:
            g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3");
            return FALSE;
    }

    // Levels 1-3 all need a fencer connection and the full history
    options.fence_connect = TRUE;
    fence_history = pcmk__fence_history_full;
    return include_exclude_cb("--include", included, data, err);
}
/*!
 * \internal
 * \brief Option callback setting \c pcmk_show_rscs_by_node in \c show_opts
 *
 * \param[in] option_name  Name of option being parsed (unused)
 * \param[in] optarg       Value to be parsed (unused)
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                 GError **err)
{
    show_opts = show_opts | pcmk_show_rscs_by_node;
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback queueing a user request to exclude "summary"
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Value to be parsed (unused)
 * \param[in]  data         User data, passed through
 * \param[out] err          Where to store error, passed through
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                GError **err)
{
    gboolean queued = user_include_exclude_cb("--exclude", "summary", data,
                                              err);

    return queued;
}
/*!
 * \internal
 * \brief Option callback setting \c pcmk_show_inactive_rscs in \c show_opts
 *
 * \param[in] option_name  Name of option being parsed (unused)
 * \param[in] optarg       Value to be parsed (unused)
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
inactive_resources_cb(const gchar *option_name, const gchar *optarg,
                      gpointer data, GError **err)
{
    show_opts = show_opts | pcmk_show_inactive_rscs;
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback setting \c pcmk_show_brief in \c show_opts
 *
 * \param[in] option_name  Name of option being parsed (unused)
 * \param[in] optarg       Value to be parsed (unused)
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data,
               GError **err)
{
    show_opts = show_opts | pcmk_show_brief;
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback setting \c pcmk_show_details in \c show_opts
 *
 * \param[in] option_name  Name of option being parsed (unused)
 * \param[in] optarg       Value to be parsed (unused)
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                GError **err)
{
    show_opts = show_opts | pcmk_show_details;
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback setting \c pcmk_show_description in \c show_opts
 *
 * \param[in] option_name  Name of option being parsed (unused)
 * \param[in] optarg       Value to be parsed (unused)
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
print_description_cb(const gchar *option_name, const gchar *optarg,
                     gpointer data, GError **err)
{
    show_opts = show_opts | pcmk_show_description;
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback setting \c pcmk_show_timing and queueing a user
 *        request to include "operations"
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Value to be parsed (unused)
 * \param[in]  data         User data, passed through
 * \param[out] err          Where to store error, passed through
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                GError **err)
{
    show_opts = show_opts | pcmk_show_timing;
    return user_include_exclude_cb("--include", "operations", data, err);
}
/*!
 * \internal
 * \brief Option callback for the update interval
 *
 * Rejects the value if crm_get_msec() returns -1 for it. Otherwise parses it
 * into \c options.reconnect_ms and switches the execution mode to
 * \c mon_exec_update unless daemonized mode is already selected.
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Interval specification
 * \param[in]  data         User data (unused)
 * \param[out] err          Where to store error for an invalid value
 *
 * \return \c TRUE on success, \c FALSE for an invalid value
 */
static gboolean
reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data,
             GError **err)
{
    int msec = crm_get_msec(optarg);

    if (msec == -1) {
        g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg);
        return FALSE;
    }

    pcmk_parse_interval_spec(optarg, &options.reconnect_ms);

    if (options.exec_mode != mon_exec_daemonized) {
        // Reconnect interval applies to daemonized too, so don't override
        options.exec_mode = mon_exec_update;
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Select one-shot execution mode
 *
 * \param[in]  option_name  Name of option being parsed (ignored)
 * \param[in]  optarg       Value to be parsed (ignored)
 * \param[in]  data         User data (ignored)
 * \param[out] err          Where to store error (ignored)
 *
 * \return \c TRUE always
 */
static gboolean
one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data,
            GError **err)
{
    const enum mon_exec_mode mode = mon_exec_one_shot;

    options.exec_mode = mode;
    return TRUE;
}
/*!
 * \internal
 * \brief Select daemonized execution mode
 *
 * \param[in]  option_name  Name of option being parsed (ignored)
 * \param[in]  optarg       Value to be parsed (ignored)
 * \param[in]  data         User data (ignored)
 * \param[out] err          Where to store error (ignored)
 *
 * \return \c TRUE always
 */
static gboolean
daemonize_cb(const gchar *option_name, const gchar *optarg, gpointer data,
             GError **err)
{
    const enum mon_exec_mode mode = mon_exec_daemonized;

    options.exec_mode = mode;
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback queueing a user request to include "attributes"
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Value to be parsed (unused)
 * \param[in]  data         User data, passed through
 * \param[out] err          Where to store error, passed through
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
show_attributes_cb(const gchar *option_name, const gchar *optarg,
                   gpointer data, GError **err)
{
    gboolean queued = user_include_exclude_cb("--include", "attributes", data,
                                              err);

    return queued;
}
/*!
 * \internal
 * \brief Option callback queueing a user request to include bans
 *
 * Without a value the request is "bans"; with a value it is "bans:VALUE".
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Optional prefix value, or \c NULL
 * \param[in]  data         User data, passed through
 * \param[out] err          Where to store error, passed through
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data,
             GError **err)
{
    char *request = NULL;
    gboolean queued = FALSE;

    if (optarg == NULL) {
        return user_include_exclude_cb("--include", "bans", data, err);
    }

    // The temporary request string is released once it has been queued
    request = crm_strdup_printf("bans:%s", optarg);
    queued = user_include_exclude_cb("--include", request, data, err);
    free(request);
    return queued;
}
/*!
 * \internal
 * \brief Option callback queueing a user request to include "failcounts"
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Value to be parsed (unused)
 * \param[in]  data         User data, passed through
 * \param[out] err          Where to store error, passed through
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
show_failcounts_cb(const gchar *option_name, const gchar *optarg,
                   gpointer data, GError **err)
{
    gboolean queued = user_include_exclude_cb("--include", "failcounts", data,
                                              err);

    return queued;
}
/*!
 * \internal
 * \brief Option callback queueing a user request to include
 *        "failcounts,operations"
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Value to be parsed (unused)
 * \param[in]  data         User data, passed through
 * \param[out] err          Where to store error, passed through
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
show_operations_cb(const gchar *option_name, const gchar *optarg,
                   gpointer data, GError **err)
{
    gboolean queued = user_include_exclude_cb("--include",
                                              "failcounts,operations", data,
                                              err);

    return queued;
}
/*!
 * \internal
 * \brief Option callback queueing a user request to include "tickets"
 *
 * \param[in]  option_name  Name of option being parsed (unused)
 * \param[in]  optarg       Value to be parsed (unused)
 * \param[in]  data         User data, passed through
 * \param[out] err          Where to store error, passed through
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                GError **err)
{
    gboolean queued = user_include_exclude_cb("--include", "tickets", data,
                                              err);

    return queued;
}
/*!
 * \internal
 * \brief Option callback for reading the CIB from a file
 *
 * Exports the given path in the \c CIB_file environment variable
 * (overwriting any existing value) and selects one-shot execution mode.
 *
 * \param[in] option_name  Name of option being parsed (unused)
 * \param[in] optarg       File path
 * \param[in] data         User data (unused)
 * \param[in] err          Where to store error (unused)
 *
 * \return \c TRUE always
 */
static gboolean
use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                GError **err)
{
    const int overwrite = 1;

    setenv("CIB_file", optarg, overwrite);
    options.exec_mode = mon_exec_one_shot;
    return TRUE;
}
// Prefix placed before continuation lines of multi-line help descriptions
#define INDENT " "
/* *INDENT-OFF* */
/* Command-line option entries. Each entry gives the long name, short name,
 * flags, argument type, destination (a callback or the address of an
 * options field), help text, and argument placeholder. The { NULL } entry
 * terminates the table.
 */
static GOptionEntry addl_entries[] = {
{ "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb,
"Update frequency (default is 5 seconds). Note: When run interactively\n"
INDENT "on a live cluster, the display will be updated automatically\n"
INDENT "whenever the cluster configuration or status changes.",
"TIMESPEC" },
{ "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
one_shot_cb,
"Display the cluster status once and exit",
NULL },
{ "daemonize", 'd', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
daemonize_cb,
"Run in the background as a daemon.\n"
INDENT "Requires at least one of --output-to and --external-agent.",
NULL },
{ "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file,
"(Advanced) Daemon pid file location",
"FILE" },
{ "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent,
"A program to run when resource operations take place",
"FILE" },
{ "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient,
"A recipient for your program (assuming you want the program to send something to someone).",
"RCPT" },
{ "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing,
"Listen for fencing events. For use with --external-agent.",
NULL },
// Hidden from help output; handled by use_cib_file_cb()
{ "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb,
NULL,
NULL },
{ NULL }
};
/* Options registered by build_arg_context() under the "display" group. These
 * control which sections and details appear in the output. get_option_desc()
 * also searches this table by short option character to build the interactive
 * help screen. The table ends with a { NULL } terminator entry.
 */
static GOptionEntry display_entries[] = {
{ "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
"A list of sections to include in the output.\n"
INDENT "See `Output Control` help for more information.",
"SECTION(s)" },
{ "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
"A list of sections to exclude from the output.\n"
INDENT "See `Output Control` help for more information.",
"SECTION(s)" },
{ "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node,
"When displaying information about nodes, show only what's related to the given\n"
INDENT "node, or to all nodes tagged with the given tag",
"NODE" },
{ "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc,
"When displaying information about resources, show only what's related to the given\n"
INDENT "resource, or to all resources tagged with the given tag",
"RSC" },
{ "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb,
"Group resources by node",
NULL },
{ "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb,
"Display inactive resources",
NULL },
{ "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb,
"Display resource fail counts",
NULL },
{ "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb,
"Display resource operation history",
NULL },
{ "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb,
"Display resource operation history with timing details",
NULL },
{ "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb,
"Display cluster tickets",
NULL },
{ "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb,
"Show fence history:\n"
INDENT "0=off, 1=failures and pending (default without option),\n"
INDENT "2=add successes (default without value for option),\n"
INDENT "3=show full history without reduction to most recent of each flavor",
"LEVEL" },
{ "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb,
"Display negative location constraints [optionally filtered by id prefix]",
NULL },
{ "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb,
"Display node attributes",
NULL },
{ "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb,
"Hide all headers",
NULL },
{ "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb,
"Show more details (node IDs, individual clone instances)",
NULL },
{ "show-description", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_description_cb,
"Show resource descriptions",
NULL },
{ "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb,
"Brief output",
NULL },
// Hidden from help output, but still listed on the interactive help screen
{ "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending,
"Display pending state if '" PCMK_META_RECORD_PENDING "' is enabled",
NULL },
{ NULL }
};
/* Options registered by build_arg_context() under the "deprecated" group.
 * The table ends with a { NULL } terminator entry.
 */
static GOptionEntry deprecated_entries[] = {
/* @COMPAT resource-agents <4.15.0 uses --as-xml, so removing this option
* must wait until we no longer support building on any platforms that ship
* the older agents.
*/
{ "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb,
"Write cluster status as XML to stdout. This will enable one-shot mode.\n"
INDENT "Use --output-as=xml instead.",
NULL },
{ NULL }
};
/* *INDENT-ON* */
/*!
 * \internal
 * \brief Timer callback that tries to reconnect to the CIB and fencer
 *
 * This is scheduled only after a disconnect has been handled by
 * mon_cib_connection_destroy(). If the attempt fails, a new timer is created
 * for the next attempt; the timer that invoked this call is always removed.
 *
 * \param[in] data  Unused
 *
 * \return \c G_SOURCE_REMOVE always
 */
static gboolean
reconnect_after_timeout(gpointer data)
{
    int rc = pcmk_rc_ok;

#if PCMK__ENABLE_CURSES
    if (output_format == mon_output_console) {
        clear();
        refresh();
    }
#endif

    out->transient(out, "Reconnecting...");
    rc = setup_api_connections();

    if (rc != pcmk_rc_ok) {
        out->message(out, "crm-mon-disconnected",
                     "Latest connection attempt failed", pcmkd_state);

        // Schedule the next attempt with a fresh timer
        reconnect_timer = pcmk__create_timer(options.reconnect_ms,
                                             reconnect_after_timeout, NULL);

    } else {
        // Trigger redrawing the screen (needs reconnect_timer == 0)
        reconnect_timer = 0;
        refresh_after_event(FALSE, TRUE);
    }

    return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Handle loss of the connection to the CIB or to the fencer
 *
 * Reports the disconnect, stops pending refresh and reconnect timers, drops
 * the fencer connection and its notifications, and, if a CIB object exists,
 * signs off from the CIB and schedules a reconnect attempt.
 *
 * \param[in] user_data  Unused
 */
static void
mon_cib_connection_destroy(gpointer user_data)
{
    const char *lost_msg = "Connection to the cluster lost";

    pcmkd_state = pcmk_pacemakerd_state_invalid;

    /* No crm-mon-disconnected message for console; a working implementation
     * is not currently worth the effort
     */
    out->transient(out, "%s", lost_msg);
    out->message(out, "crm-mon-disconnected", lost_msg, pcmkd_state);

    // A refresh will be triggered after reconnecting
    if (refresh_timer != NULL) {
        mainloop_timer_stop(refresh_timer);
    }

    // A new reconnect timer is created at the end of this function
    if (reconnect_timer != 0) {
        g_source_remove(reconnect_timer);
        reconnect_timer = 0;
    }

    /* The client API won't properly reconnect notifications if they are still
     * in the table, so remove them
     */
    if (st != NULL) {
        if (st->state != stonith_disconnected) {
            st->cmds->disconnect(st);
        }
        st->cmds->remove_notification(st, NULL);
    }

    if (cib == NULL) {
        return;
    }

    cib->cmds->signoff(cib);
    reconnect_timer = pcmk__create_timer(options.reconnect_ms,
                                         reconnect_after_timeout, NULL);
}
/*!
 * \internal
 * \brief Mainloop signal handler for normal program shutdown
 *
 * \param[in] nsig  Signal number (unused)
 */
static void
mon_shutdown(int nsig)
{
    (void) clean_up(CRM_EX_OK);
}
#if PCMK__ENABLE_CURSES
static volatile sighandler_t ncurses_winch_handler;
/* Signal handler installed the regular way (not into the main loop) for when
* the screen is resized. Commonly, this happens when running in an xterm and
* the user changes its size.
*/
static void
mon_winresize(int nsig)
{
static int not_done;
int lines = 0, cols = 0;
if (!not_done++) {
if (ncurses_winch_handler)
/* the original ncurses WINCH signal handler does the
* magic of retrieving the new window size;
* otherwise, we'd have to use ioctl or tgetent */
(*ncurses_winch_handler) (SIGWINCH);
getmaxyx(stdscr, lines, cols);
resizeterm(lines, cols);
/* Alert the mainloop code we'd like the refresh_trigger to run next
* time the mainloop gets around to checking.
*/
mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
}
not_done--;
}
#endif
/* Create (if needed) and connect the fencer API object.
 *
 * Nothing is done, and the initial rc value (pcmk_ok) is returned, when
 * fencer connections are not requested, when the object could not be
 * created, or when the object is not in the disconnected state. Otherwise the
 * result of the connect call is returned. On success, notification callbacks
 * are registered (which ones depends on options.watch_fencing); on failure,
 * the fencer object is freed and st is reset to NULL.
 *
 * NOTE(review): the '-'/'+' prefixed line pairs below are patch markers
 * carried over from the diff this text was taken from.
 */
static int
setup_fencer_connection(void)
{
int rc = pcmk_ok;
if (options.fence_connect && st == NULL) {
- st = stonith_api_new();
+ st = stonith__api_new();
}
if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) {
return rc;
}
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_ok) {
crm_trace("Setting up stonith callbacks");
if (options.watch_fencing) {
// Fencing events are passed to mon_st_callback_event()
st->cmds->register_notification(st,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
mon_st_callback_event);
st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE,
mon_st_callback_event);
} else {
// History changes are passed to mon_st_callback_display()
st->cmds->register_notification(st,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
mon_st_callback_display);
st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY,
mon_st_callback_display);
}
} else {
- stonith_api_delete(st);
+ stonith__api_free(st);
st = NULL;
}
return rc;
}
/* Sign on to the CIB and register for disconnect and diff notifications.
 *
 * Returns EINVAL (via CRM_CHECK) if there is no CIB object, and pcmk_rc_ok
 * without doing anything if the CIB is not in the disconnected state.
 * A client that does not support connection-loss notifications is tolerated
 * with an error message. If diff notifications cannot be set up, the CIB
 * connection is cleaned up, the fencer object is freed, and the failure code
 * is returned.
 *
 * NOTE(review): the '-'/'+' prefixed line pair below is a patch marker
 * carried over from the diff this text was taken from.
 */
static int
setup_cib_connection(void)
{
int rc = pcmk_rc_ok;
CRM_CHECK(cib != NULL, return EINVAL);
if (cib->state != cib_disconnected) {
// Already connected with notifications registered for CIB updates
return rc;
}
rc = cib__signon_query(out, &cib, &current_cib);
if (rc == pcmk_rc_ok) {
rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib,
mon_cib_connection_destroy));
if (rc == EPROTONOSUPPORT) {
out->err(out,
"CIB client does not support connection loss "
"notifications; crm_mon will be unable to reconnect after "
"connection loss");
// Not fatal: continue without disconnect notifications
rc = pcmk_rc_ok;
}
if (rc == pcmk_rc_ok) {
// Remove the callback before adding it, then add it
cib->cmds->del_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY,
crm_diff_update);
rc = cib->cmds->add_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY,
crm_diff_update);
rc = pcmk_legacy2rc(rc);
}
if (rc != pcmk_rc_ok) {
if (rc == EPROTONOSUPPORT) {
out->err(out,
"CIB client does not support CIB diff "
"notifications");
} else {
out->err(out, "CIB diff notification setup failed");
}
out->err(out, "Cannot monitor CIB changes; exiting");
cib__clean_up_connection(&cib);
- stonith_api_delete(st);
+ stonith__api_free(st);
st = NULL;
}
}
return rc;
}
/*!
 * \internal
 * \brief Apply a fence history level chosen in the interactive UI
 *
 * This is used to set up the fencing options after the interactive UI has
 * been started. fence_history_cb() can't be used because it builds up a list
 * of includes/excludes that then have to be processed with
 * apply_include_exclude(), and that could affect other things.
 *
 * \param[in] level  Fence history level (1 to 3 enable fence history; any
 *                   other value disables it and resets
 *                   \c interactive_fence_level to 0)
 */
static void
set_fencing_options(int level)
{
    if ((level < 1) || (level > 3)) {
        interactive_fence_level = 0;
        options.fence_connect = FALSE;
        fence_history = pcmk__fence_history_none;
        show &= ~pcmk_section_fencing_all;
        return;
    }

    // Levels 1 through 3 all need a fencer connection and use full history
    options.fence_connect = TRUE;
    fence_history = pcmk__fence_history_full;

    if (level == 1) {
        show |= pcmk_section_fence_failed | pcmk_section_fence_pending;
    } else {
        show |= pcmk_section_fencing_all;
    }
}
/*!
 * \internal
 * \brief Connect to the fencer (if appropriate) and to the CIB
 *
 * For a native CIB connection, the pacemakerd state is queried first, and
 * the connection attempt is abandoned unless pacemakerd is running, running
 * as a Pacemaker Remote node, or shutting down.
 *
 * \return Standard Pacemaker return code (\c EINVAL if there is no CIB
 *         object, \c ENOTCONN if pacemakerd's state rules out a connection)
 */
static int
setup_api_connections(void)
{
    int rc = pcmk_rc_ok;

    CRM_CHECK(cib != NULL, return EINVAL);

    if (cib->state != cib_disconnected) {
        return rc;
    }

    if (cib->variant == cib_native) {
        rc = pcmk__pacemakerd_status(out, crm_system_name,
                                     options.reconnect_ms / 2, false,
                                     &pcmkd_state);
        if (rc != pcmk_rc_ok) {
            return rc;
        }

        /* Fencer and CIB may still be available while shutting down or
         * running on a Pacemaker Remote node. In any other state they are
         * definitely unavailable.
         */
        if ((pcmkd_state != pcmk_pacemakerd_state_running)
            && (pcmkd_state != pcmk_pacemakerd_state_remote)
            && (pcmkd_state != pcmk_pacemakerd_state_shutting_down)) {
            return ENOTCONN;
        }

        setup_fencer_connection();
    }

    return setup_cib_connection();
}
#if PCMK__ENABLE_CURSES
static const char *
get_option_desc(char c)
{
const char *desc = "No help available";
for (GOptionEntry *entry = display_entries; entry != NULL; entry++) {
if (entry->short_name == c) {
desc = entry->description;
break;
}
}
return desc;
}
/* Print one line of the interactive help screen: a '*' if condition is true
 * (a space otherwise), the option character, and the description that
 * get_option_desc() returns for it. Note that option is expanded twice and
 * that the expansion already ends with a semicolon.
 */
#define print_option_help(out, option, condition) \
curses_formatted_printf(out, "%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option));
/* This function is called from the main loop when there is something to be read
 * on stdin, like an interactive user's keystroke. All it does is read the keystroke,
 * set flags (or show the page showing which keystrokes are valid), and redraw the
 * screen. It does not do anything with connections to the CIB or fencing
 * agent; that would happen in mon_refresh_display.
 *
 * Returns G_SOURCE_CONTINUE to keep watching stdin, or G_SOURCE_REMOVE once
 * the channel reports G_IO_HUP or G_IO_NVAL.
 */
static gboolean
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
{
int c;
gboolean config_mode = FALSE;
gboolean rc = G_SOURCE_CONTINUE;
/* If the attached pty device (pseudo-terminal) has been closed/deleted,
* the condition (G_IO_IN | G_IO_ERR | G_IO_HUP) occurs.
* Exit with an error, otherwise the process would persist in the
* background and significantly raise the CPU usage.
*/
if ((condition & G_IO_ERR) && (condition & G_IO_HUP)) {
rc = G_SOURCE_REMOVE;
clean_up(CRM_EX_IOERR);
}
/* The connection/fd has been closed. Refresh the screen and remove this
* event source hence ignore stdin.
*/
if (condition & (G_IO_HUP | G_IO_NVAL)) {
rc = G_SOURCE_REMOVE;
}
// Nothing to read
if ((condition & G_IO_IN) == 0) {
return rc;
}
/* Outside help mode, a single key is processed and the screen is redrawn.
 * After '?', the loop keeps reading keys and reprinting the help page until
 * a key that is not handled below is pressed.
 */
while (1) {
/* Get user input */
c = getchar();
switch (c) {
case 'm':
// Cycle the fence history level through 0..3
interactive_fence_level++;
if (interactive_fence_level > 3) {
interactive_fence_level = 0;
}
set_fencing_options(interactive_fence_level);
break;
case 'c':
show ^= pcmk_section_tickets;
break;
case 'f':
show ^= pcmk_section_failcounts;
break;
case 'n':
show_opts ^= pcmk_show_rscs_by_node;
break;
case 'o':
show ^= pcmk_section_operations;
// Timing details are only shown along with operations
if (!pcmk_is_set(show, pcmk_section_operations)) {
show_opts &= ~pcmk_show_timing;
}
break;
case 'r':
show_opts ^= pcmk_show_inactive_rscs;
break;
case 'R':
show_opts ^= pcmk_show_details;
break;
case 't':
show_opts ^= pcmk_show_timing;
// Enabling timing details also enables the operations section
if (pcmk_is_set(show_opts, pcmk_show_timing)) {
show |= pcmk_section_operations;
}
break;
case 'A':
show ^= pcmk_section_attributes;
break;
case 'L':
show ^= pcmk_section_bans;
break;
case 'D':
/* If any header is shown, clear them all, otherwise set them all */
if (pcmk_any_flags_set(show, pcmk_section_summary)) {
show &= ~pcmk_section_summary;
} else {
show |= pcmk_section_summary;
}
/* Regardless, we don't show options in console mode. */
show &= ~pcmk_section_options;
break;
case 'b':
show_opts ^= pcmk_show_brief;
break;
case 'j':
show_opts ^= pcmk_show_pending;
break;
case '?':
config_mode = TRUE;
break;
default:
/* All other keys just redraw the screen. */
goto refresh;
}
if (!config_mode)
goto refresh;
// Help mode: show every option with its current state
clear();
refresh();
curses_formatted_printf(out, "%s", "Display option change mode\n");
print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets));
print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts));
print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node));
print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations));
print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs));
print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing));
print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes));
print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans));
print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary));
print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details));
print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief));
print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending));
curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m'));
curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n");
}
refresh:
refresh_after_event(FALSE, TRUE);
return rc;
}
#endif // PCMK__ENABLE_CURSES
// Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag
static void
avoid_zombies(void)
{
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
if (sigemptyset(&sa.sa_mask) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
return;
}
sa.sa_handler = SIG_IGN;
sa.sa_flags = SA_RESTART|SA_NOCLDWAIT;
if (sigaction(SIGCHLD, &sa, NULL) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
}
}
/* Build the command-line option context for crm_mon.
 *
 * Creates the context with pcmk__build_arg_context(), adds the --quiet main
 * option, sets the long help description, and registers the "display",
 * "additional", and "deprecated" option groups. Both the list of supported
 * formats and the help text depend on whether curses support was compiled in.
 *
 * args:  common argument object; its quiet member receives --quiet
 * group: passed through to pcmk__build_arg_context()
 *
 * Returns the newly built context.
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
GOptionContext *context = NULL;
GOptionEntry extra_prog_entries[] = {
{ "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet),
"Be less descriptive in output.",
NULL },
{ NULL }
};
#if PCMK__ENABLE_CURSES
const char *fmts = "console (default), html, text, xml, none";
#else
const char *fmts = "text (default), html, xml, none";
#endif // PCMK__ENABLE_CURSES
const char *desc = NULL;
// Long description shown with the help output
desc = "Notes:\n\n"
"Time Specification:\n\n"
"The TIMESPEC in any command line option can be specified in many\n"
"different formats. It can be an integer number of seconds, a\n"
"number plus units (us/usec/ms/msec/s/sec/m/min/h/hr), or an ISO\n"
"8601 period specification.\n\n"
"Output Control:\n\n"
"By default, a particular set of sections are written to the\n"
"output destination. The default varies based on the output\n"
"format: XML includes all sections by default, while other output\n"
"formats include less. This set can be modified with the --include\n"
"and --exclude command line options. Each option may be passed\n"
"multiple times, and each can specify a comma-separated list of\n"
"sections. The options are applied to the default set, in order\n"
"from left to right as they are passed on the command line. For a\n"
"list of valid sections, pass --include=list or --exclude=list.\n\n"
"Interactive Use:\n\n"
#if PCMK__ENABLE_CURSES
"When run interactively, crm_mon can be told to hide and show\n"
"various sections of output. To see a help screen explaining the\n"
"options, press '?'. Any key stroke aside from those listed will\n"
"cause the screen to refresh.\n\n"
#else
"The local installation of Pacemaker was built without support for\n"
"interactive (console) mode. A curses library must be available at\n"
"build time to support interactive mode.\n\n"
#endif // PCMK__ENABLE_CURSES
"Examples:\n\n"
#if PCMK__ENABLE_CURSES
"Display the cluster status on the console with updates as they\n"
"occur:\n\n"
"\tcrm_mon\n\n"
#endif // PCMK__ENABLE_CURSES
"Display the cluster status once and exit:\n\n"
"\tcrm_mon -1\n\n"
"Display the cluster status, group resources by node, and include\n"
"inactive resources in the list:\n\n"
"\tcrm_mon --group-by-node --inactive\n\n"
"Start crm_mon as a background daemon and have it write the\n"
"cluster status to an HTML file:\n\n"
"\tcrm_mon --daemonize --output-as html "
"--output-to /path/to/docroot/filename.html\n\n"
"Display the cluster status as XML:\n\n"
"\tcrm_mon --output-as xml\n\n";
context = pcmk__build_arg_context(args, fmts, group, NULL);
pcmk__add_main_args(context, extra_prog_entries);
g_option_context_set_description(context, desc);
pcmk__add_arg_group(context, "display", "Display Options:",
"Show display options", display_entries);
pcmk__add_arg_group(context, "additional", "Additional Options:",
"Show additional options", addl_entries);
pcmk__add_arg_group(context, "deprecated", "Deprecated Options:",
"Show deprecated options", deprecated_entries);
return context;
}
/*!
 * \internal
 * \brief Derive \c output_format from \c --output-as and the mode arguments
 *
 * If \c output_format was already set (by the callback for the deprecated
 * \c --as-xml argument), it is left alone, so the deprecated argument takes
 * precedence. Otherwise it is chosen from the requested output type together
 * with the \c --one-shot and \c --daemonize arguments. An unrecognized output
 * type leaves \c output_format unset so that pcmk__output_new() can report
 * the error.
 *
 * \param[in,out] args  Command line arguments (\c output_ty may be updated)
 */
static void
reconcile_output_format(pcmk__common_args_t *args)
{
    if (output_format != mon_output_unset) {
        // --as-xml was used; nothing more to do
        return;
    }

    if (pcmk__str_eq(args->output_ty, PCMK_VALUE_NONE, pcmk__str_none)) {
        output_format = mon_output_none;
        return;
    }

    if (pcmk__str_eq(args->output_ty, "html", pcmk__str_none)) {
        output_format = mon_output_html;
        umask(S_IWGRP | S_IWOTH);   // World-readable HTML
        return;
    }

    if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) {
        output_format = mon_output_xml;
        return;
    }

#if PCMK__ENABLE_CURSES
    if (pcmk__str_eq(args->output_ty, "console", pcmk__str_null_matches)) {
        /* Console is the default format if no conflicting options are given.
         *
         * Fall back to text output if any of the following holds:
         * * Daemonized or one-shot mode was requested (console output works
         *   only with mon_exec_update)
         * * The version was requested, which is effectively one-shot
         * * A non-stdout output destination was given (console output works
         *   only with stdout)
         */
        const bool to_stdout = pcmk__str_eq(args->output_dest, "-",
                                            pcmk__str_null_matches);

        if ((options.exec_mode == mon_exec_daemonized)
            || (options.exec_mode == mon_exec_one_shot)
            || args->version
            || !to_stdout) {

            pcmk__str_update(&args->output_ty, "text");
            output_format = mon_output_plain;

        } else {
            pcmk__str_update(&args->output_ty, "console");
            output_format = mon_output_console;
            crm_enable_stderr(FALSE);
        }
        return;
    }
#endif // PCMK__ENABLE_CURSES

    if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) {
        /* Text output was explicitly requested, or it's the default because
         * curses is not enabled
         */
        pcmk__str_update(&args->output_ty, "text");
        output_format = mon_output_plain;
    }

    // Otherwise, invalid format. Let pcmk__output_new() throw an error.
}
/*!
 * \internal
 * \brief Set the execution mode to the output format's default if appropriate
 *
 * \param[in] args  Command line arguments
 */
static void
set_default_exec_mode(const pcmk__common_args_t *args)
{
    if (output_format == mon_output_console) {
        /* Update is the only valid mode for console. It is set here rather
         * than in reconcile_output_format() for isolation and consistency.
         */
        options.exec_mode = mon_exec_update;
        return;
    }

    switch (options.exec_mode) {
        case mon_exec_unset:
            // Default to one-shot mode for all other formats
            options.exec_mode = mon_exec_one_shot;
            break;

        case mon_exec_update:
            // Without console format, update mode cannot be used with stdout
            if (pcmk__str_eq(args->output_dest, "-",
                             pcmk__str_null_matches)) {
                options.exec_mode = mon_exec_one_shot;
            }
            break;

        default:
            break;
    }
}
static void
clean_up_on_connection_failure(int rc)
{
if (rc == ENOTCONN) {
if (pcmkd_state == pcmk_pacemakerd_state_remote) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster");
} else {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
}
} else {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc));
}
clean_up(pcmk_rc2exitc(rc));
}
/*!
 * \internal
 * \brief Output the cluster status once, then clean up
 *
 * On failure, cleanup goes through clean_up_on_connection_failure() so that
 * an error describing the failure is recorded first.
 */
static void
one_shot(void)
{
    const int status_rc = pcmk__status(out, cib, fence_history, show,
                                       show_opts, options.only_node,
                                       options.only_rsc,
                                       options.neg_location_prefix, 0);

    if (status_rc != pcmk_rc_ok) {
        clean_up_on_connection_failure(status_rc);
        return;
    }
    clean_up(pcmk_rc2exitc(status_rc));
}
static void
exit_on_invalid_cib(void)
{
if (cib != NULL) {
return;
}
// Shouldn't really be possible
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source");
clean_up(CRM_EX_ERROR);
}
/* Entry point of crm_mon.
 *
 * In order: parse the command line, create the CIB object and adjust options
 * to the kind of CIB source, settle the output format and execution mode,
 * create the output object, daemonize if requested, compute the sections to
 * show, and register output messages. In one-shot mode, the status is then
 * printed once via one_shot(). Otherwise, connect to the cluster (retrying
 * while the connection is refused or unavailable), draw the display, and run
 * the main loop until shutdown.
 */
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
GOptionGroup *output_group = NULL;
args = pcmk__new_common_args(SUMMARY);
context = build_arg_context(args, &output_group);
pcmk__register_formats(output_group, formats);
options.pid_file = strdup("/tmp/ClusterMon.pid");
pcmk__cli_init_logging("crm_mon", 0);
// Avoid needing to wait for subprocesses forked for -E/--external-agent
avoid_zombies();
processed_args = pcmk__cmdline_preproc(argv, "eimpxEILU");
// Default fence history level; command-line options may override it
fence_history_cb("--fence-history", "1", NULL, NULL);
/* Set an HTML title regardless of what format we will eventually use.
* Doing this here means the user can give their own title on the command
* line.
*/
if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"",
g_get_prgname())) {
return clean_up(CRM_EX_USAGE);
}
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
return clean_up(CRM_EX_USAGE);
}
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
if (!args->version) {
if (args->quiet) {
include_exclude_cb("--exclude", "times", NULL, NULL);
}
if (options.watch_fencing) {
// Keep the fencer connection, but don't display fence history
fence_history_cb("--fence-history", "0", NULL, NULL);
options.fence_connect = TRUE;
}
/* create the cib-object early to be able to do further
* decisions based on the cib-source
*/
cib = cib_new();
exit_on_invalid_cib();
switch (cib->variant) {
case cib_native:
// Everything (fencer, CIB, pcmkd status) should be available
break;
case cib_file:
// Live fence history is not meaningful
fence_history_cb("--fence-history", "0", NULL, NULL);
/* Notifications are unsupported; nothing to monitor
* @COMPAT: Let setup_cib_connection() handle this by exiting?
*/
options.exec_mode = mon_exec_one_shot;
break;
case cib_remote:
// We won't receive any fencing updates
fence_history_cb("--fence-history", "0", NULL, NULL);
break;
default:
/* something is odd */
exit_on_invalid_cib();
break;
}
if ((options.exec_mode == mon_exec_daemonized)
&& !options.external_agent
&& pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--daemonize requires at least one of --output-to "
"(with value not set to '-') and --external-agent");
return clean_up(CRM_EX_USAGE);
}
}
reconcile_output_format(args);
set_default_exec_mode(args);
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
return clean_up(CRM_EX_ERROR);
}
if (output_format == mon_output_legacy_xml) {
output_format = mon_output_xml;
pcmk__output_set_legacy_xml(out);
}
/* output_format MUST NOT BE CHANGED AFTER THIS POINT. */
/* If we had a valid format for pcmk__output_new(), output_format should be
* set by now.
*/
pcmk__assert(output_format != mon_output_unset);
if (output_format == mon_output_plain) {
pcmk__output_text_set_fancy(out, true);
}
if (options.exec_mode == mon_exec_daemonized) {
if (!options.external_agent && (output_format == mon_output_none)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--daemonize requires --external-agent if used with "
"--output-as=none");
return clean_up(CRM_EX_USAGE);
}
crm_enable_stderr(FALSE);
// The CIB object is deleted before daemonizing and recreated afterward
cib_delete(cib);
cib = NULL;
pcmk__daemonize(crm_system_name, options.pid_file);
cib = cib_new();
exit_on_invalid_cib();
}
show = default_includes(output_format);
/* Apply --include/--exclude flags we used internally. There's no error reporting
* here because this would be a programming error.
*/
apply_include_exclude(options.includes_excludes, &error);
/* And now apply any --include/--exclude flags the user gave on the command line.
* These are done in a separate pass from the internal ones because we want to
* make sure whatever the user specifies overrides whatever we do.
*/
if (!apply_include_exclude(options.user_includes_excludes, &error)) {
return clean_up(CRM_EX_USAGE);
}
/* Sync up the initial value of interactive_fence_level with whatever was set with
* --include/--exclude= options.
*/
if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) {
interactive_fence_level = 3;
} else if (pcmk_is_set(show, pcmk_section_fence_worked)) {
interactive_fence_level = 2;
} else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) {
interactive_fence_level = 1;
} else {
interactive_fence_level = 0;
}
pcmk__register_lib_messages(out);
crm_mon_register_messages(out);
pe__register_messages(out);
stonith__register_messages(out);
// Messages internal to this file, nothing curses-specific
pcmk__register_messages(out, fmt_functions);
if (args->version) {
out->version(out, false);
return clean_up(CRM_EX_OK);
}
if (output_format == mon_output_xml) {
show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing;
}
// HTML written to a file gets a refresh header based on the reconnect interval
if ((output_format == mon_output_html) && (out->dest != stdout)) {
char *content = pcmk__itoa(pcmk__timeout_ms2s(options.reconnect_ms));
pcmk__html_add_header(PCMK__XE_META,
PCMK__XA_HTTP_EQUIV, PCMK__VALUE_REFRESH,
PCMK__XA_CONTENT, content,
NULL);
free(content);
}
crm_info("Starting %s", crm_system_name);
cib__set_output(cib, out);
if (options.exec_mode == mon_exec_one_shot) {
one_shot();
}
scheduler = pcmk_new_scheduler();
pcmk__mem_assert(scheduler);
scheduler->priv->out = out;
if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) {
// Currently used only in the times section
pcmk__query_node_name(out, 0, &(scheduler->priv->local_node_name), 0);
}
out->message(out, "crm-mon-disconnected",
"Waiting for initial connection", pcmkd_state);
// Keep trying while the cluster is unavailable or refuses the connection
do {
out->transient(out, "Connecting to cluster...");
rc = setup_api_connections();
if (rc != pcmk_rc_ok) {
if ((rc == ENOTCONN) || (rc == ECONNREFUSED)) {
out->transient(out, "Connection failed. Retrying in %s...",
pcmk__readable_interval(options.reconnect_ms));
}
// Give some time to view all output even if we won't retry
pcmk__sleep_ms(options.reconnect_ms);
#if PCMK__ENABLE_CURSES
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
}
} while ((rc == ENOTCONN) || (rc == ECONNREFUSED));
if (rc != pcmk_rc_ok) {
clean_up_on_connection_failure(rc);
}
set_fencing_options(interactive_fence_level);
mon_refresh_display(NULL);
mainloop = g_main_loop_new(NULL, FALSE);
mainloop_add_signal(SIGTERM, mon_shutdown);
mainloop_add_signal(SIGINT, mon_shutdown);
#if PCMK__ENABLE_CURSES
if (output_format == mon_output_console) {
// Remember any previous SIGWINCH handler so mon_winresize() can chain to it
ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize);
if (ncurses_winch_handler == SIG_DFL ||
ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR)
ncurses_winch_handler = NULL;
io_channel = g_io_channel_unix_new(STDIN_FILENO);
g_io_add_watch(io_channel, (G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL),
detect_user_input, NULL);
}
#endif
/* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In
* this file, that is anywhere mainloop_set_trigger is called.
*/
refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
g_main_loop_run(mainloop);
g_main_loop_unref(mainloop);
crm_info("Exiting %s", crm_system_name);
return clean_up(CRM_EX_OK);
}
/*!
 * \internal
 * \brief Run the external agent to report an event
 *
 * The event details are exported as \c CRM_notify_* environment variables,
 * then \c options.external_agent is executed in a forked child. The parent
 * does not wait for the child.
 *
 * \param[in] node       Node that the event applies to
 * \param[in] rsc        Resource that the event applies to (may be NULL)
 * \param[in] task       Name of the action that the event describes
 * \param[in] target_rc  Expected result code
 * \param[in] rc         Actual result code
 * \param[in] status     Execution status
 * \param[in] desc       Human-readable description of the result
 *
 * \return 0 always
 */
static int
send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc,
                 int status, const char *desc)
{
    pid_t pid;

    /* setenv needs chars, these are ints */
    char *rc_s = pcmk__itoa(rc);
    char *status_s = pcmk__itoa(status);
    char *target_rc_s = pcmk__itoa(target_rc);

    crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent);

    if (rsc != NULL) {
        setenv("CRM_notify_rsc", rsc, 1);
    } else {
        /* The environment persists between calls, so make sure an event
         * without a resource doesn't inherit the resource name exported for
         * an earlier event
         */
        unsetenv("CRM_notify_rsc");
    }

    if (options.external_recipient) {
        setenv("CRM_notify_recipient", options.external_recipient, 1);
    }
    setenv("CRM_notify_node", node, 1);
    setenv("CRM_notify_task", task, 1);
    setenv("CRM_notify_desc", desc, 1);
    setenv("CRM_notify_rc", rc_s, 1);
    setenv("CRM_notify_target_rc", target_rc_s, 1);
    setenv("CRM_notify_status", status_s, 1);

    pid = fork();
    if (pid == -1) {
        out->err(out, "notification fork() failed: %s", strerror(errno));

    } else if (pid == 0) {
        // Child: replace this process with the agent, or exit if that fails
        execl(options.external_agent, options.external_agent, NULL);
        crm_exit(CRM_EX_ERROR);
    }

    crm_trace("Finished running custom notification program '%s'.", options.external_agent);
    free(target_rc_s);
    free(status_s);
    free(rc_s);
    return 0;
}
/*!
 * \internal
 * \brief Log (and possibly send a notification for) resource history entries
 *
 * If \p xml is not a resource history entry, this function is applied to
 * each of its children instead. Otherwise, the entry's result is decoded
 * from its transition magic and logged, and the external agent (if
 * configured) is notified unless the action completed as expected with a
 * "not running" result.
 *
 * \param[in] xml       XML element to process
 * \param[in] userdata  Node name to fall back on if none can be determined
 *                      from the XML (may be NULL)
 *
 * \return \c pcmk_rc_ok always
 */
static int
handle_rsc_op(xmlNode *xml, void *userdata)
{
    const char *fallback_node = (const char *) userdata;
    const char *op_key = NULL;
    const char *magic = NULL;
    const char *node = NULL;
    const char *desc = NULL;
    char *rsc = NULL;
    char *task = NULL;
    int status = -1;
    int rc = -1;
    int target_rc = -1;
    gboolean notify = TRUE;

    if (strcmp((const char *) xml->name, PCMK__XE_LRM_RSC_OP) != 0) {
        pcmk__xe_foreach_child(xml, NULL, handle_rsc_op,
                               (void *) fallback_node);
        return pcmk_rc_ok;
    }

    op_key = pcmk__xe_history_key(xml);
    magic = crm_element_value(xml, PCMK__XA_TRANSITION_MAGIC);
    if (magic == NULL) {
        // Non-change
        return pcmk_rc_ok;
    }

    if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc,
                                 &target_rc)) {
        crm_err("Invalid event %s detected for %s", magic, op_key);
        return pcmk_rc_ok;
    }

    if (!parse_op_key(op_key, &rsc, &task, NULL)) {
        crm_err("Invalid event detected for %s", op_key);
        goto done;
    }

    /* Determine the node name. Order of preference: the entry's own node
     * attribute, the name or ID of the enclosing node state element, and
     * finally the caller-supplied fallback.
     */
    node = crm_element_value(xml, PCMK__META_ON_NODE);
    if (node == NULL) {
        xmlNode *ancestor = xml;

        while ((ancestor != NULL)
               && !pcmk__xe_is(ancestor, PCMK__XE_NODE_STATE)) {
            ancestor = ancestor->parent;
        }

        if (ancestor != NULL) {
            node = crm_element_value(ancestor, PCMK_XA_UNAME);
            if (node == NULL) {
                node = pcmk__xe_id(ancestor);
            }
        }
        if (node == NULL) {
            node = fallback_node;
        }
    }

    if (node == NULL) {
        crm_err("No node detected for event %s (%s)", magic, op_key);
        goto done;
    }

    /* look up where we expected it to be? */
    if (status != PCMK_EXEC_DONE) {
        desc = pcmk_exec_status_str(status);
        crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);

    } else if (target_rc != rc) {
        desc = crm_exit_str(rc);
        crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);

    } else {
        desc = pcmk_rc_str(pcmk_rc_ok);
        crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc);

        // An expected "not running" result is not worth a notification
        notify = (rc != PCMK_OCF_NOT_RUNNING);
    }

    if (notify && options.external_agent) {
        send_custom_trap(node, rsc, task, target_rc, rc, status, desc);
    }

done:
    free(rsc);
    free(task);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Request a screen redraw from a mainloop source callback
 *
 * This is a thin wrapper around mainloop_set_trigger() so that it can be
 * called from a mainloop directly. It's simply another way of ensuring the
 * screen gets redrawn.
 *
 * \param[in] user_data  Unused
 *
 * \return \c G_SOURCE_REMOVE always
 */
static gboolean
mon_trigger_refresh(gpointer user_data)
{
    crm_trigger_t *trigger = (crm_trigger_t *) refresh_trigger;

    mainloop_set_trigger(trigger);
    return G_SOURCE_REMOVE;
}
/* Child-iteration callback: derive a node name for the given element (its
 * uname attribute, or its XML ID when no uname is set) and pass the element
 * to handle_rsc_op() with that name. The user data argument is unused.
 * Always returns pcmk_rc_ok.
 */
static int
handle_op_for_node(xmlNode *xml, void *userdata)
{
    const char *node_name = crm_element_value(xml, PCMK_XA_UNAME);

    if (node_name != NULL) {
        handle_rsc_op(xml, (void *) node_name);
    } else {
        handle_rsc_op(xml, (void *) pcmk__xe_id(xml));
    }
    return pcmk_rc_ok;
}
/* Process one change entry of a CIB patchset (called per child of the diff by
 * crm_diff_update() for format-2 patchsets).
 *
 * Selects the element the change produced (the first child of a "create"
 * change, or the first child of the change result of a "modify" change) and
 * passes it to handle_rsc_op() along with the best node name that can be
 * derived from the element itself or from the change's XPath.
 *
 * Changes without an operation, and move/delete operations, are ignored.
 * The user data argument is unused. Always returns pcmk_rc_ok.
 */
static int
crm_diff_update_element(xmlNode *change, void *userdata)
{
    const char *op = crm_element_value(change, PCMK_XA_OPERATION);
    const char *xpath = crm_element_value(change, PCMK_XA_PATH);
    const char *name = NULL;
    xmlNode *match = NULL;

    if (op == NULL) {
        return pcmk_rc_ok;
    }

    if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
        match = change->children;

    } else if (pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE,
                                NULL)) {
        return pcmk_rc_ok;

    } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
        xmlNode *result = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT,
                                               NULL, NULL);

        match = (result != NULL)? result->children : NULL;
    }

    name = (match != NULL)? (const char *) match->name : NULL;

    crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name);

    if (xpath == NULL) {
        /* Version field, ignore */
        return pcmk_rc_ok;
    }

    if (name == NULL) {
        crm_debug("No result for %s operation to %s", op, xpath);
        pcmk__assert(pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE,
                                      NULL));
        return pcmk_rc_ok;
    }

    if (strcmp(name, PCMK_XE_CIB) == 0) {
        xmlNode *status = pcmk__xe_first_child(match, PCMK_XE_STATUS, NULL,
                                               NULL);

        pcmk__xe_foreach_child(status, NULL, handle_op_for_node, NULL);

    } else if (strcmp(name, PCMK_XE_STATUS) == 0) {
        pcmk__xe_foreach_child(match, NULL, handle_op_for_node, NULL);

    } else if (strcmp(name, PCMK__XE_NODE_STATE) == 0) {
        // Same uname-or-ID lookup that is applied to each child of the status
        handle_op_for_node(match, NULL);

    } else if (strcmp(name, PCMK__XE_LRM) == 0) {
        handle_rsc_op(match, (void *) pcmk__xe_id(match));

    } else if ((strcmp(name, PCMK__XE_LRM_RESOURCES) == 0)
               || (strcmp(name, PCMK__XE_LRM_RESOURCE) == 0)
               || (strcmp(name, PCMK__XE_LRM_RSC_OP) == 0)) {
        // Below the lrm element, the node ID is taken from the XPath
        char *xpath_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM);

        handle_rsc_op(match, xpath_node);
        free(xpath_node);

    } else {
        crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
    }

    return pcmk_rc_ok;
}
/* CIB diff notification handler.
 *
 * Applies the patchset carried by the notification to the cached CIB. If the
 * patch cannot be applied the cached copy is dropped and a full CIB is
 * queried synchronously. When an external agent is configured, every change
 * of a format-2 patchset is passed to crm_diff_update_element(). Finally a
 * display refresh is requested, unless no CIB is available, in which case a
 * "stale data" notice is printed once until data is available again.
 *
 * event: notification name, only used in log messages
 * msg:   notification XML containing the patchset
 */
static void
crm_diff_update(const char *event, xmlNode * msg)
{
    int rc = -1;
    static bool stale = false;
    gboolean cib_updated = FALSE;
    xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT,
                                            NULL, NULL);
    xmlNode *diff = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

    out->progress(out, false);

    if (current_cib != NULL) {
        rc = xml_apply_patchset(current_cib, diff, TRUE);

        switch (rc) {
            case -pcmk_err_diff_resync:
            case -pcmk_err_diff_failed:
                crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
                pcmk__xml_free(current_cib); current_cib = NULL;
                break;
            case pcmk_ok:
                cib_updated = TRUE;
                break;
            default:
                crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
                pcmk__xml_free(current_cib); current_cib = NULL;
        }
    }

    if (current_cib == NULL) {
        crm_trace("Re-requesting the full cib");
        cib->cmds->query(cib, NULL, &current_cib, cib_sync_call);
    }

    if (options.external_agent) {
        int format = 0;

        crm_element_value_int(diff, PCMK_XA_FORMAT, &format);

        if (format == 2) {
            /* Reuse the patchset located at the top of the function; it was
             * previously looked up a second time into shadowing locals.
             */
            pcmk__xe_foreach_child(diff, NULL, crm_diff_update_element, NULL);
        } else {
            crm_err("Unknown patch format: %d", format);
        }
    }

    if (current_cib == NULL) {
        // Print the notice only on the transition into the stale state
        if(!stale) {
            out->info(out, "--- Stale data ---");
        }
        stale = true;
        return;
    }

    stale = false;
    refresh_after_event(cib_updated, FALSE);
}
/* Redraw the cluster status using the current output object.
 *
 * Records the refresh time, then (unless output is disabled) queries
 * pacemakerd status for native CIB connections and prints the cluster status.
 *
 * Returns G_SOURCE_REMOVE when output is disabled or when the status output
 * reports a schema validation failure (after calling clean_up()), otherwise
 * G_SOURCE_CONTINUE. The user data argument is unused.
 */
static int
mon_refresh_display(gpointer user_data)
{
    int status_rc = pcmk_rc_ok;

    last_refresh = time(NULL);

    if (output_format == mon_output_none) {
        return G_SOURCE_REMOVE;
    }

    /* Full fence history is only kept for XML output or when all fencing
     * sections are shown; otherwise fall back to the reduced history.
     */
    if ((fence_history == pcmk__fence_history_full)
        && !pcmk_all_flags_set(show, pcmk_section_fencing_all)
        && (output_format != mon_output_xml)) {

        fence_history = pcmk__fence_history_reduced;
    }

    // Get an up-to-date pacemakerd status for the cluster summary
    if (cib->variant == cib_native) {
        pcmk__pacemakerd_status(out, crm_system_name,
                                options.reconnect_ms / 2, false,
                                &pcmkd_state);
    }

    // out->dest is re-read after the reset rather than cached
    if (out->dest != stdout) {
        out->reset(out);
    }

    status_rc = pcmk__output_cluster_status(scheduler, st, cib, current_cib,
                                            pcmkd_state, fence_history, show,
                                            show_opts, options.only_node,
                                            options.only_rsc,
                                            options.neg_location_prefix);

    if (status_rc == pcmk_rc_schema_validation) {
        clean_up(CRM_EX_CONFIG);
        return G_SOURCE_REMOVE;
    }

    if (out->dest != stdout) {
        out->finish(out, CRM_EX_OK, true, NULL);
    }

    return G_SOURCE_CONTINUE;
}
/* Fencing event callback used when --watch-fencing is given on the command
 * line (see setup_fencer_connection() for the events it is registered for).
 *
 * A disconnected fencer tears down the CIB connection as well so that
 * everything reconnects. Otherwise, if an external agent is configured, the
 * event is forwarded to it as a custom trap.
 */
static void
mon_st_callback_event(stonith_t * st, stonith_event_t * e)
{
    char *description = NULL;

    if (st->state == stonith_disconnected) {
        /* disconnect cib as well and have everything reconnect */
        mon_cib_connection_destroy(NULL);
        return;
    }

    if (!options.external_agent) {
        return;
    }

    description = stonith__event_description(e);
    send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0,
                     description);
    free(description);
}
/* Cause the screen to be redrawn (via mainloop_set_trigger) in response to an
 * event.
 *
 * The redraw is triggered right away when any of these holds:
 *
 * - the caller enforces it, or
 * - the last refresh occurred more than reconnect_ms ago, or
 * - 10 data updates have been counted since the counter was last reset
 *
 * Otherwise the 2s refresh timer is started so the redraw happens later.
 * While a reconnect is pending nothing is triggered and the refresh timer is
 * stopped, because a refresh request follows a successful reconnect.
 *
 * This function sounds like it would be more broadly useful, but it is only
 * called when a fencing event is received or a CIB diff occurs.
 */
static void
refresh_after_event(gboolean data_updated, gboolean enforce)
{
    static int updates = 0;
    time_t now = time(NULL);
    bool refresh_now = false;

    if (data_updated) {
        updates++;
    }

    // The timer is created on first use and kept for later calls
    if (refresh_timer == NULL) {
        refresh_timer = mainloop_timer_add("refresh", 2000, FALSE,
                                           mon_trigger_refresh, NULL);
    }

    if (reconnect_timer > 0) {
        /* we will receive a refresh request after successful reconnect */
        mainloop_timer_stop(refresh_timer);
        return;
    }

    /* as we're not handling initial failure of fencer-connection as
     * fatal give it a retry here
     * not getting here if cib-reconnection is already on the way
     */
    setup_fencer_connection();

    refresh_now = enforce
                  || ((now - last_refresh)
                      > pcmk__timeout_ms2s(options.reconnect_ms))
                  || (updates >= 10);

    if (!refresh_now) {
        mainloop_timer_start(refresh_timer);
        return;
    }

    mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
    mainloop_timer_stop(refresh_timer);
    updates = 0;
}
/* Fencing event callback used when --watch-fencing is NOT given on the
 * command line (see setup_fencer_connection() for the events it is registered
 * for).
 *
 * A disconnected fencer tears down the CIB connection as well so that
 * everything reconnects; any other event counts as a data update and
 * schedules a display refresh.
 */
static void
mon_st_callback_display(stonith_t * st, stonith_event_t * e)
{
    if (st->state != stonith_disconnected) {
        out->progress(out, false);
        refresh_after_event(TRUE, FALSE);
        return;
    }

    /* disconnect cib as well and have everything reconnect */
    mon_cib_connection_destroy(NULL);
}
/*
* De-init ncurses, disconnect from the CIB manager, disconnect fencing,
* deallocate memory and show usage-message if requested.
*
* We don't actually return, but nominally returning crm_exit_t allows a usage
* like "return clean_up(exit_code);" which helps static analysis understand the
* code flow.
*
* exit_code is the status passed on to out->finish() and crm_exit(). Every path
* through this function ends in a crm_exit() call.
*/
static crm_exit_t
clean_up(crm_exit_t exit_code)
{
/* Quitting crm_mon is much more complicated than it ought to be. */
/* (1) Close connections, free things, etc. */
if (io_channel != NULL) {
g_io_channel_shutdown(io_channel, TRUE, NULL);
}
cib__clean_up_connection(&cib);
- stonith_api_delete(st);
+ stonith__api_free(st);
free(options.neg_location_prefix);
free(options.only_node);
free(options.only_rsc);
free(options.pid_file);
g_slist_free_full(options.includes_excludes, free);
g_strfreev(processed_args);
pcmk_free_scheduler(scheduler);
/* (2) If this is abnormal termination and we're in curses mode, shut down
* curses first. Any messages displayed to the screen before curses is shut
* down will be lost because doing the shut down will also restore the
* screen to whatever it looked like before crm_mon was started.
*/
if (((error != NULL) || (exit_code == CRM_EX_USAGE))
&& (output_format == mon_output_console)
&& (out != NULL)) {
out->finish(out, exit_code, false, NULL);
pcmk__output_free(out);
// Clearing out makes steps (4) and (5) take their "no output object" paths
out = NULL;
}
/* (3) If this is a command line usage related failure, print the usage
* message.
*/
if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) {
char *help = g_option_context_get_help(context, TRUE, NULL);
fprintf(stderr, "%s", help);
g_free(help);
}
// The help text above is generated from the context, so free it only now
pcmk__free_arg_context(context);
/* (4) If this is any kind of error, print the error out and exit. Make
* sure to handle situations both before and after formatted output is
* set up. We want errors to appear formatted if at all possible.
*/
if (error != NULL) {
if (out != NULL) {
out->err(out, "%s: %s", g_get_prgname(), error->message);
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
} else {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
}
g_clear_error(&error);
// Error path ends here; step (5) is reached only when no error is set
crm_exit(exit_code);
}
/* (5) Print formatted output to the screen if we made it far enough in
* crm_mon to be able to do so.
*/
if (out != NULL) {
// In daemonized mode the output object is freed without being finished
if (options.exec_mode != mon_exec_daemonized) {
out->finish(out, exit_code, true, NULL);
}
pcmk__output_free(out);
pcmk__unregister_formats();
}
crm_exit(exit_code);
}
diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index e4ee98c397..162ae40642 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -1,2405 +1,2406 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm_resource.h>
#include <crm/lrmd_internal.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc_attrd_internal.h>
#include <crm/common/lists_internal.h>
#include <crm/common/output.h>
+#include <crm/fencing/internal.h> // stonith__agent_exists()
#include <pacemaker-internal.h>
#include <sys/param.h>
#include <stdbool.h> // bool, true, false
#include <stdint.h> // uint32_t
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <time.h>
#include <libxml/xpath.h> // xmlXPathObject, etc.
#include <crm/crm.h>
#include <crm/stonith-ng.h>
#include <crm/common/agents.h> // PCMK_RESOURCE_CLASS_*
#include <crm/common/ipc_controld.h>
#include <crm/cib/internal.h>
// One-line description of this tool
// NOTE(review): presumably shown in help output; its use is outside this chunk
#define SUMMARY "crm_resource - perform tasks related to Pacemaker cluster resources"
/* Commands that crm_resource can perform.
 *
 * The selected command is stored in options.rsc_cmd, which is set by
 * command_cb() from the command-line option that was given and defaults to
 * cmd_list_resources. Some commands are reachable through more than one
 * option (for example, both --constraints and --stack select
 * cmd_colocations).
 */
enum rsc_command {
cmd_ban,
cmd_cleanup,
cmd_clear,
cmd_colocations,
cmd_cts,
cmd_delete,
cmd_delete_param,
cmd_digests,
cmd_execute_agent,
cmd_fail,
cmd_get_param,
cmd_list_active_ops,
cmd_list_agents,
cmd_list_all_ops,
cmd_list_alternatives,
cmd_list_instances,
cmd_list_options,
cmd_list_providers,
cmd_list_resources,
cmd_list_standards,
cmd_locate,
cmd_metadata,
cmd_move,
cmd_query_xml,
cmd_query_xml_raw,
cmd_refresh,
cmd_restart,
cmd_set_param,
cmd_wait,
cmd_why,
// Update this when adding new commands
// (cmd_max must stay equal to the last enumerator above)
cmd_max = cmd_why,
};
/*!
* \internal
* \brief Handler function for a crm_resource command
*
* A handler receives a resource, a node, a CIB connection, scheduler data, an
* IPC API object and an XML node, and returns the exit code for the command.
*
* \note NOTE(review): which IPC API and which XML node are passed, and which
*       arguments may be NULL for a given command, is decided by the caller
*       outside this chunk; confirm there.
*/
typedef crm_exit_t (*crm_resource_fn_t)(pcmk_resource_t *, pcmk_node_t *,
cib_t *, pcmk_scheduler_t *,
pcmk_ipc_api_t *, xmlNode *);
/*!
* \internal
* \brief Flags to define attributes of a given command
*
* These attributes may include required command-line options, how to look up a
* resource in the scheduler data, whether the command supports clone instances,
* etc.
*
* Each flag occupies its own bit, so several flags can be combined into the
* 32-bit \c flags member of \c crm_resource_cmd_info_t.
*/
enum crm_rsc_flags {
//! Use \c pcmk_rsc_match_anon_basename when looking up a resource
crm_rsc_find_match_anon_basename = (UINT32_C(1) << 0),
//! Use \c pcmk_rsc_match_basename when looking up a resource
crm_rsc_find_match_basename = (UINT32_C(1) << 1),
//! Use \c pcmk_rsc_match_history when looking up a resource
crm_rsc_find_match_history = (UINT32_C(1) << 2),
//! Fail if \c --resource refers to a particular clone instance
crm_rsc_rejects_clone_instance = (UINT32_C(1) << 3),
//! Require CIB connection unless resource is specified by agent
crm_rsc_requires_cib = (UINT32_C(1) << 4),
//! Require controller connection
crm_rsc_requires_controller = (UINT32_C(1) << 5),
//! Require \c --node argument
crm_rsc_requires_node = (UINT32_C(1) << 6),
//! Require \c --resource argument
crm_rsc_requires_resource = (UINT32_C(1) << 7),
//! Require scheduler data unless resource is specified by agent
crm_rsc_requires_scheduler = (UINT32_C(1) << 8),
};
/*!
* \internal
* \brief Handler function and flags for a given command
*
* Pairs the function that implements a command with the requirements and
* lookup behavior (as <tt>enum crm_rsc_flags</tt> bits) that apply to it.
*
* \note NOTE(review): presumably one of these exists per <tt>enum
*       rsc_command</tt> value; the table itself is outside this chunk.
*/
typedef struct {
crm_resource_fn_t fn; //!< Command handler function
uint32_t flags; //!< Group of <tt>enum crm_rsc_flags</tt>
} crm_resource_cmd_info_t;
/* Command-line state for crm_resource.
 *
 * Members are filled in either directly by the GOption entry tables (which
 * store into the member addresses) or by the option callbacks (command_cb(),
 * attr_set_type_cb(), option_cb(), timeout_cb()).
 *
 * Note the mix of string types: gchar members are GLib strings, while char
 * members (prop_name, agent_spec) are set with pcmk__str_update().
 *
 * Defaults: attributes are instance attributes, no check level is set (-1),
 * and the command is to list all resources.
 */
struct {
enum rsc_command rsc_cmd; // crm_resource command to perform
// Command-line option values
gchar *rsc_id; // Value of --resource
gchar *rsc_type; // Value of --resource-type
gboolean all; // --all was given
gboolean force; // --force was given
gboolean clear_expired; // --expired was given
gboolean recursive; // --recursive was given
gboolean promoted_role_only; // --promoted was given
gchar *host_uname; // Value of --node
gchar *interval_spec; // Value of --interval
gchar *move_lifetime; // Value of --lifetime
gchar *operation; // Value of --operation
enum pcmk__opt_flags opt_list; // Parsed from --list-options
const char *attr_set_type; // Instance, meta, utilization, or element attribute
gchar *prop_id; // --nvpair (attribute XML ID)
char *prop_name; // Attribute name
gchar *prop_set; // --set-name (attribute block XML ID)
gchar *prop_value; // --parameter-value (attribute value)
guint timeout_ms; // Parsed from --timeout value
char *agent_spec; // Standard and/or provider and/or agent
int check_level; // Optional value of --validate or --force-check
// Resource configuration specified via command-line arguments
gchar *agent; // Value of --agent
gchar *class; // Value of --class
gchar *provider; // Value of --provider
GHashTable *cmdline_params; // Resource parameters specified
// Positional command-line arguments
gchar **remainder; // Positional arguments as given
GHashTable *override_params; // Resource parameter values that override config
} options = {
.attr_set_type = PCMK_XE_INSTANCE_ATTRIBUTES,
.check_level = -1,
.rsc_cmd = cmd_list_resources, // List all resources if no command given
};
// Exit code for the program; updated by quit_main_loop()
static crm_exit_t exit_code = CRM_EX_OK;
// Output object used for all user-visible messages
static pcmk__output_t *out = NULL;
static pcmk__common_args_t *args = NULL;
// Things that should be cleaned up on exit
static GError *error = NULL;
// Main loop created by start_mainloop() and torn down by quit_main_loop()
static GMainLoop *mainloop = NULL;
// Seconds to wait for controller replies before resource_ipc_timeout() aborts
#define MESSAGE_TIMEOUT_S 60
// Prefix for continuation lines in the option help strings
#define INDENT " "
// Supported output formats; the all-NULL entry terminates the list
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/*!
 * \internal
 * \brief Record an exit code and shut down the main loop, if there is one
 *
 * \param[in] ec  Exit code to store in the global \c exit_code
 */
static void
quit_main_loop(crm_exit_t ec)
{
    GMainLoop *loop = mainloop;

    exit_code = ec;

    if (loop == NULL) {
        return;
    }

    // Clear the global before quitting so this path is not entered again
    mainloop = NULL;
    pcmk_quit_main_loop(loop, 10);
    g_main_loop_unref(loop);
}
/*!
 * \internal
 * \brief Timer callback that aborts waiting for controller replies
 *
 * Replaces any pending error with a timeout error and quits the main loop
 * with \c CRM_EX_TIMEOUT.
 *
 * \param[in] data  Ignored
 *
 * \return \c FALSE always
 */
static gboolean
resource_ipc_timeout(gpointer data)
{
    // g_clear_error() does nothing when no error is set
    g_clear_error(&error);

    g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_TIMEOUT,
                _("Aborting because no messages received in %d seconds"),
                MESSAGE_TIMEOUT_S);

    quit_main_loop(CRM_EX_TIMEOUT);
    return FALSE;
}
/*!
 * \internal
 * \brief Handle an event on the controller IPC connection
 *
 * On disconnect, the current global exit code is reported and the main loop is
 * quit. On a failed reply, the error is shown, the connection is dropped and
 * the main loop is quit with the reply status. On a successful reply, the main
 * loop is quit with \c CRM_EX_OK once no further replies are expected and the
 * main loop is running. Other event types are ignored.
 *
 * \param[in,out] api         Controller IPC API connection
 * \param[in]     event_type  Type of event that occurred
 * \param[in]     status      Status of the event
 * \param[in]     event_data  Ignored
 * \param[out]    user_data   Where to store the resulting exit code
 *                            (<tt>crm_exit_t *</tt>, must not be NULL)
 */
static void
controller_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
                          crm_exit_t status, void *event_data, void *user_data)
{
    crm_exit_t *ec = user_data;

    pcmk__assert(ec != NULL);

    switch (event_type) {
        case pcmk_ipc_event_disconnect:
            if (exit_code == CRM_EX_DISCONNECT) { // Unexpected
                crm_info("Connection to controller was terminated");
            }
            *ec = exit_code;
            quit_main_loop(*ec);
            break;

        case pcmk_ipc_event_reply:
            if (status != CRM_EX_OK) {
                out->err(out, "Error: bad reply from controller: %s",
                         crm_exit_str(status));
                pcmk_disconnect_ipc(api);
                *ec = status;
                quit_main_loop(*ec);
                break;
            }

            if ((pcmk_controld_api_replies_expected(api) != 0)
                || (mainloop == NULL)
                || !g_main_loop_is_running(mainloop)) {

                // More replies are outstanding, or there is no loop to stop
                out->info(out, "... got reply");
                break;
            }

            out->info(out, "... got reply (done)");
            crm_debug("Got all the replies we expected");
            pcmk_disconnect_ipc(api);
            *ec = CRM_EX_OK;
            quit_main_loop(*ec);
            break;

        default:
            break;
    }
}
/*!
 * \internal
 * \brief Run a main loop until all expected controller replies have arrived
 *
 * Does nothing if no replies are expected. Otherwise a timer of
 * \c MESSAGE_TIMEOUT_S seconds is created with resource_ipc_timeout() as its
 * callback, and the main loop is run.
 *
 * \param[in] capi  Controller IPC API connection
 */
static void
start_mainloop(pcmk_ipc_api_t *capi)
{
    // @TODO See if we can avoid setting exit_code as a global variable
    const unsigned int expected = pcmk_controld_api_replies_expected(capi);

    if (expected == 0) {
        return;
    }

    out->info(out, "Waiting for %u %s from the controller",
              expected, pcmk__plural_alt(expected, "reply", "replies"));

    exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects
    mainloop = g_main_loop_new(NULL, FALSE);
    pcmk__create_timer(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL);
    g_main_loop_run(mainloop);
}
/*!
 * \internal
 * \brief Collect the IDs of all location constraints, sorted by ID
 *
 * \param[in] root  CIB XML in which to find the constraints section
 *
 * \return List of location constraint IDs ordered by g_strcmp0(), or \c NULL
 *         if \p root has no constraints section or no location constraint
 *         was found
 *
 * \note The XPath query is run against the document that contains the
 *       constraints section.
 * \note The list holds the pointers returned by pcmk__xe_id() without copying
 *       them. NOTE(review): presumably these stay owned by the XML, so only
 *       the list itself should be freed; confirm against callers.
 */
static GList *
build_constraint_list(xmlNode *root)
{
    GList *retval = NULL;
    xmlNode *cib_constraints = NULL;
    xmlXPathObject *xpathObj = NULL;
    int ndx = 0;
    int num_results = 0;

    cib_constraints = pcmk_find_cib_element(root, PCMK_XE_CONSTRAINTS);
    if (cib_constraints == NULL) {
        // No constraints section: avoid dereferencing NULL for its document
        return NULL;
    }

    xpathObj = pcmk__xpath_search(cib_constraints->doc,
                                  "//" PCMK_XE_RSC_LOCATION);
    num_results = pcmk__xpath_num_results(xpathObj);

    for (ndx = 0; ndx < num_results; ndx++) {
        xmlNode *match = pcmk__xpath_result(xpathObj, ndx);

        if (match != NULL) {
            retval = g_list_insert_sorted(retval, (gpointer) pcmk__xe_id(match),
                                          (GCompareFunc) g_strcmp0);
        }
    }

    xmlXPathFreeObject(xpathObj);
    return retval;
}
/*!
 * \internal
 * \brief Parse the argument of \c --list-options into \c options.opt_list
 *
 * \param[in] optarg  Option type requested by the user
 *
 * \return \c TRUE if \p optarg names a supported option type (fencing or
 *         primitive), otherwise \c FALSE with \c options.opt_list untouched
 */
static gboolean
validate_opt_list(const gchar *optarg)
{
    if (pcmk__str_eq(optarg, PCMK_VALUE_FENCING, pcmk__str_none)) {
        options.opt_list = pcmk__opt_fencing;
        return TRUE;
    }

    if (pcmk__str_eq(optarg, PCMK__VALUE_PRIMITIVE, pcmk__str_none)) {
        options.opt_list = pcmk__opt_primitive;
        return TRUE;
    }

    return FALSE;
}
// GOptionArgFunc callback functions

/*!
 * \internal
 * \brief Select the attribute set type from --meta, --utilization or --element
 *
 * \param[in] option_name  Name of the option being parsed
 * \param[in] optarg       Ignored
 * \param[in] data         Ignored
 * \param[in] error        Ignored
 *
 * \return \c TRUE always; an unrecognized option name leaves
 *         \c options.attr_set_type unchanged
 */
static gboolean
attr_set_type_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                 GError **error) {
    const char *set_type = NULL;

    if (pcmk__str_any_of(option_name, "-m", "--meta", NULL)) {
        set_type = PCMK_XE_META_ATTRIBUTES;

    } else if (pcmk__str_any_of(option_name, "-z", "--utilization", NULL)) {
        set_type = PCMK_XE_UTILIZATION;

    } else if (pcmk__str_eq(option_name, "--element", pcmk__str_none)) {
        set_type = ATTR_SET_ELEMENT;
    }

    if (set_type != NULL) {
        options.attr_set_type = set_type;
    }
    return TRUE;
}
/*!
* \internal
* \brief Process options that set the command
*
* Nothing else should set \c options.rsc_cmd.
*
* Besides selecting the command, some options also store their argument
* (\c options.prop_name, \c options.agent_spec, \c options.check_level), set
* \c options.recursive, set \c options.operation, or create the
* \c options.override_params table.
*
* \param[in] option_name Name of the option being parsed
* \param[in] optarg Value to be parsed
* \param[in] data Ignored
* \param[out] error Where to store recoverable error, if any
*
* \return \c TRUE if the option was successfully parsed, or \c FALSE if an
* error occurred, in which case \p *error is set
*
* \note An \p option_name that matches none of the branches leaves
*       \c options.rsc_cmd unchanged and still returns \c TRUE.
* \note For \c --list-options, \c FALSE is returned via validate_opt_list()
*       without this function setting \p *error itself.
*/
static gboolean
command_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **error)
{
// Sorted by enum rsc_command name
if (pcmk__str_any_of(option_name, "-B", "--ban", NULL)) {
options.rsc_cmd = cmd_ban;
} else if (pcmk__str_any_of(option_name, "-C", "--cleanup", NULL)) {
options.rsc_cmd = cmd_cleanup;
} else if (pcmk__str_any_of(option_name, "-U", "--clear", NULL)) {
options.rsc_cmd = cmd_clear;
} else if (pcmk__str_any_of(option_name, "-a", "--constraints", NULL)) {
options.rsc_cmd = cmd_colocations;
} else if (pcmk__str_any_of(option_name, "-A", "--stack", NULL)) {
// --stack is --constraints with recursion switched on
options.rsc_cmd = cmd_colocations;
options.recursive = TRUE;
} else if (pcmk__str_any_of(option_name, "-c", "--list-cts", NULL)) {
options.rsc_cmd = cmd_cts;
} else if (pcmk__str_any_of(option_name, "-D", "--delete", NULL)) {
options.rsc_cmd = cmd_delete;
} else if (pcmk__str_any_of(option_name, "-d", "--delete-parameter",
NULL)) {
options.rsc_cmd = cmd_delete_param;
pcmk__str_update(&options.prop_name, optarg);
} else if (pcmk__str_eq(option_name, "--digests", pcmk__str_none)) {
options.rsc_cmd = cmd_digests;
if (options.override_params == NULL) {
options.override_params = pcmk__strkey_table(g_free, g_free);
}
} else if (pcmk__str_any_of(option_name,
"--force-demote", "--force-promote",
"--force-start", "--force-stop",
"--force-check", "--validate", NULL)) {
options.rsc_cmd = cmd_execute_agent;
// Replace any operation stored by an earlier option of this group
g_free(options.operation);
options.operation = g_strdup(option_name + 2); // skip "--"
if (options.override_params == NULL) {
options.override_params = pcmk__strkey_table(g_free, g_free);
}
// An argument, if given, is the check level (parsed with a minimum of 0)
if (optarg != NULL) {
if (pcmk__scan_min_int(optarg, &options.check_level,
0) != pcmk_rc_ok) {
g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM,
_("Invalid check level setting: %s"), optarg);
return FALSE;
}
}
} else if (pcmk__str_any_of(option_name, "-F", "--fail", NULL)) {
options.rsc_cmd = cmd_fail;
} else if (pcmk__str_any_of(option_name, "-g", "--get-parameter", NULL)) {
options.rsc_cmd = cmd_get_param;
pcmk__str_update(&options.prop_name, optarg);
} else if (pcmk__str_any_of(option_name, "-O", "--list-operations", NULL)) {
options.rsc_cmd = cmd_list_active_ops;
} else if (pcmk__str_eq(option_name, "--list-agents", pcmk__str_none)) {
options.rsc_cmd = cmd_list_agents;
pcmk__str_update(&options.agent_spec, optarg);
} else if (pcmk__str_any_of(option_name, "-o", "--list-all-operations",
NULL)) {
options.rsc_cmd = cmd_list_all_ops;
} else if (pcmk__str_eq(option_name, "--list-ocf-alternatives",
pcmk__str_none)) {
options.rsc_cmd = cmd_list_alternatives;
pcmk__str_update(&options.agent_spec, optarg);
} else if (pcmk__str_eq(option_name, "--list-options", pcmk__str_none)) {
options.rsc_cmd = cmd_list_options;
// The command is set even if the option type turns out to be invalid
return validate_opt_list(optarg);
} else if (pcmk__str_any_of(option_name, "-l", "--list-raw", NULL)) {
options.rsc_cmd = cmd_list_instances;
} else if (pcmk__str_eq(option_name, "--list-ocf-providers",
pcmk__str_none)) {
options.rsc_cmd = cmd_list_providers;
pcmk__str_update(&options.agent_spec, optarg);
} else if (pcmk__str_any_of(option_name, "-L", "--list", NULL)) {
options.rsc_cmd = cmd_list_resources;
} else if (pcmk__str_eq(option_name, "--list-standards", pcmk__str_none)) {
options.rsc_cmd = cmd_list_standards;
} else if (pcmk__str_any_of(option_name, "-W", "--locate", NULL)) {
options.rsc_cmd = cmd_locate;
} else if (pcmk__str_eq(option_name, "--show-metadata", pcmk__str_none)) {
options.rsc_cmd = cmd_metadata;
pcmk__str_update(&options.agent_spec, optarg);
} else if (pcmk__str_any_of(option_name, "-M", "--move", NULL)) {
options.rsc_cmd = cmd_move;
} else if (pcmk__str_any_of(option_name, "-q", "--query-xml", NULL)) {
options.rsc_cmd = cmd_query_xml;
} else if (pcmk__str_any_of(option_name, "-w", "--query-xml-raw", NULL)) {
options.rsc_cmd = cmd_query_xml_raw;
} else if (pcmk__str_any_of(option_name, "-R", "--refresh", NULL)) {
options.rsc_cmd = cmd_refresh;
} else if (pcmk__str_eq(option_name, "--restart", pcmk__str_none)) {
options.rsc_cmd = cmd_restart;
} else if (pcmk__str_any_of(option_name, "-p", "--set-parameter", NULL)) {
options.rsc_cmd = cmd_set_param;
pcmk__str_update(&options.prop_name, optarg);
} else if (pcmk__str_eq(option_name, "--wait", pcmk__str_none)) {
options.rsc_cmd = cmd_wait;
} else if (pcmk__str_any_of(option_name, "-Y", "--why", NULL)) {
options.rsc_cmd = cmd_why;
}
return TRUE;
}
/*!
 * \internal
 * \brief Store a NAME=VALUE argument as a command-line resource parameter
 *
 * \param[in] option_name  Ignored
 * \param[in] optarg       NAME=VALUE pair to parse
 * \param[in] data         Ignored
 * \param[in] error        Ignored
 *
 * \return \c TRUE if \p optarg was parsed and stored in
 *         \c options.cmdline_params, otherwise \c FALSE
 */
static gboolean
option_cb(const gchar *option_name, const gchar *optarg, gpointer data,
          GError **error)
{
    gchar *param_name = NULL;
    gchar *param_value = NULL;
    const int rc = pcmk__scan_nvpair(optarg, &param_name, &param_value);

    if (rc != pcmk_rc_ok) {
        return FALSE;
    }

    /* services__create_resource_action() ultimately takes ownership of
     * options.cmdline_params. It's not worth trying to ensure that the entire
     * call path uses (gchar *) strings and g_free(). So create the table for
     * (char *) strings, and duplicate the (gchar *) strings when inserting.
     */
    if (options.cmdline_params == NULL) {
        options.cmdline_params = pcmk__strkey_table(free, free);
    }
    pcmk__insert_dup(options.cmdline_params, param_name, param_value);

    // The table holds its own copies, so release the parsed strings
    g_free(param_name);
    g_free(param_value);
    return TRUE;
}
/*!
 * \internal
 * \brief Parse the timeout option value into \c options.timeout_ms
 *
 * \param[in] option_name  Ignored
 * \param[in] optarg       Timeout specification to parse
 * \param[in] data         Ignored
 * \param[in] error        Ignored
 *
 * \return \c FALSE if crm_get_msec() yields a negative value for \p optarg,
 *         otherwise \c TRUE with the result stored, capped at \c UINT_MAX
 */
static gboolean
timeout_cb(const gchar *option_name, const gchar *optarg, gpointer data,
           GError **error)
{
    const long long parsed_ms = crm_get_msec(optarg);

    if (parsed_ms < 0) {
        return FALSE;
    }

    // Clamp to what a guint can hold
    if (parsed_ms < UINT_MAX) {
        options.timeout_ms = (guint) parsed_ms;
    } else {
        options.timeout_ms = (guint) UINT_MAX;
    }
    return TRUE;
}
// Command line option specification
/* short option letters still available: EJkKXyZ
 *
 * NOTE(review): 'e' and 'Y' used to be listed here as available, but they are
 * taken by --expired and --why in the tables below. The remaining letters
 * have not been re-verified against entries outside this part of the file.
 */
/* Options that query information. Every entry is dispatched through
 * command_cb(), which selects the command and, for entries that take an
 * argument (those with an argument description instead of NULL as their last
 * field), also stores that argument.
 */
static GOptionEntry query_entries[] = {
{ "list", 'L', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"List all cluster resources with status",
NULL },
{ "list-raw", 'l', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"List IDs of all instantiated resources (individual members\n"
INDENT "rather than groups etc.)",
NULL },
// Hidden option: it has no help text
{ "list-cts", 'c', G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG,
G_OPTION_ARG_CALLBACK, command_cb,
NULL,
NULL },
{ "list-operations", 'O', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"List active resource operations, optionally filtered by\n"
INDENT "--resource and/or --node",
NULL },
{ "list-all-operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"List all resource operations, optionally filtered by\n"
INDENT "--resource and/or --node",
NULL },
// The allowed TYPE values below are the ones accepted by validate_opt_list()
{ "list-options", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb,
"List all available options of the given type.\n"
INDENT "Allowed values:\n"
INDENT PCMK__VALUE_PRIMITIVE " (primitive resource meta-attributes),\n"
INDENT PCMK_VALUE_FENCING " (parameters common to all fencing resources)",
"TYPE" },
{ "list-standards", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"List supported standards",
NULL },
{ "list-ocf-providers", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"List all available OCF providers",
NULL },
{ "list-agents", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK,
command_cb,
"List all agents available for the named standard and/or provider",
"STD:PROV" },
{ "list-ocf-alternatives", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK,
command_cb,
"List all available providers for the named OCF agent",
"AGENT" },
{ "show-metadata", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb,
"Show the metadata for the named class:provider:agent",
"SPEC" },
{ "query-xml", 'q', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Show XML configuration of resource (after any template expansion)",
NULL },
{ "query-xml-raw", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"Show XML configuration of resource (before any template expansion)",
NULL },
{ "get-parameter", 'g', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK,
command_cb,
"Display named parameter for resource (use instance attribute\n"
INDENT "unless --element, --meta, or --utilization is specified)",
"PARAM" },
{ "locate", 'W', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Show node(s) currently running resource",
NULL },
{ "constraints", 'a', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"Display the location and colocation constraints that apply to a\n"
INDENT "resource, and if --recursive is specified, to the resources\n"
INDENT "directly or indirectly involved in those colocations.\n"
INDENT "If the named resource is part of a group, or a clone or\n"
INDENT "bundle instance, constraints for the collective resource\n"
INDENT "will be shown unless --force is given.",
NULL },
{ "stack", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Equivalent to --constraints --recursive",
NULL },
{ "why", 'Y', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Show why resources are not running, optionally filtered by\n"
INDENT "--resource and/or --node",
NULL },
// Terminating entry
{ NULL }
};
/* Options that run a command against a resource. Every entry is dispatched
 * through command_cb().
 *
 * NOTE(review): two help strings below read oddly ("entire history is always
 * clear" and "--meta or, --utilization"). They are left untouched here because
 * they are user-visible output; consider fixing them in a dedicated change.
 */
static GOptionEntry command_entries[] = {
// LEVEL is optional; when given, command_cb() parses it as the check level
{ "validate", 0, G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"Validate resource configuration by calling agent's validate-all\n"
INDENT "action. The configuration may be specified either by giving an\n"
INDENT "existing resource name with -r, or by specifying --class,\n"
INDENT "--agent, and --provider arguments, along with any number of\n"
INDENT "--option arguments. An optional LEVEL argument can be given\n"
INDENT "to control the level of checking performed.",
"LEVEL" },
{ "cleanup", 'C', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"If resource has any past failures, clear its history and fail\n"
INDENT "count. Optionally filtered by --resource, --node, --operation\n"
INDENT "and --interval (otherwise all). --operation and --interval\n"
INDENT "apply to fail counts, but entire history is always clear, to\n"
INDENT "allow current state to be rechecked. If the named resource is\n"
INDENT "part of a group, or one numbered instance of a clone or bundled\n"
INDENT "resource, the clean-up applies to the whole collective resource\n"
INDENT "unless --force is given.",
NULL },
{ "refresh", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Delete resource's history (including failures) so its current state\n"
INDENT "is rechecked. Optionally filtered by --resource and --node\n"
INDENT "(otherwise all). If the named resource is part of a group, or one\n"
INDENT "numbered instance of a clone or bundled resource, the refresh\n"
INDENT "applies to the whole collective resource unless --force is given.",
NULL },
// PARAM is stored in options.prop_name by command_cb()
{ "set-parameter", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK,
command_cb,
"Set named parameter for resource (requires -v). Use instance\n"
INDENT "attribute unless --element, --meta, or --utilization is "
"specified.",
"PARAM" },
{ "delete-parameter", 'd', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK,
command_cb,
"Delete named parameter for resource. Use instance attribute\n"
INDENT "unless --element, --meta or, --utilization is specified.",
"PARAM" },
// Terminating entry
{ NULL }
};
/* Options that create or remove location constraints, plus their modifiers.
 *
 * --move, --ban and --clear select a command through command_cb(). The
 * modifier options (--expired, --lifetime, --promoted and the deprecated
 * --master) are not callbacks: their values are written directly into members
 * of the options struct.
 */
static GOptionEntry location_entries[] = {
{ "move", 'M', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Create a constraint to move resource. If --node is specified,\n"
INDENT "the constraint will be to move to that node, otherwise it\n"
INDENT "will be to ban the current node. Unless --force is specified\n"
INDENT "this will return an error if the resource is already running\n"
INDENT "on the specified node. If --force is specified, this will\n"
INDENT "always ban the current node.\n"
INDENT "Optional: --lifetime, --promoted. NOTE: This may prevent the\n"
INDENT "resource from running on its previous location until the\n"
INDENT "implicit constraint expires or is removed with --clear.",
NULL },
{ "ban", 'B', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Create a constraint to keep resource off a node.\n"
INDENT "Optional: --node, --lifetime, --promoted.\n"
INDENT "NOTE: This will prevent the resource from running on the\n"
INDENT "affected node until the implicit constraint expires or is\n"
INDENT "removed with --clear. If --node is not specified, it defaults\n"
INDENT "to the node currently running the resource for primitives\n"
INDENT "and groups, or the promoted instance of promotable clones with\n"
INDENT PCMK_META_PROMOTED_MAX "=1 (all other situations result in an\n"
INDENT "error as there is no sane default).",
NULL },
{ "clear", 'U', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Remove all constraints created by the --ban and/or --move\n"
INDENT "commands. Requires: --resource. Optional: --node, --promoted,\n"
INDENT "--expired. If --node is not specified, all constraints created\n"
INDENT "by --ban and --move will be removed for the named resource. If\n"
INDENT "--node and --force are specified, any constraint created by\n"
INDENT "--move will be cleared, even if it is not for the specified\n"
INDENT "node. If --expired is specified, only those constraints whose\n"
INDENT "lifetimes have expired will be removed.",
NULL },
{ "expired", 'e', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE,
&options.clear_expired,
"Modifies the --clear argument to remove constraints with\n"
INDENT "expired lifetimes.",
NULL },
// NOTE(review): the help text below has a ")" without a matching "("
{ "lifetime", 'u', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.move_lifetime,
"Lifespan (as ISO 8601 duration) of created constraints (with\n"
INDENT "-B, -M) see https://en.wikipedia.org/wiki/ISO_8601#Durations)",
"TIMESPEC" },
{ "promoted", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE,
&options.promoted_role_only,
"Limit scope of command to promoted role (with -B, -M, -U). For\n"
INDENT "-B and -M, previously promoted instances may remain\n"
INDENT "active in the unpromoted role.",
NULL },
// Deprecated since 2.1.0
// (stores into the same member as --promoted)
{ "master", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE,
&options.promoted_role_only,
"Deprecated: Use --promoted instead", NULL },
// Terminating entry
{ NULL }
};
/* Command-line option table for the commands whose help text is tagged
 * "(Advanced)".
 *
 * Each initializer follows the GOptionEntry field order: long name, short name
 * (0 for none), flags, argument kind, argument data (a callback here),
 * description, and argument placeholder (NULL when no argument is shown).
 *
 * Every entry in this table dispatches through command_cb. INDENT is a macro
 * defined outside this view; it is concatenated in front of each continuation
 * line of a description (presumably to align wrapped help text -- TODO
 * confirm).
 */
static GOptionEntry advanced_entries[] = {
{ "delete", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"(Advanced) Delete a resource from the CIB. Required: -t",
NULL },
{ "fail", 'F', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"(Advanced) Tell the cluster this resource has failed",
NULL },
{ "restart", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"(Advanced) Tell the cluster to restart this resource and\n"
INDENT "anything that depends on it. This temporarily modifies\n"
INDENT "the CIB, and other CIB modifications should be avoided\n"
INDENT "while this is in progress. If a node is fenced because\n"
INDENT "the stop portion of the restart fails, CIB modifications\n"
INDENT "such as target-role may remain.",
NULL },
{ "wait", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"(Advanced) Wait until the cluster settles into a stable state",
NULL },
{ "digests", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"(Advanced) Show parameter hashes that Pacemaker uses to detect\n"
INDENT "configuration changes (only accurate if there is resource\n"
INDENT "history on the specified node). Required: --resource, --node.\n"
INDENT "Optional: any NAME=VALUE parameters will be used to override\n"
INDENT "the configuration (to see what the hash would be with those\n"
INDENT "changes).",
NULL },
// The --force-* options below share command_cb with the entries above
{ "force-demote", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"(Advanced) Bypass the cluster and demote a resource on the local\n"
INDENT "node. Unless --force is specified, this will refuse to do so if\n"
INDENT "the cluster believes the resource is a clone instance already\n"
INDENT "running on the local node.",
NULL },
{ "force-stop", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"(Advanced) Bypass the cluster and stop a resource on the local node",
NULL },
{ "force-start", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"(Advanced) Bypass the cluster and start a resource on the local\n"
INDENT "node. Unless --force is specified, this will refuse to do so if\n"
INDENT "the cluster believes the resource is a clone instance already\n"
INDENT "running on the local node.",
NULL },
{ "force-promote", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"(Advanced) Bypass the cluster and promote a resource on the local\n"
INDENT "node. Unless --force is specified, this will refuse to do so if\n"
INDENT "the cluster believes the resource is a clone instance already\n"
INDENT "running on the local node.",
NULL },
// Only entry in this table whose argument is optional (shown as LEVEL)
{ "force-check", 0, G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK,
command_cb,
"(Advanced) Bypass the cluster and check the state of a resource on\n"
INDENT "the local node. An optional LEVEL argument can be given\n"
INDENT "to control the level of checking performed.",
"LEVEL" },
// All-NULL entry terminates the table
{ NULL }
};
/* Command-line option table for modifiers that refine other commands (node
 * selection, attribute-set selection, agent specification, timeouts, and so
 * on).
 *
 * Each initializer follows the GOptionEntry field order: long name, short name
 * (0 for none), flags, argument kind, argument data, description, and argument
 * placeholder (NULL when no argument is shown).
 *
 * Entries with G_OPTION_ARG_STRING or G_OPTION_ARG_NONE store directly into a
 * member of the file-level "options" object; entries with
 * G_OPTION_ARG_CALLBACK hand the value to a callback (attr_set_type_cb,
 * option_cb, or timeout_cb). INDENT is a macro defined outside this view that
 * is concatenated in front of each continuation line of a description.
 */
static GOptionEntry addl_entries[] = {
{ "node", 'N', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.host_uname,
"Node name",
"NAME" },
{ "recursive", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.recursive,
"Follow colocation chains when using --set-parameter or --constraints",
NULL },
{ "resource-type", 't', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.rsc_type,
"Resource XML element (primitive, group, etc.) (with -D)",
"ELEMENT" },
{ "parameter-value", 'v', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_value,
"Value to use with -p",
"PARAM" },
// --meta, --utilization, and --element all go through attr_set_type_cb
{ "meta", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb,
"Use resource meta-attribute instead of instance attribute\n"
INDENT "(with -p, -g, -d)",
NULL },
{ "utilization", 'z', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb,
"Use resource utilization attribute instead of instance attribute\n"
INDENT "(with -p, -g, -d)",
NULL },
{ "element", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb,
"Use resource element attribute instead of instance attribute\n"
INDENT "(with -p, -g, -d)",
NULL },
{ "operation", 'n', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.operation,
"Operation to clear instead of all (with -C -r)",
"OPERATION" },
{ "interval", 'I', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.interval_spec,
"Interval of operation to clear (default 0s) (with -C -r -n)",
"N" },
// --class, --agent, --provider, and --option describe an agent directly
{ "class", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.class,
"The standard the resource agent conforms to (for example, ocf).\n"
INDENT "Use with --agent, --provider, --option, and --validate.",
"CLASS" },
{ "agent", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.agent,
"The agent to use (for example, IPaddr). Use with --class,\n"
INDENT "--provider, --option, and --validate.",
"AGENT" },
{ "provider", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.provider,
"The vendor that supplies the resource agent (for example,\n"
INDENT "heartbeat). Use with --class, --agent, --option, and --validate.",
"PROVIDER" },
{ "option", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, option_cb,
"Specify a device configuration parameter as NAME=VALUE (may be\n"
INDENT "specified multiple times). Use with --validate and without the\n"
INDENT "-r option.",
"PARAM" },
{ "set-name", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_set,
"(Advanced) XML ID of attributes element to use (with -p, -d)",
"ID" },
{ "nvpair", 'i', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_id,
"(Advanced) XML ID of nvpair element to use (with -p, -d)",
"ID" },
{ "timeout", 'T', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, timeout_cb,
"(Advanced) Abort if command does not finish in this time (with\n"
INDENT "--restart, --wait, --force-*)",
"N" },
{ "all", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.all,
"List all options, including advanced and deprecated (with\n"
INDENT "--list-options)",
NULL },
{ "force", 'f', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.force,
"Force the action to be performed. See help for individual commands for\n"
INDENT "additional behavior.",
NULL },
// Hidden from help output; stores into the same member as --node
// @COMPAT Used in resource-agents prior to v4.2.0
{ "host-uname", 'H', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_STRING, &options.host_uname,
NULL,
"HOST" },
// All-NULL entry terminates the table
{ NULL }
};
/*!
 * \internal
 * \brief Ban a resource from the single node where it is active or promoted
 *
 * Used when the user did not name a node. If the resource is active on exactly
 * one node, that node is banned. Otherwise, if the resource is promotable and
 * exactly one child is in the promoted role, that child's current node is
 * banned. In every other situation there is no sensible default, so a usage
 * error is recorded in the file-level \c error object.
 *
 * \param[in] out            Output object passed through to cli_resource_ban()
 * \param[in] rsc            Resource to inspect
 * \param[in] cib_conn       CIB connection passed through to cli_resource_ban()
 * \param[in] move_lifetime  Constraint lifetime passed through to
 *                           cli_resource_ban()
 *
 * \return Result of cli_resource_ban(), or \c EINVAL if \p rsc is \c NULL or no
 *         single node could be chosen
 */
static int
ban_or_move(pcmk__output_t *out, pcmk_resource_t *rsc, cib_t *cib_conn,
            const char *move_lifetime)
{
    int rc = pcmk_rc_ok;
    pcmk_node_t *current = NULL;
    unsigned int nactive = 0;

    CRM_CHECK(rsc != NULL, return EINVAL);

    current = pe__find_active_requires(rsc, &nactive);

    if (nactive == 1) {
        rc = cli_resource_ban(out, options.rsc_id, current->priv->name,
                              move_lifetime, cib_conn,
                              options.promoted_role_only, PCMK_ROLE_PROMOTED);

    } else if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) {
        int count = 0;
        GList *iter = NULL;

        // Look for a unique promoted instance instead
        current = NULL;
        for (iter = rsc->priv->children; iter != NULL; iter = iter->next) {
            pcmk_resource_t *child = (pcmk_resource_t *)iter->data;
            enum rsc_role_e child_role = child->priv->fns->state(child, true);

            if (child_role == pcmk_role_promoted) {
                count++;
                current = pcmk__current_node(child);
            }
        }

        if(count == 1 && current) {
            rc = cli_resource_ban(out, options.rsc_id, current->priv->name,
                                  move_lifetime, cib_conn,
                                  options.promoted_role_only,
                                  PCMK_ROLE_PROMOTED);

        } else {
            rc = EINVAL;
            /* The two hints are separate sentences; the newline after
             * "specify a node." keeps them from running together.
             */
            g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                        _("Resource '%s' not moved: active in %d locations (promoted in %d).\n"
                        "To prevent '%s' from running on a specific location, "
                        "specify a node.\n"
                        "To prevent '%s' from being promoted at a specific "
                        "location, specify a node and the --promoted option."),
                        options.rsc_id, nactive, count, options.rsc_id, options.rsc_id);
        }

    } else {
        rc = EINVAL;
        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                    _("Resource '%s' not moved: active in %d locations.\n"
                    "To prevent '%s' from running on a specific location, "
                    "specify a node."),
                    options.rsc_id, nactive, options.rsc_id);
    }

    return rc;
}
/*!
 * \internal
 * \brief Erase a resource's failures and, on success, wait for the result
 *
 * Unless \c options.force is set, the request is applied to the topmost parent
 * of \p rsc rather than to \p rsc itself.
 *
 * \param[in] out           Output object (consulted for quiet mode)
 * \param[in] rsc           Resource whose failures should be erased
 * \param[in] node          Node to limit the request to, or \c NULL for all
 * \param[in] controld_api  Controller connection
 */
static void
cleanup(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk_node_t *node,
        pcmk_ipc_api_t *controld_api)
{
    int rc;

    if (!options.force) {
        rsc = uber_parent(rsc);
    }

    crm_debug("Erasing failures of %s (%s requested) on %s",
              rsc->id, options.rsc_id,
              ((node != NULL)? pcmk__node_name(node) : "all nodes"));

    rc = cli_resource_delete(controld_api, rsc, node, options.operation,
                             options.interval_spec, true, options.force);

    /* @FIXME The mainloop functions in this file set exit_code. What happens to
     * exit_code if rc != pcmk_rc_ok here?
     */
    if (rc != pcmk_rc_ok) {
        return;
    }

    if (!out->is_quiet(out)) {
        // Show any reasons why resource might stay stopped
        cli_resource_check(out, rsc, node);
    }

    start_mainloop(controld_api);
}
/*!
 * \internal
 * \brief Create scheduler data and populate it from the CIB
 *
 * The CIB XML that was queried is upgraded to the latest schema version before
 * it is used to fill in the new scheduler object.
 *
 * \param[out] scheduler     Where to store the new scheduler data
 * \param[in]  cib_conn      CIB connection
 * \param[in]  out           Output object to attach to the scheduler data
 * \param[out] cib_xml_orig  Where to store the queried CIB XML as it was
 *                           before any schema upgrade
 *
 * \return Standard Pacemaker return code
 *
 * \note Both \p *scheduler and \p *cib_xml_orig must be \c NULL on entry.
 * \note On success, the caller owns \p *scheduler and must release it with
 *       \c pcmk_free_scheduler. On failure, \p *scheduler is left \c NULL.
 */
static int
initialize_scheduler_data(pcmk_scheduler_t **scheduler, cib_t *cib_conn,
                          pcmk__output_t *out, xmlNode **cib_xml_orig)
{
    pcmk_scheduler_t *sched = NULL;
    int rc;

    pcmk__assert((scheduler != NULL) && (*scheduler == NULL)
                 && (cib_conn != NULL) && (out != NULL)
                 && (cib_xml_orig != NULL) && (*cib_xml_orig == NULL));

    sched = pcmk_new_scheduler();
    if (sched == NULL) {
        return ENOMEM;
    }

    pcmk__set_scheduler_flags(sched, pcmk__sched_no_counts);
    sched->priv->out = out;

    rc = update_scheduler_input(out, sched, cib_conn, cib_xml_orig);
    if (rc == pcmk_rc_ok) {
        cluster_status(sched);

        // Hand ownership to the caller only once everything succeeded
        *scheduler = sched;
        return pcmk_rc_ok;
    }

    pcmk_free_scheduler(sched);
    return rc;
}
/*!
 * \internal
 * \brief Ask the cluster to re-check the state of all resources on a node
 *
 * Clears recorded failures via the attribute manager, then asks the controller
 * to reprobe. For a Pacemaker Remote node, the request is routed through the
 * node currently hosting its connection.
 *
 * \param[in] out           Output object
 * \param[in] node          Node to refresh, or \c NULL for all nodes
 * \param[in] controld_api  Controller connection, or \c NULL for a dry run
 *
 * \return \c exit_code as set by the mainloop if the reprobe request was sent;
 *         \c CRM_EX_OK for a dry run; otherwise the exit code corresponding to
 *         the failure (no connection host for a remote node, or the reprobe
 *         request itself failing)
 */
static crm_exit_t
refresh(pcmk__output_t *out, const pcmk_node_t *node,
        pcmk_ipc_api_t *controld_api)
{
    const char *node_name = NULL;
    const char *log_node_name = "all nodes";
    const char *router_node = NULL;
    int attr_options = pcmk__node_attr_none;
    int rc = pcmk_rc_ok;

    if (node != NULL) {
        node_name = node->priv->name;
        log_node_name = pcmk__node_name(node);
        router_node = node->priv->name;
    }

    if (pcmk__is_pacemaker_remote_node(node)) {
        const pcmk_node_t *conn_host = pcmk__current_node(node->priv->remote);

        if (conn_host == NULL) {
            rc = ENXIO;
            g_set_error(&error, PCMK__RC_ERROR, rc,
                        _("No cluster connection to Pacemaker Remote node %s "
                          "detected"),
                        log_node_name);
            return pcmk_rc2exitc(rc);
        }
        router_node = conn_host->priv->name;
        pcmk__set_node_attr_flags(attr_options, pcmk__node_attr_remote);
    }

    if (controld_api == NULL) {
        out->info(out, "Dry run: skipping clean-up of %s due to CIB_file",
                  log_node_name);
        return CRM_EX_OK;
    }

    crm_debug("Re-checking the state of all resources on %s", log_node_name);

    // @FIXME We shouldn't discard the result of clearing failures here
    (void) pcmk__attrd_api_clear_failures(NULL, node_name, NULL, NULL, NULL,
                                          NULL, attr_options);

    /* Keep the reprobe result so that a failed request is reported with its own
     * error, rather than with the unrelated result of clearing failures (which
     * could be success).
     */
    rc = pcmk_controld_api_reprobe(controld_api, node_name, router_node);
    if (rc == pcmk_rc_ok) {
        // start_mainloop() sets exit_code
        start_mainloop(controld_api);
        return exit_code;
    }

    return pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Re-check the state of one resource and, on success, wait for the result
 *
 * Unless \c options.force is set, the request is applied to the topmost parent
 * of \p rsc rather than to \p rsc itself.
 *
 * \param[in] out           Output object (consulted for quiet mode)
 * \param[in] rsc           Resource to refresh
 * \param[in] node          Node to limit the request to, or \c NULL for all
 * \param[in] controld_api  Controller connection
 */
static void
refresh_resource(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk_node_t *node,
                 pcmk_ipc_api_t *controld_api)
{
    int rc;

    if (!options.force) {
        rsc = uber_parent(rsc);
    }

    crm_debug("Re-checking the state of %s (%s requested) on %s",
              rsc->id, options.rsc_id,
              ((node != NULL)? pcmk__node_name(node) : "all nodes"));

    rc = cli_resource_delete(controld_api, rsc, node, NULL, 0, false,
                             options.force);

    /* @FIXME The mainloop functions in this file set exit_code. What happens to
     * exit_code if rc != pcmk_rc_ok here?
     */
    if (rc != pcmk_rc_ok) {
        return;
    }

    if (!out->is_quiet(out)) {
        // Show any reasons why resource might stay stopped
        cli_resource_check(out, rsc, node);
    }

    start_mainloop(controld_api);
}
/*!
 * \internal
 * \brief Tell whether the user described a resource agent on the command line
 *
 * \return \c false if none of \c --class, \c --provider, and \c --agent was
 *         given, or \c true if at least one of them was
 */
static inline bool
has_cmdline_config(void)
{
    const bool nothing_given = (options.class == NULL)
                               && (options.provider == NULL)
                               && (options.agent == NULL);

    return !nothing_given;
}
/*!
 * \internal
 * \brief Validate --class/--provider/--agent given on the command line
 *
 * Does nothing if none of those options was given. Otherwise each check below
 * records a usage error in the file-level \c error object and returns at the
 * first problem found. If all checks pass, \c options.cmdline_params is created
 * if it does not already exist.
 *
 * NOTE(review): this text was extracted from a patch view. The two lines
 * starting with "- " and "+ " inside the stonith agent check are the pre- and
 * post-patch versions of the same statement and are preserved as found.
 */
static void
validate_cmdline_config(void)
{
// Computed up front; only consulted after the sanity check below
bool is_ocf = pcmk__str_eq(options.class, PCMK_RESOURCE_CLASS_OCF,
pcmk__str_none);
// Sanity check before throwing any errors
if (!has_cmdline_config()) {
return;
}
// Cannot use both --resource and command-line resource configuration
if (options.rsc_id != NULL) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("--class, --agent, and --provider cannot be used with "
"-r/--resource"));
return;
}
/* Check whether command supports command-line resource configuration
*
* @FIXME According to the help text, these options can only be used with
* --validate. The --force-* commands are documented for resources that are
* configured in Pacemaker. So this is a bug. We have two choices:
* * Throw an error if --force-* commands are used with these options.
* * Document that --force-* commands can be used with these options.
*
* An error seems safer. If a user really wants to run a non-trivial
* resource action based on CLI parameters, they can do so by executing the
* resource agent directly. It's unsafe to do so if Pacemaker is managing
* the resource that's specified via --class, --option, etc.
*
* On the other hand, besides safety concerns, running other actions is
* exactly the same as running a validate action, and the implementation is
* already in place.
*/
if (options.rsc_cmd != cmd_execute_agent) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("--class, --agent, and --provider can only be used with "
"--validate and --force-*"));
return;
}
// Check for a valid combination of --class, --agent, and --provider
if (is_ocf) {
if ((options.provider == NULL) || (options.agent == NULL)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("--provider and --agent are required with "
"--class=ocf"));
return;
}
} else {
if (options.provider != NULL) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("--provider is supported only with --class=ocf"));
return;
}
// Either --class or --agent was given
if (options.agent == NULL) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("--agent is required with --class"));
return;
}
if (options.class == NULL) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("--class is required with --agent"));
return;
}
}
// Check whether agent exists
if (pcmk__str_eq(options.class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_none)) {
- if (!stonith_agent_exists(options.agent, 0)) {
+ if (!stonith__agent_exists(options.agent)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("%s is not a known stonith agent"), options.agent);
return;
}
} else if (!resources_agent_exists(options.class, options.provider,
options.agent)) {
// Non-OCF agents have no provider, so it is left out of the message
if (is_ocf) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("%s:%s:%s is not a known resource agent"),
options.class, options.provider, options.agent);
} else {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
_("%s:%s is not a known resource agent"),
options.class, options.agent);
}
return;
}
// Make sure the parameter table exists even if no --option was given
if (options.cmdline_params == NULL) {
options.cmdline_params = pcmk__strkey_table(free, free);
}
}
/*!
 * \internal
 * \brief Command handler for \c cmd_ban
 *
 * Bans the requested resource from \p node if one was given; otherwise lets
 * ban_or_move() pick the node. The \p scheduler, \p controld_api, and
 * \p cib_xml_orig arguments are not used by this handler.
 *
 * \return \c CRM_EX_USAGE if the ban attempt returned \c EINVAL, otherwise the
 *         exit code corresponding to its result
 */
static crm_exit_t
handle_ban(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
           pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
           xmlNode *cib_xml_orig)
{
    int rc;

    if (node != NULL) {
        rc = cli_resource_ban(out, options.rsc_id, node->priv->name,
                              options.move_lifetime, cib_conn,
                              options.promoted_role_only, PCMK_ROLE_PROMOTED);
    } else {
        rc = ban_or_move(out, rsc, cib_conn, options.move_lifetime);
    }

    return (rc == EINVAL)? CRM_EX_USAGE : pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_cleanup
 *
 * With a resource, delegates to cleanup(). Without one, asks for a cleanup of
 * everything via cli_cleanup_all() and runs the mainloop if that request
 * succeeded. The \p cib_conn and \p cib_xml_orig arguments are not used by this
 * handler.
 *
 * \return Current value of the file-level \c exit_code
 */
static crm_exit_t
handle_cleanup(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
               pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
               xmlNode *cib_xml_orig)
{
    if (rsc != NULL) {
        cleanup(out, rsc, node, controld_api);

    } else if (cli_cleanup_all(controld_api, node, options.operation,
                               options.interval_spec,
                               scheduler) == pcmk_rc_ok) {
        start_mainloop(controld_api);
    }

    /* @FIXME Both of the branches above are supposed to set exit_code via
     * start_mainloop(). But if cli_cleanup_all() or cli_resource_delete()
     * fails, we never start the mainloop. It looks as if we exit with CRM_EX_OK
     * in those cases.
     */
    return exit_code;
}
/*!
 * \internal
 * \brief Command handler for \c cmd_clear
 *
 * Removes constraints for the requested resource: only expired ones if
 * \c options.clear_expired is set, otherwise those for \p node if given, or for
 * every node known to the scheduler. Unless output is quiet, the CIB is queried
 * again afterward and each constraint that disappeared is reported.
 *
 * The \p rsc, \p controld_api, and \p cib_xml_orig arguments are not used by
 * this handler.
 *
 * \return Exit code corresponding to a failure to re-query the CIB if that
 *         happens, otherwise the exit code corresponding to the result of the
 *         clear operation itself
 */
static crm_exit_t
handle_clear(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
             pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
             xmlNode *cib_xml_orig)
{
    const char *node_name = (node != NULL)? node->priv->name : NULL;
    GList *before = NULL;
    GList *after = NULL;
    GList *remaining = NULL;
    int rc = pcmk_rc_ok;

    // Snapshot the constraints first so removals can be reported afterward
    if (!out->is_quiet(out)) {
        before = build_constraint_list(scheduler->input);
    }

    if (options.clear_expired) {
        rc = cli_resource_clear_all_expired(scheduler->input, cib_conn,
                                            options.rsc_id, node_name,
                                            options.promoted_role_only);

    } else if (node != NULL) {
        rc = cli_resource_clear(options.rsc_id, node_name, NULL, cib_conn, true,
                                options.force);

    } else {
        rc = cli_resource_clear(options.rsc_id, NULL, scheduler->nodes,
                                cib_conn, true, options.force);
    }

    if (!out->is_quiet(out)) {
        xmlNode *cib_xml = NULL;

        /* Track the query result separately so that a successful query does
         * not hide a failure of the clear operation above.
         */
        int query_rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml,
                                             cib_sync_call);

        query_rc = pcmk_legacy2rc(query_rc);

        if (query_rc != pcmk_rc_ok) {
            g_set_error(&error, PCMK__RC_ERROR, query_rc,
                        _("Could not get modified CIB: %s"),
                        pcmk_rc_str(query_rc));
            g_list_free(before);
            pcmk__xml_free(cib_xml);
            return pcmk_rc2exitc(query_rc);
        }

        scheduler->input = cib_xml;
        cluster_status(scheduler);

        after = build_constraint_list(scheduler->input);
        remaining = pcmk__subtract_lists(before, after, (GCompareFunc) strcmp);

        for (const GList *iter = remaining; iter != NULL; iter = iter->next) {
            const char *constraint = iter->data;

            out->info(out, "Removing constraint: %s", constraint);
        }

        g_list_free(before);
        g_list_free(after);
        g_list_free(remaining);
    }

    return pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_colocations
 *
 * Emits the "locations-and-colocations" message for \p rsc, passing along
 * \c options.recursive and \c options.force. Only \p rsc is used from the
 * handler arguments.
 *
 * \return Exit code corresponding to the result of the output message
 */
static crm_exit_t
handle_colocations(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                   pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                   xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(out->message(out, "locations-and-colocations", rsc,
                                      options.recursive, options.force));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_cts
 *
 * Calls cli_resource_print_cts() for every resource in the scheduler data, then
 * cli_resource_print_cts_constraints(). Only \p scheduler is used from the
 * handler arguments.
 *
 * \return \c CRM_EX_OK (always)
 */
static crm_exit_t
handle_cts(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
xmlNode *cib_xml_orig)
{
// The file-level output object is passed to each call as user data
g_list_foreach(scheduler->priv->resources, (GFunc) cli_resource_print_cts,
out);
cli_resource_print_cts_constraints(scheduler);
return CRM_EX_OK;
}
/*!
 * \internal
 * \brief Command handler for \c cmd_delete
 *
 * Deletes the resource named by \c options.rsc_id, of element type
 * \c options.rsc_type, from the CIB. Only \p cib_conn is used from the handler
 * arguments.
 *
 * \return \c CRM_EX_USAGE if no resource type was given, otherwise the exit
 *         code corresponding to the result of the deletion
 *
 * \note \c options.rsc_id has already been checked for \c NULL during
 *       command-line validation, well before this handler runs.
 */
static crm_exit_t
handle_delete(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
              pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
              xmlNode *cib_xml_orig)
{
    int rc;

    if (options.rsc_type == NULL) {
        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                    _("You need to specify a resource type with -t"));
        return CRM_EX_USAGE;
    }

    rc = pcmk__resource_delete(cib_conn, cib_sync_call, options.rsc_id,
                               options.rsc_type);
    if (rc != pcmk_rc_ok) {
        g_set_error(&error, PCMK__RC_ERROR, rc,
                    _("Could not delete resource %s: %s"),
                    options.rsc_id, pcmk_rc_str(rc));
    }

    return pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_delete_param
 *
 * Forwards the attribute selection from \c options to
 * cli_resource_delete_attribute(). The \p node, \p scheduler, and
 * \p controld_api arguments are not used by this handler.
 *
 * \return Exit code corresponding to the result of the deletion
 */
static crm_exit_t
handle_delete_param(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                    pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                    xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(cli_resource_delete_attribute(rsc, options.rsc_id,
                                                       options.prop_set,
                                                       options.attr_set_type,
                                                       options.prop_id,
                                                       options.prop_name,
                                                       cib_conn, cib_xml_orig,
                                                       options.force));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_digests
 *
 * Passes \p rsc, \p node, and \c options.override_params to
 * pcmk__resource_digests(). The remaining handler arguments are not used.
 *
 * \return Exit code corresponding to the result of pcmk__resource_digests()
 */
static crm_exit_t
handle_digests(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
               pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
               xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(pcmk__resource_digests(out, rsc, node,
                                                options.override_params));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_execute_agent
 *
 * If an agent was described on the command line (see has_cmdline_config()),
 * the action is executed from those parameters; otherwise it is executed for
 * the configured resource \p rsc. The \p node, \p scheduler, \p controld_api,
 * and \p cib_xml_orig arguments are not used by this handler.
 *
 * \return Exit code returned by whichever execution function was called
 */
static crm_exit_t
handle_execute_agent(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                     pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                     xmlNode *cib_xml_orig)
{
    if (!has_cmdline_config()) {
        return cli_resource_execute(rsc, options.rsc_id, options.operation,
                                    options.override_params,
                                    options.timeout_ms, cib_conn,
                                    args->verbosity, options.force,
                                    options.check_level);
    }

    return cli_resource_execute_from_params(out, NULL, options.class,
                                            options.provider, options.agent,
                                            options.operation,
                                            options.cmdline_params,
                                            options.override_params,
                                            options.timeout_ms,
                                            args->verbosity, options.force,
                                            options.check_level);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_fail
 *
 * Sends the failure request via cli_resource_fail() and, if that succeeded,
 * runs the mainloop. The \p cib_conn, \p scheduler, and \p cib_xml_orig
 * arguments are not used by this handler.
 *
 * \return Exit code corresponding to the request's result if it failed,
 *         otherwise \c exit_code as set by the mainloop
 */
static crm_exit_t
handle_fail(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
            pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
            xmlNode *cib_xml_orig)
{
    int rc = cli_resource_fail(controld_api, rsc, options.rsc_id, node);

    if (rc != pcmk_rc_ok) {
        return pcmk_rc2exitc(rc);
    }

    // start_mainloop() sets exit_code
    start_mainloop(controld_api);
    return exit_code;
}
/*!
 * \internal
 * \brief Command handler for \c cmd_get_param
 *
 * Looks up \c options.prop_name for \p rsc in the attribute set selected by
 * \c options.attr_set_type (instance attributes, meta-attributes, the resource
 * XML element itself, or utilization attributes as the fallback) and emits the
 * "attribute-list" message with whatever value was found.
 *
 * If the resource is active on more than one node, an error is printed and the
 * lookup proceeds without a node. The \p node, \p cib_conn, \p controld_api,
 * and \p cib_xml_orig arguments are not used by this handler.
 *
 * \return Exit code corresponding to the result of the output message
 */
static crm_exit_t
handle_get_param(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                 pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                 xmlNode *cib_xml_orig)
{
    unsigned int count = 0;
    pcmk_node_t *active_on = rsc->priv->fns->active_node(rsc, &count, NULL);
    GHashTable *params = NULL;
    const char *value = NULL;

    // Set only in the branches that create a table of their own
    bool free_params = false;
    int rc = pcmk_rc_ok;

    if (count > 1) {
        out->err(out,
                 "%s is active on more than one node, returning the default "
                 "value for %s",
                 rsc->id, pcmk__s(options.prop_name, "unspecified property"));
        active_on = NULL;
    }

    crm_debug("Looking up %s in %s", options.prop_name, rsc->id);

    if (pcmk__str_eq(options.attr_set_type, PCMK_XE_INSTANCE_ATTRIBUTES,
                     pcmk__str_none)) {
        // This table is not ours to destroy
        params = pe_rsc_params(rsc, active_on, scheduler);
        value = g_hash_table_lookup(params, options.prop_name);

    } else if (pcmk__str_eq(options.attr_set_type, PCMK_XE_META_ATTRIBUTES,
                            pcmk__str_none)) {
        params = pcmk__strkey_table(free, free);
        free_params = true;
        get_meta_attributes(params, rsc, NULL, scheduler);
        value = g_hash_table_lookup(params, options.prop_name);

    } else if (pcmk__str_eq(options.attr_set_type, ATTR_SET_ELEMENT,
                            pcmk__str_none)) {
        // Read straight from the resource's XML; no table involved
        value = crm_element_value(rsc->priv->xml, options.prop_name);

    } else {
        const pcmk_rule_input_t rule_input = {
            .now = scheduler->priv->now,
        };

        params = pcmk__strkey_table(free, free);
        free_params = true;
        pe__unpack_dataset_nvpairs(rsc->priv->xml, PCMK_XE_UTILIZATION,
                                   &rule_input, params, NULL, scheduler);
        value = g_hash_table_lookup(params, options.prop_name);
    }

    rc = out->message(out, "attribute-list", rsc, options.prop_name, value);

    if (free_params) {
        g_hash_table_destroy(params);
    }

    return pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_active_ops
 *
 * Calls cli_resource_print_operations() with its third argument set to
 * \c true, for \c options.rsc_id and (if given) the name of \p node. Only
 * \p node and \p scheduler are used from the handler arguments.
 *
 * \return Exit code corresponding to the result of the print call
 */
static crm_exit_t
handle_list_active_ops(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                       pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                       xmlNode *cib_xml_orig)
{
    const char *node_name = NULL;

    if (node != NULL) {
        node_name = node->priv->name;
    }

    return pcmk_rc2exitc(cli_resource_print_operations(options.rsc_id,
                                                       node_name, true,
                                                       scheduler));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_agents
 *
 * Passes \c options.agent_spec to pcmk__list_agents(). None of the handler
 * arguments is used.
 *
 * \return Exit code corresponding to the result of pcmk__list_agents()
 */
static crm_exit_t
handle_list_agents(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                   pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                   xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(pcmk__list_agents(out, options.agent_spec));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_all_ops
 *
 * Calls cli_resource_print_operations() with its third argument set to
 * \c false, for \c options.rsc_id and (if given) the name of \p node. Only
 * \p node and \p scheduler are used from the handler arguments.
 *
 * \return Exit code corresponding to the result of the print call
 */
static crm_exit_t
handle_list_all_ops(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                    pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                    xmlNode *cib_xml_orig)
{
    const char *node_name = NULL;

    if (node != NULL) {
        node_name = node->priv->name;
    }

    return pcmk_rc2exitc(cli_resource_print_operations(options.rsc_id,
                                                       node_name, false,
                                                       scheduler));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_alternatives
 *
 * Passes \c options.agent_spec to pcmk__list_alternatives(). None of the
 * handler arguments is used.
 *
 * \return Exit code corresponding to the result of pcmk__list_alternatives()
 */
static crm_exit_t
handle_list_alternatives(pcmk_resource_t *rsc, pcmk_node_t *node,
                         cib_t *cib_conn, pcmk_scheduler_t *scheduler,
                         pcmk_ipc_api_t *controld_api, xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(pcmk__list_alternatives(out, options.agent_spec));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_instances
 *
 * Emits the "resource-names-list" message for the scheduler's resource list.
 * Only \p scheduler is used from the handler arguments.
 *
 * \return \c CRM_EX_NOSUCH if the message produced no output, otherwise the
 *         exit code corresponding to the message's result
 */
static crm_exit_t
handle_list_instances(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                      pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                      xmlNode *cib_xml_orig)
{
    int rc = out->message(out, "resource-names-list",
                          scheduler->priv->resources);

    // @COMPAT It seems wrong to return an error because there are no resources
    return (rc == pcmk_rc_no_output)? CRM_EX_NOSUCH : pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_options
 *
 * Lists fencing parameters or primitive meta-attributes depending on
 * \c options.opt_list, including advanced and deprecated ones when
 * \c options.all is set. None of the handler arguments is used.
 *
 * \return Exit code corresponding to the listing's result, or
 *         \c CRM_EX_SOFTWARE (with an error recorded) for an unrecognized list
 *         type
 */
static crm_exit_t
handle_list_options(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                    pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                    xmlNode *cib_xml_orig)
{
    if (options.opt_list == pcmk__opt_fencing) {
        return pcmk_rc2exitc(pcmk__list_fencing_params(out, options.all));
    }

    if (options.opt_list == pcmk__opt_primitive) {
        return pcmk_rc2exitc(pcmk__list_primitive_meta(out, options.all));
    }

    // Anything else indicates a programming error elsewhere
    g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_SOFTWARE,
                "Bug: Invalid option list type");
    return CRM_EX_SOFTWARE;
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_providers
 *
 * Passes \c options.agent_spec to pcmk__list_providers(). None of the handler
 * arguments is used.
 *
 * \return Exit code corresponding to the result of pcmk__list_providers()
 */
static crm_exit_t
handle_list_providers(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                      pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                      xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(pcmk__list_providers(out, options.agent_spec));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_resources
 *
 * Emits the "resource-list" message for the scheduler data, using a
 * single-element list containing "*" for both list arguments. Only
 * \p scheduler is used from the handler arguments.
 *
 * \return \c CRM_EX_NOSUCH if the message produced no output, otherwise the
 *         exit code corresponding to the message's result
 */
static crm_exit_t
handle_list_resources(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                      pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                      xmlNode *cib_xml_orig)
{
    // The list holds a string literal, so only the list cells are freed below
    GList *match_all = g_list_prepend(NULL, (gpointer) "*");
    int rc = out->message(out, "resource-list", scheduler,
                          pcmk_show_inactive_rscs
                          |pcmk_show_rsc_only
                          |pcmk_show_pending,
                          true, match_all, match_all, false);

    g_list_free(match_all);

    // @COMPAT It seems wrong to return an error because there are no resources
    return (rc == pcmk_rc_no_output)? CRM_EX_NOSUCH : pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_list_standards
 *
 * Calls pcmk__list_standards() with the file-level output object. None of the
 * handler arguments is used.
 *
 * \return Exit code corresponding to the result of pcmk__list_standards()
 */
static crm_exit_t
handle_list_standards(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                      pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                      xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(pcmk__list_standards(out));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_locate
 *
 * Searches for \p rsc with cli_resource_search(), emits the
 * "resource-search-list" message for the result, and then releases the result
 * list together with its elements. Only \p rsc is used from the handler
 * arguments.
 *
 * \return Exit code corresponding to the result of the output message
 */
static crm_exit_t
handle_locate(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
              pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
              xmlNode *cib_xml_orig)
{
    GList *found = cli_resource_search(rsc, options.rsc_id);
    const int rc = out->message(out, "resource-search-list", found,
                                options.rsc_id);

    g_list_free_full(found, free);
    return pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_metadata
 *
 * Parses \c options.agent_spec into standard, provider, and type, asks the
 * executor API for the agent's metadata, and outputs it as XML. None of the
 * handler arguments is used.
 *
 * \return Exit code corresponding to the result: failure to create the
 *         executor connection, \c ENXIO if the specification is invalid or no
 *         metadata was returned, or the result of the metadata query
 */
static crm_exit_t
handle_metadata(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                xmlNode *cib_xml_orig)
{
    int rc = pcmk_rc_ok;
    char *standard = NULL;
    char *provider = NULL;
    char *type = NULL;
    char *metadata = NULL;
    lrmd_t *lrmd_conn = NULL;

    rc = lrmd__new(&lrmd_conn, NULL, NULL, 0);
    if (rc != pcmk_rc_ok) {
        g_set_error(&error, PCMK__RC_ERROR, rc,
                    _("Could not create executor connection"));
        lrmd_api_delete(lrmd_conn);
        return pcmk_rc2exitc(rc);
    }

    rc = crm_parse_agent_spec(options.agent_spec, &standard, &provider, &type);
    rc = pcmk_legacy2rc(rc);

    if (rc == pcmk_rc_ok) {
        rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard,
                                           provider, type,
                                           &metadata, 0);
        rc = pcmk_legacy2rc(rc);

        if (metadata != NULL) {
            out->output_xml(out, PCMK_XE_METADATA, metadata);
            free(metadata);
        } else {
            /* We were given a validly formatted spec, but it doesn't necessarily
             * match up with anything that exists. Use ENXIO as the return code
             * here because that maps to an exit code of CRM_EX_NOSUCH, which
             * probably is the most common reason to get here.
             */
            rc = ENXIO;
            g_set_error(&error, PCMK__RC_ERROR, rc,
                        _("Metadata query for %s failed: %s"),
                        options.agent_spec, pcmk_rc_str(rc));
        }

    } else {
        rc = ENXIO;
        g_set_error(&error, PCMK__RC_ERROR, rc,
                    _("'%s' is not a valid agent specification"),
                    options.agent_spec);
    }

    /* The parsed components belong to us whether or not parsing succeeded.
     * Anything left unset is still NULL, which free() accepts.
     */
    free(standard);
    free(provider);
    free(type);

    lrmd_api_delete(lrmd_conn);
    return pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_move
 *
 * Moves \p rsc to \p node if one was given; otherwise falls back to
 * ban_or_move(). The \p scheduler, \p controld_api, and \p cib_xml_orig
 * arguments are not used by this handler.
 *
 * \return \c CRM_EX_USAGE if the attempt returned \c EINVAL, otherwise the exit
 *         code corresponding to its result
 */
static crm_exit_t
handle_move(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
            pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
            xmlNode *cib_xml_orig)
{
    int rc;

    if (node != NULL) {
        rc = cli_resource_move(rsc, options.rsc_id, node, options.move_lifetime,
                               cib_conn, options.promoted_role_only,
                               options.force);
    } else {
        rc = ban_or_move(out, rsc, cib_conn, options.move_lifetime);
    }

    return (rc == EINVAL)? CRM_EX_USAGE : pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_query_xml
 *
 * Calls cli_resource_print() for \p rsc with its second argument set to
 * \c true. Only \p rsc is used from the handler arguments.
 *
 * \return Exit code corresponding to the result of cli_resource_print()
 */
static crm_exit_t
handle_query_xml(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                 pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                 xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(cli_resource_print(rsc, true));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_query_xml_raw
 *
 * Calls cli_resource_print() for \p rsc with its second argument set to
 * \c false. Only \p rsc is used from the handler arguments.
 *
 * \return Exit code corresponding to the result of cli_resource_print()
 */
static crm_exit_t
handle_query_xml_raw(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                     pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                     xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(cli_resource_print(rsc, false));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_refresh
 *
 * With a resource, delegates to refresh_resource() and returns the file-level
 * \c exit_code; without one, returns whatever refresh() returns. The
 * \p cib_conn, \p scheduler, and \p cib_xml_orig arguments are not used by this
 * handler.
 *
 * \return Exit code as described above
 */
static crm_exit_t
handle_refresh(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
               pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
               xmlNode *cib_xml_orig)
{
    if (rsc != NULL) {
        refresh_resource(out, rsc, node, controld_api);

        /* @FIXME Both refresh() and refresh_resource() are supposed to set
         * exit_code via start_mainloop(). But there appear to be cases in which
         * we can return from refresh() or refresh_resource() without starting
         * the mainloop or returning an error code. It looks as if we exit with
         * CRM_EX_OK in those cases.
         */
        return exit_code;
    }

    return refresh(out, node, controld_api);
}
/*!
 * \internal
 * \brief Command handler for \c cmd_restart
 *
 * Forwards \p rsc, \p node, \p cib_conn, and the relevant \c options members to
 * cli_resource_restart(). The \p scheduler, \p controld_api, and
 * \p cib_xml_orig arguments are not used by this handler.
 *
 * \return Exit code corresponding to the result of cli_resource_restart()
 */
static crm_exit_t
handle_restart(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
               pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
               xmlNode *cib_xml_orig)
{
    /* scheduler is deliberately not passed along: rsc has to remain valid for
     * as long as cli_resource_restart() runs, yet that function resets and
     * updates scheduler data several times, so it works on a copy of its own.
     */
    return pcmk_rc2exitc(cli_resource_restart(out, rsc, node,
                                              options.move_lifetime,
                                              options.timeout_ms, cib_conn,
                                              options.promoted_role_only,
                                              options.force));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_set_param
 *
 * Requires a non-empty \c options.prop_value, then forwards the attribute
 * selection and value from \c options to cli_resource_update_attribute(). The
 * \p node, \p scheduler, and \p controld_api arguments are not used by this
 * handler.
 *
 * \return \c CRM_EX_USAGE (with an error recorded) if no value was supplied,
 *         otherwise the exit code corresponding to the update's result
 */
static crm_exit_t
handle_set_param(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
                 pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
                 xmlNode *cib_xml_orig)
{
    if (pcmk__str_empty(options.prop_value)) {
        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                    _("You need to supply a value with the -v option"));
        return CRM_EX_USAGE;
    }

    return pcmk_rc2exitc(cli_resource_update_attribute(rsc, options.rsc_id,
                                                       options.prop_set,
                                                       options.attr_set_type,
                                                       options.prop_id,
                                                       options.prop_name,
                                                       options.prop_value,
                                                       options.recursive,
                                                       cib_conn, cib_xml_orig,
                                                       options.force));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_wait
 *
 * Calls wait_till_stable() with \c options.timeout_ms and \p cib_conn. The
 * remaining handler arguments are not used.
 *
 * \return Exit code corresponding to the result of wait_till_stable()
 */
static crm_exit_t
handle_wait(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
            pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
            xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(wait_till_stable(out, options.timeout_ms, cib_conn));
}
/*!
 * \internal
 * \brief Command handler for \c cmd_why
 *
 * Emits the "resource-reasons-list" message for the scheduler's resource list
 * together with \p rsc and \p node. The \p cib_conn, \p controld_api, and
 * \p cib_xml_orig arguments are not used by this handler.
 *
 * \return Exit code corresponding to the result of the output message
 */
static crm_exit_t
handle_why(pcmk_resource_t *rsc, pcmk_node_t *node, cib_t *cib_conn,
           pcmk_scheduler_t *scheduler, pcmk_ipc_api_t *controld_api,
           xmlNode *cib_xml_orig)
{
    return pcmk_rc2exitc(out->message(out, "resource-reasons-list",
                                      scheduler->priv->resources, rsc, node));
}
/* Command dispatch table, indexed by command enumerator via designated
* initializers. Each entry holds the handler function for the command followed
* by a set of crm_rsc_* flags. main() reads the entry for options.rsc_cmd and
* uses the flags to decide which arguments are mandatory
* (crm_rsc_requires_resource, crm_rsc_requires_node), which connections and
* data to set up before calling the handler (crm_rsc_requires_cib,
* crm_rsc_requires_scheduler, crm_rsc_requires_controller), how to look up the
* resource (crm_rsc_find_match_*), and whether a clone instance ID is rejected
* (crm_rsc_rejects_clone_instance).
*/
static const crm_resource_cmd_info_t crm_resource_command_info[] = {
[cmd_ban] = {
handle_ban,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_rejects_clone_instance
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_cleanup] = {
handle_cleanup,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_controller
|crm_rsc_requires_scheduler,
},
[cmd_clear] = {
handle_clear,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_rejects_clone_instance
|crm_rsc_requires_cib
|crm_rsc_requires_resource // Unless options.clear_expired
|crm_rsc_requires_scheduler,
},
[cmd_colocations] = {
handle_colocations,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_cts] = {
handle_cts,
crm_rsc_requires_cib
|crm_rsc_requires_scheduler,
},
/* NOTE(review): cmd_delete sets crm_rsc_rejects_clone_instance but no
* crm_rsc_find_match_* flag, and main() only evaluates the clone-instance
* rejection inside the lookup that runs when a find flag is set. Confirm that
* the rejection is enforced elsewhere for this command.
*/
[cmd_delete] = {
handle_delete,
crm_rsc_rejects_clone_instance
|crm_rsc_requires_cib
|crm_rsc_requires_resource,
},
[cmd_delete_param] = {
handle_delete_param,
crm_rsc_find_match_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_digests] = {
handle_digests,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_node
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_execute_agent] = {
handle_execute_agent,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_fail] = {
handle_fail,
crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_controller
|crm_rsc_requires_node
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_get_param] = {
handle_get_param,
crm_rsc_find_match_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_list_active_ops] = {
handle_list_active_ops,
crm_rsc_requires_cib
|crm_rsc_requires_scheduler,
},
[cmd_list_agents] = {
handle_list_agents,
0,
},
[cmd_list_all_ops] = {
handle_list_all_ops,
crm_rsc_requires_cib
|crm_rsc_requires_scheduler,
},
[cmd_list_alternatives] = {
handle_list_alternatives,
0,
},
[cmd_list_instances] = {
handle_list_instances,
crm_rsc_requires_cib
|crm_rsc_requires_scheduler,
},
[cmd_list_options] = {
handle_list_options,
0,
},
[cmd_list_providers] = {
handle_list_providers,
0,
},
[cmd_list_resources] = {
handle_list_resources,
crm_rsc_requires_cib
|crm_rsc_requires_scheduler,
},
[cmd_list_standards] = {
handle_list_standards,
0,
},
[cmd_locate] = {
handle_locate,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_metadata] = {
handle_metadata,
0,
},
[cmd_move] = {
handle_move,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_rejects_clone_instance
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_query_xml] = {
handle_query_xml,
crm_rsc_find_match_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_query_xml_raw] = {
handle_query_xml_raw,
crm_rsc_find_match_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_refresh] = {
handle_refresh,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_controller
|crm_rsc_requires_scheduler,
},
[cmd_restart] = {
handle_restart,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_rejects_clone_instance
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_set_param] = {
handle_set_param,
crm_rsc_find_match_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_resource
|crm_rsc_requires_scheduler,
},
[cmd_wait] = {
handle_wait,
crm_rsc_requires_cib,
},
[cmd_why] = {
handle_why,
crm_rsc_find_match_anon_basename
|crm_rsc_find_match_history
|crm_rsc_requires_cib
|crm_rsc_requires_scheduler,
},
};
/*!
 * \internal
 * \brief Build the command-line option context for crm_resource
 *
 * Creates the common argument context, attaches the usage examples as its
 * description, and registers the tool-specific main options plus the query,
 * command, location, advanced, and additional option groups.
 *
 * \param[in,out] args   Common argument object (receives the --quiet setting)
 * \param[out]    group  Passed through to pcmk__build_arg_context()
 *
 * \return Newly created option context
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
    GOptionContext *context = NULL;

    GOptionEntry extra_prog_entries[] = {
        { "quiet", 'Q', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &(args->quiet),
          "Be less descriptive in output.",
          NULL },
        { "resource", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.rsc_id,
          "Resource ID",
          "ID" },
        { G_OPTION_REMAINING, 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING_ARRAY, &options.remainder,
          NULL,
          NULL },

        { NULL }
    };

    /* Adjacent string literals are concatenated with nothing in between, so
     * every fragment that continues a sentence or a command line must carry
     * its own separating space (in particular, after the meta-attribute name
     * macros and at the end of wrapped sentences).
     */
    const char *description = "Examples:\n\n"
                              "List the available OCF agents:\n\n"
                              "\t# crm_resource --list-agents ocf\n\n"
                              "List the available OCF agents from the linux-ha project:\n\n"
                              "\t# crm_resource --list-agents ocf:heartbeat\n\n"
                              "Move 'myResource' to a specific node:\n\n"
                              "\t# crm_resource --resource myResource --move --node altNode\n\n"
                              "Allow (but not force) 'myResource' to move back to its original "
                              "location:\n\n"
                              "\t# crm_resource --resource myResource --clear\n\n"
                              "Stop 'myResource' (and anything that depends on it):\n\n"
                              "\t# crm_resource --resource myResource --set-parameter "
                              PCMK_META_TARGET_ROLE " --meta --parameter-value Stopped\n\n"
                              "Tell the cluster not to manage 'myResource' (the cluster will not "
                              "attempt to start or stop the\n"
                              "resource under any circumstances; useful when performing maintenance "
                              "tasks on a resource):\n\n"
                              "\t# crm_resource --resource myResource --set-parameter "
                              PCMK_META_IS_MANAGED " --meta --parameter-value false\n\n"
                              "Erase the operation history of 'myResource' on 'aNode' (the cluster "
                              "will 'forget' the existing\n"
                              "resource state, including any errors, and attempt to recover the "
                              "resource; useful when a resource\n"
                              "had failed permanently and has been repaired by an administrator):\n\n"
                              "\t# crm_resource --resource myResource --cleanup --node aNode\n\n";

    context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
    g_option_context_set_description(context, description);

    /* Add the -Q option, which cannot be part of the globally supported options
     * because some tools use that flag for something else.
     */
    pcmk__add_main_args(context, extra_prog_entries);

    pcmk__add_arg_group(context, "queries", "Queries:",
                        "Show query help", query_entries);
    pcmk__add_arg_group(context, "commands", "Commands:",
                        "Show command help", command_entries);
    pcmk__add_arg_group(context, "locations", "Locations:",
                        "Show location help", location_entries);
    pcmk__add_arg_group(context, "advanced", "Advanced:",
                        "Show advanced option help", advanced_entries);
    pcmk__add_arg_group(context, "additional", "Additional Options:",
                        "Show additional options", addl_entries);
    return context;
}
/* Entry point for crm_resource.
*
* Parses the command line, validates option combinations, looks up the
* selected command in crm_resource_command_info, sets up whatever that
* command's flags ask for (CIB connection, scheduler data, controller
* connection, node and resource lookup), and then calls the command's handler.
* Every failure path jumps to the "done" label, which reports any pending
* error, frees option storage and connections, and exits via
* crm_exit(exit_code).
*/
int
main(int argc, char **argv)
{
const crm_resource_cmd_info_t *command_info = NULL;
pcmk_resource_t *rsc = NULL;
pcmk_node_t *node = NULL;
cib_t *cib_conn = NULL;
pcmk_scheduler_t *scheduler = NULL;
pcmk_ipc_api_t *controld_api = NULL;
xmlNode *cib_xml_orig = NULL;
uint32_t find_flags = 0;
int rc = pcmk_rc_ok;
GOptionGroup *output_group = NULL;
gchar **processed_args = NULL;
GOptionContext *context = NULL;
/*
* Parse command line arguments
*/
args = pcmk__new_common_args(SUMMARY);
processed_args = pcmk__cmdline_preproc(argv, "GHINSTdginpstuvx");
context = build_arg_context(args, &output_group);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
pcmk__cli_init_logging("crm_resource", args->verbosity);
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Error creating output format %s: %s"),
args->output_ty, pcmk_rc_str(rc));
goto done;
}
pe__register_messages(out);
crm_resource_register_messages(out);
lrmd__register_messages(out);
pcmk__register_lib_messages(out);
out->quiet = args->quiet;
crm_log_args(argc, argv);
/*
* Validate option combinations
*/
// --expired without --clear/-U doesn't make sense
if (options.clear_expired && (options.rsc_cmd != cmd_clear)) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("--expired requires --clear or -U"));
goto done;
}
if (options.remainder != NULL) {
// Commands that use positional arguments will create override_params
if (options.override_params == NULL) {
// No command wants positional arguments: list them all in a usage error
GString *msg = g_string_sized_new(128);
guint len = g_strv_length(options.remainder);
g_string_append(msg, "non-option ARGV-elements:");
for (int i = 0; i < len; i++) {
g_string_append_printf(msg, "\n[%d of %u] %s",
i + 1, len, options.remainder[i]);
}
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "%s", msg->str);
g_string_free(msg, TRUE);
goto done;
}
// Each positional argument must be a name=value pair to store as an override
for (gchar **arg = options.remainder; *arg != NULL; arg++) {
gchar *name = NULL;
gchar *value = NULL;
// NOTE(review): this rc shadows the function-scope rc declared above
int rc = pcmk__scan_nvpair(*arg, &name, &value);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Error parsing '%s' as a name=value pair"), *arg);
goto done;
}
g_hash_table_insert(options.override_params, name, value);
}
}
// Adjust list rendering of the output object per format and command
if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) {
switch (options.rsc_cmd) {
/* These are the only commands that have historically used the <list>
* elements in their XML schema. For all others, use the simple list
* argument.
*/
case cmd_get_param:
case cmd_list_instances:
case cmd_list_standards:
pcmk__output_enable_list_element(out);
break;
default:
break;
}
} else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) {
switch (options.rsc_cmd) {
case cmd_colocations:
case cmd_list_resources:
pcmk__output_text_set_fancy(out, true);
break;
default:
break;
}
}
if (args->version) {
out->version(out, false);
goto done;
}
// Ensure command is in valid range and has a handler function
// NOTE(review): the bound is inclusive, so this assumes the table has an
// entry at index cmd_max -- confirm against the enum definition
if ((options.rsc_cmd >= 0) && (options.rsc_cmd <= cmd_max)) {
command_info = &crm_resource_command_info[options.rsc_cmd];
}
if ((command_info == NULL) || (command_info->fn == NULL)) {
exit_code = CRM_EX_SOFTWARE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Bug: Unimplemented command: %d"), (int) options.rsc_cmd);
goto done;
}
/* If a command-line resource agent specification was given, validate it.
* Otherwise, ensure --option was not given.
*/
if (has_cmdline_config()) {
validate_cmdline_config();
if (error != NULL) {
exit_code = CRM_EX_USAGE;
goto done;
}
} else if (options.cmdline_params != NULL) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("--option must be used with --validate and without -r"));
g_hash_table_destroy(options.cmdline_params);
goto done;
}
// Ensure --resource is set if it's required
if (pcmk_is_set(command_info->flags, crm_rsc_requires_resource)
&& !has_cmdline_config()
&& !options.clear_expired
&& (options.rsc_id == NULL)) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Must supply a resource ID with -r/--resource"));
goto done;
}
// Ensure --node is set if it's required
if (pcmk_is_set(command_info->flags, crm_rsc_requires_node)
&& (options.host_uname == NULL)) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Must supply a node name with -N/--node"));
goto done;
}
// Establish a connection to the CIB if needed
if (pcmk_is_set(command_info->flags, crm_rsc_requires_cib)
&& !has_cmdline_config()) {
cib_conn = cib_new();
if ((cib_conn == NULL) || (cib_conn->cmds == NULL)) {
exit_code = CRM_EX_DISCONNECT;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Could not create CIB connection"));
goto done;
}
rc = cib__signon_attempts(cib_conn, cib_command, 5);
// The sign-on result is a legacy code; convert before comparing
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
exit_code = pcmk_rc2exitc(rc);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Could not connect to the CIB: %s"), pcmk_rc_str(rc));
goto done;
}
}
// Populate scheduler data from CIB query if needed
if (pcmk_is_set(command_info->flags, crm_rsc_requires_scheduler)
&& !has_cmdline_config()) {
rc = initialize_scheduler_data(&scheduler, cib_conn, out,
&cib_xml_orig);
if (rc != pcmk_rc_ok) {
exit_code = pcmk_rc2exitc(rc);
goto done;
}
}
// Establish a connection to the controller if needed
// (skipped when the CIB_file environment variable is set)
if (pcmk_is_set(command_info->flags, crm_rsc_requires_controller)
&& (getenv("CIB_file") == NULL)) {
rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld);
if (rc != pcmk_rc_ok) {
exit_code = pcmk_rc2exitc(rc);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Error connecting to the controller: %s"), pcmk_rc_str(rc));
goto done;
}
pcmk_register_ipc_callback(controld_api, controller_event_callback,
&exit_code);
rc = pcmk__connect_ipc(controld_api, pcmk_ipc_dispatch_main, 5);
if (rc != pcmk_rc_ok) {
exit_code = pcmk_rc2exitc(rc);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Error connecting to %s: %s"),
pcmk_ipc_name(controld_api, true), pcmk_rc_str(rc));
goto done;
}
}
/* Find node if --node was given.
*
* @TODO Consider stricter validation. Currently we ignore the --node
* argument for commands that don't require scheduler data, since we have no
* way to find the node in that case. This is really a usage error, but we
* don't validate strictly. We allow multiple commands (and in some cases
* their options like --node) to be specified, and we use the last one in
* case of conflicts.
*
* This isn't universally true. --expired results in a usage error unless
* the final command is --clear.
*/
/* NOTE(review): scheduler is still NULL here when the command lacks
* crm_rsc_requires_scheduler or a command-line config is in use, yet the
* lookup below runs whenever --node was given and a NULL result is reported
* as "not found". Confirm this matches the "ignore" behavior described above.
*/
if (options.host_uname != NULL) {
node = pcmk_find_node(scheduler, options.host_uname);
if (node == NULL) {
exit_code = CRM_EX_NOSUCH;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Node '%s' not found"), options.host_uname);
goto done;
}
}
/* Find resource if --resource was given and any find flags are set.
*
* @TODO Consider stricter validation. See comment above for --node.
* @TODO Setter macro for tracing?
*/
if (pcmk_is_set(command_info->flags, crm_rsc_find_match_anon_basename)) {
find_flags |= pcmk_rsc_match_anon_basename;
}
if (pcmk_is_set(command_info->flags, crm_rsc_find_match_basename)) {
find_flags |= pcmk_rsc_match_basename;
}
if (pcmk_is_set(command_info->flags, crm_rsc_find_match_history)) {
find_flags |= pcmk_rsc_match_history;
}
if ((find_flags != 0) && (options.rsc_id != NULL)) {
pcmk__assert(scheduler != NULL);
rsc = pe_find_resource_with_flags(scheduler->priv->resources,
options.rsc_id, find_flags);
if (rsc == NULL) {
exit_code = CRM_EX_NOSUCH;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Resource '%s' not found"), options.rsc_id);
goto done;
}
// A ':' in the given ID of a clone's child is treated as naming an instance
if (pcmk_is_set(command_info->flags, crm_rsc_rejects_clone_instance)
&& pcmk__is_clone(rsc->priv->parent)
&& (strchr(options.rsc_id, ':') != NULL)) {
exit_code = CRM_EX_INVALID_PARAM;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Cannot operate on clone resource instance '%s'"),
options.rsc_id);
goto done;
}
}
// Run the selected command
exit_code = command_info->fn(rsc, node, cib_conn, scheduler, controld_api,
cib_xml_orig);
done:
// For CRM_EX_USAGE, error is already set satisfactorily
if ((exit_code != CRM_EX_OK) && (exit_code != CRM_EX_USAGE)) {
if (error != NULL) {
// Append the generic exit-code description to the existing message
char *msg = crm_strdup_printf("%s\nError performing operation: %s",
error->message, crm_exit_str(exit_code));
g_clear_error(&error);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "%s", msg);
free(msg);
} else {
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
_("Error performing operation: %s"), crm_exit_str(exit_code));
}
}
g_free(options.host_uname);
g_free(options.interval_spec);
g_free(options.move_lifetime);
g_free(options.operation);
g_free(options.prop_id);
free(options.prop_name);
g_free(options.prop_set);
g_free(options.prop_value);
g_free(options.rsc_id);
g_free(options.rsc_type);
free(options.agent_spec);
g_free(options.agent);
g_free(options.class);
g_free(options.provider);
if (options.override_params != NULL) {
g_hash_table_destroy(options.override_params);
}
g_strfreev(options.remainder);
// Don't destroy options.cmdline_params here. See comment in option_cb().
g_strfreev(processed_args);
g_option_context_free(context);
pcmk__xml_free(cib_xml_orig);
cib__clean_up_connection(&cib_conn);
pcmk_free_ipc_api(controld_api);
pcmk_free_scheduler(scheduler);
if (mainloop != NULL) {
g_main_loop_unref(mainloop);
}
pcmk__output_and_clear_error(&error, out);
// out is NULL if we bailed out before the output object was created
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
return crm_exit(exit_code);
}
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index b52e7deeb9..b2b72cc941 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -1,726 +1,726 @@
/*
* Copyright 2009-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h> // gboolean, gchar, etc.
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/output_internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h> // stonith__register_messages()
#include <crm/cib.h>
#include <crm/pengine/status.h>
#include <crm/common/xml.h>
#include <pacemaker-internal.h>
#define SUMMARY "stonith_admin - Access the Pacemaker fencing API"
char action = 0;
/* Storage for parsed command-line options. Each member is filled either
* directly by a GOptionEntry in the tables below or by one of the option
* callbacks; the option that feeds each member is noted beside it.
*/
struct {
gboolean as_nodeid; // --as-node-id
gboolean broadcast; // --broadcast
gboolean cleanup; // --cleanup
gboolean installed; // --list-installed
gboolean metadata; // --metadata
gboolean registered; // --list-registered
gboolean validate_cfg; // --validate
GList *devices; // --device (appended to by add_stonith_device())
GHashTable *params; // --option / --env-option name/value pairs
int fence_level; // --index
int timeout ; // --timeout, in seconds
long long tolerance_ms; // --tolerance (set by add_tolerance())
int delay; // --delay, in seconds
char *agent; // --agent
char *confirm_host; // --confirm
char *fence_host; // --fence
char *history; // --history
char *last_fenced; // --last
char *query; // --query
char *reboot_host; // --reboot
char *register_dev; // --register
char *register_level; // --register-level
char *targets; // --list-targets
char *terminate; // --list
char *unfence_host; // --unfence
char *unregister_dev; // --deregister
char *unregister_level; // --deregister-level
} options = {
.timeout = 120,
.delay = 0
};
// Option callbacks referenced as G_OPTION_ARG_CALLBACK targets in addl_entries
gboolean add_env_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
gboolean add_stonith_device(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
gboolean add_stonith_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
gboolean add_tolerance(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
gboolean set_tag(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
#define INDENT " "
/* *INDENT-OFF* */
// Options of the "Device Definition Commands" group (see build_arg_context())
static GOptionEntry defn_entries[] = {
{ "register", 'R', 0, G_OPTION_ARG_STRING, &options.register_dev,
"Register the named stonith device. Requires: --agent.\n"
INDENT "Optional: --option, --env-option.",
"DEVICE" },
{ "deregister", 'D', 0, G_OPTION_ARG_STRING, &options.unregister_dev,
"De-register the named stonith device.",
"DEVICE" },
{ "register-level", 'r', 0, G_OPTION_ARG_STRING, &options.register_level,
"Register a stonith level for the named target,\n"
INDENT "specified as one of NAME, @PATTERN, or ATTR=VALUE.\n"
INDENT "Requires: --index and one or more --device entries.",
"TARGET" },
{ "deregister-level", 'd', 0, G_OPTION_ARG_STRING, &options.unregister_level,
"Unregister a stonith level for the named target,\n"
INDENT "specified as for --register-level. Requires: --index",
"TARGET" },
{ NULL }
};
// Options of the "Queries" group (see build_arg_context())
static GOptionEntry query_entries[] = {
// Note: --list stores its HOST argument in options.terminate
{ "list", 'l', 0, G_OPTION_ARG_STRING, &options.terminate,
"List devices that can terminate the specified host.\n"
INDENT "Optional: --timeout",
"HOST" },
{ "list-registered", 'L', 0, G_OPTION_ARG_NONE, &options.registered,
"List all registered devices. Optional: --timeout.",
NULL },
{ "list-installed", 'I', 0, G_OPTION_ARG_NONE, &options.installed,
"List all installed devices. Optional: --timeout.",
NULL },
{ "list-targets", 's', 0, G_OPTION_ARG_STRING, &options.targets,
"List the targets that can be fenced by the\n"
INDENT "named device. Optional: --timeout.",
"DEVICE" },
{ "metadata", 'M', 0, G_OPTION_ARG_NONE, &options.metadata,
"Show agent metadata. Requires: --agent.\n"
INDENT "Optional: --timeout.",
NULL },
{ "query", 'Q', 0, G_OPTION_ARG_STRING, &options.query,
"Check the named device's status. Optional: --timeout.",
"DEVICE" },
{ "history", 'H', 0, G_OPTION_ARG_STRING, &options.history,
"Show last successful fencing operation for named node\n"
INDENT "(or '*' for all nodes). Optional: --timeout, --cleanup,\n"
INDENT "--quiet (show only the operation's epoch timestamp),\n"
INDENT "--verbose (show all recorded and pending operations),\n"
INDENT "--broadcast (update history from all nodes available).",
"NODE" },
{ "last", 'h', 0, G_OPTION_ARG_STRING, &options.last_fenced,
"Indicate when the named node was last fenced.\n"
INDENT "Optional: --as-node-id.",
"NODE" },
{ "validate", 'K', 0, G_OPTION_ARG_NONE, &options.validate_cfg,
"Validate a fence device configuration.\n"
INDENT "Requires: --agent. Optional: --option, --env-option,\n"
INDENT "--quiet (print no output, only return status).",
NULL },
{ NULL }
};
// Options of the "Fencing Commands" group (see build_arg_context())
static GOptionEntry fence_entries[] = {
{ "fence", 'F', 0, G_OPTION_ARG_STRING, &options.fence_host,
"Fence named host. Optional: --timeout, --tolerance, --delay.",
"HOST" },
{ "unfence", 'U', 0, G_OPTION_ARG_STRING, &options.unfence_host,
"Unfence named host. Optional: --timeout, --tolerance, --delay.",
"HOST" },
{ "reboot", 'B', 0, G_OPTION_ARG_STRING, &options.reboot_host,
"Reboot named host. Optional: --timeout, --tolerance, --delay.",
"HOST" },
{ "confirm", 'C', 0, G_OPTION_ARG_STRING, &options.confirm_host,
"Tell cluster that named host is now safely down.",
"HOST", },
{ NULL }
};
/* Options of the "Additional Options" group (see build_arg_context()). These
* modify the commands above rather than selecting one; entries of type
* G_OPTION_ARG_CALLBACK are handled by the callback named in the entry.
*/
static GOptionEntry addl_entries[] = {
{ "cleanup", 'c', 0, G_OPTION_ARG_NONE, &options.cleanup,
"Cleanup wherever appropriate. Requires --history.",
NULL },
{ "broadcast", 'b', 0, G_OPTION_ARG_NONE, &options.broadcast,
"Broadcast wherever appropriate.",
NULL },
{ "agent", 'a', 0, G_OPTION_ARG_STRING, &options.agent,
"The agent to use (for example, fence_xvm;\n"
INDENT "with --register, --metadata, --validate).",
"AGENT" },
{ "option", 'o', 0, G_OPTION_ARG_CALLBACK, add_stonith_params,
"Specify a device configuration parameter as NAME=VALUE\n"
INDENT "(may be specified multiple times; with --register,\n"
INDENT "--validate).",
"PARAM" },
{ "env-option", 'e', 0, G_OPTION_ARG_CALLBACK, add_env_params,
"Specify a device configuration parameter with the\n"
INDENT "specified name, using the value of the\n"
INDENT "environment variable of the same name prefixed with\n"
INDENT "OCF_RESKEY_ (may be specified multiple times;\n"
INDENT "with --register, --validate).",
"PARAM" },
{ "tag", 'T', 0, G_OPTION_ARG_CALLBACK, set_tag,
"Identify fencing operations in logs with the specified\n"
INDENT "tag; useful when multiple entities might invoke\n"
INDENT "stonith_admin (used with most commands).",
"TAG" },
// The help text for --device gains an extra sentence when CIB secrets are enabled
{ "device", 'v', 0, G_OPTION_ARG_CALLBACK, add_stonith_device,
"Device ID (with --register-level, device to associate with\n"
INDENT "a given host and level; may be specified multiple times)"
#if PCMK__ENABLE_CIBSECRETS
"\n" INDENT "(with --validate, name to use to load CIB secrets)"
#endif
".",
"DEVICE" },
{ "index", 'i', 0, G_OPTION_ARG_INT, &options.fence_level,
"The stonith level (1-9) (with --register-level,\n"
INDENT "--deregister-level).",
"LEVEL" },
{ "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout,
"Operation timeout in seconds (default 120;\n"
INDENT "used with most commands).",
"SECONDS" },
{ "delay", 'y', 0, G_OPTION_ARG_INT, &options.delay,
"Apply a fencing delay in seconds. Any static/random delays from\n"
INDENT "pcmk_delay_base/max will be added, otherwise all\n"
INDENT "disabled with the value -1\n"
INDENT "(default 0; with --fence, --reboot, --unfence).",
"SECONDS" },
{ "as-node-id", 'n', 0, G_OPTION_ARG_NONE, &options.as_nodeid,
"(Advanced) The supplied node is the corosync node ID\n"
INDENT "(with --last).",
NULL },
// --tolerance has no short option
{ "tolerance", 0, 0, G_OPTION_ARG_CALLBACK, add_tolerance,
"(Advanced) Do nothing if an equivalent --fence request\n"
INDENT "succeeded less than this many seconds earlier\n"
INDENT "(with --fence, --unfence, --reboot).",
"SECONDS" },
{ NULL }
};
/* *INDENT-ON* */
// Output formats offered by this tool; registered in main() via pcmk__register_formats()
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_HTML,
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
static const int st_opts = st_opt_sync_call|st_opt_allow_self_fencing;
static char *name = NULL;
/*!
 * \internal
 * \brief Option callback for --env-option/-e
 *
 * Look up the environment variable named OCF_RESKEY_<optarg>. If it is set,
 * store its value in options.params under the name \p optarg, creating the
 * table on first use. If it is not set, report an error.
 *
 * \param[in]  option_name  Name of the option being processed (unused)
 * \param[in]  optarg       Parameter name given on the command line
 * \param[in]  data         Unused
 * \param[out] error        Where to store an error if the variable is unset
 *
 * \return TRUE if the parameter was stored, FALSE (with \p error set) if the
 *         environment variable does not exist
 */
gboolean
add_env_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
    char *key = crm_strdup_printf("OCF_RESKEY_%s", optarg);
    const char *env = getenv(key);
    gboolean retval = TRUE;

    if (env == NULL) {
        g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid option: -e %s", optarg);
        retval = FALSE;

    } else {
        crm_info("Got: '%s'='%s'", optarg, env);

        /* Create the table only if it does not exist yet. Replacing an
         * existing table would leak it and drop parameters collected from
         * earlier -o/-e options; skipping creation when it is missing would
         * leave nothing to insert into.
         */
        if (options.params == NULL) {
            options.params = pcmk__strkey_table(free, free);
        }

        pcmk__insert_dup(options.params, optarg, env);
    }

    free(key);
    return retval;
}
/*!
 * \internal
 * \brief Option callback for --device/-v
 *
 * Append a copy of the given device ID to options.devices.
 *
 * \param[in]  option_name  Name of the option being processed (unused)
 * \param[in]  optarg       Device ID given on the command line
 * \param[in]  data         Unused
 * \param[out] error        Unused
 *
 * \return Always TRUE
 */
gboolean
add_stonith_device(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
    gpointer device_id = pcmk__str_copy(optarg);

    options.devices = g_list_append(options.devices, device_id);
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback for --tolerance
 *
 * Parse the argument with crm_get_msec() and store it in
 * options.tolerance_ms. A negative parse result is logged and replaced by 0;
 * anything above UINT_MAX is capped at UINT_MAX.
 *
 * \param[in]  option_name  Name of the option being processed (unused)
 * \param[in]  optarg       Tolerance given on the command line
 * \param[in]  data         Unused
 * \param[out] error        Unused
 *
 * \return Always TRUE
 */
gboolean
add_tolerance(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
    options.tolerance_ms = crm_get_msec(optarg);

    if (options.tolerance_ms >= 0) {
        // pcmk__request_fencing() expects an unsigned int
        if (options.tolerance_ms > UINT_MAX) {
            options.tolerance_ms = UINT_MAX;
        }
        return TRUE;
    }

    crm_warn("Ignoring invalid tolerance '%s'", optarg);
    options.tolerance_ms = 0;
    return TRUE;
}
/*!
 * \internal
 * \brief Option callback for --option/-o
 *
 * Split the argument into a name and a value with pcmk__scan_nvpair() and
 * store a copy of the pair in options.params, creating the table on first
 * use.
 *
 * \param[in]  option_name  Name of the option being processed (unused)
 * \param[in]  optarg       NAME=VALUE string given on the command line
 * \param[in]  data         Unused
 * \param[out] error        Where to store an error if parsing fails
 *
 * \return TRUE if the pair was stored, FALSE (with \p error set) otherwise
 */
gboolean
add_stonith_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
    gchar *param_name = NULL;
    gchar *param_value = NULL;
    gboolean stored = FALSE;
    int rc = 0;

    crm_info("Scanning: -o %s", optarg);
    rc = pcmk__scan_nvpair(optarg, &param_name, &param_value);

    if (rc == pcmk_rc_ok) {
        crm_info("Got: '%s'='%s'", param_name, param_value);

        if (options.params == NULL) {
            options.params = pcmk__strkey_table(free, free);
        }
        pcmk__insert_dup(options.params, param_name, param_value);
        stored = TRUE;

    } else {
        g_set_error(error, PCMK__RC_ERROR, rc, "Invalid option: -o %s: %s", optarg, pcmk_rc_str(rc));
    }

    // The table holds its own copies, so the scanned strings go away here
    g_free(param_name);
    g_free(param_value);
    return stored;
}
/*!
 * \internal
 * \brief Option callback for --tag/-T
 *
 * Replace the file-scope client name with "<crm_system_name>.<optarg>".
 *
 * \param[in]  option_name  Name of the option being processed (unused)
 * \param[in]  optarg       Tag given on the command line
 * \param[in]  data         Unused
 * \param[out] error        Unused
 *
 * \return Always TRUE
 */
gboolean
set_tag(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
    char *tagged_name = crm_strdup_printf("%s.%s", crm_system_name, optarg);

    // Drop any name left by an earlier --tag before installing the new one
    free(name);
    name = tagged_name;
    return TRUE;
}
/*!
 * \internal
 * \brief Build the command-line option context for stonith_admin
 *
 * \param[in,out] args   Common argument object (receives the --quiet setting)
 * \param[out]    group  Passed through to pcmk__build_arg_context()
 *
 * \return Newly created option context with all option groups registered
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
    /* -q is added here rather than to the globally supported options because
     * some tools use that flag for something else.
     */
    GOptionEntry extra_prog_entries[] = {
        { "quiet", 'q', 0, G_OPTION_ARG_NONE, &(args->quiet),
          "Be less descriptive in output.",
          NULL },

        { NULL }
    };

    GOptionContext *ctx = pcmk__build_arg_context(args,
                                                  "text (default), html, xml",
                                                  group, NULL);

    pcmk__add_main_args(ctx, extra_prog_entries);

    pcmk__add_arg_group(ctx, "definition", "Device Definition Commands:",
                        "Show device definition help", defn_entries);
    pcmk__add_arg_group(ctx, "queries", "Queries:",
                        "Show query help", query_entries);
    pcmk__add_arg_group(ctx, "fence", "Fencing Commands:",
                        "Show fence help", fence_entries);
    pcmk__add_arg_group(ctx, "additional", "Additional Options:",
                        "Show additional options", addl_entries);

    return ctx;
}
/*!
 * \internal
 * \brief Request a fencing action and describe any failure
 *
 * Calls pcmk__request_fencing() with the timeout, tolerance, and delay taken
 * from the command-line options. On failure, \p error is set to a message
 * naming the action and target, followed by the return code string and, when
 * it differs from that string, the reason in parentheses.
 *
 * \param[in,out] st       Fencer connection
 * \param[in]     target   Node to act on
 * \param[in]     command  Fencing action to request
 * \param[out]    error    Where to store an error on failure
 *
 * \return Standard Pacemaker return code
 */
static int
request_fencing(stonith_t *st, const char *target, const char *command,
                GError **error)
{
    char *reason = NULL;
    const char *rc_str = NULL;
    const char *what = NULL;
    const char *prefix = "";
    const char *detail = "";
    const char *suffix = "";
    int rc = pcmk__request_fencing(st, target, command, name,
                                   options.timeout * 1000,
                                   options.tolerance_ms, options.delay,
                                   &reason);

    if (rc == pcmk_rc_ok) {
        free(reason);
        return rc;
    }

    rc_str = pcmk_rc_str(rc);
    what = (strcmp(command, PCMK_ACTION_ON) == 0)? "unfence" : "fence";

    // If reason is identical to return code string, don't display it twice
    if (pcmk__str_eq(rc_str, reason, pcmk__str_none)) {
        free(reason);
        reason = NULL;
    }

    if (reason != NULL) {
        prefix = " (";
        detail = reason;
        suffix = ")";
    }

    g_set_error(error, PCMK__RC_ERROR, rc,
                "Couldn't %s %s: %s%s%s%s",
                what, target, rc_str, prefix, detail, suffix);

    free(reason);
    return rc;
}
int
main(int argc, char **argv)
{
int rc = 0;
crm_exit_t exit_code = CRM_EX_OK;
bool no_connect = false;
bool required_agent = false;
char *target = NULL;
const char *device = NULL;
stonith_t *st = NULL;
GError *error = NULL;
pcmk__output_t *out = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "adehilorstvyBCDFHQRTU");
GOptionContext *context = build_arg_context(args, &output_group);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
pcmk__cli_init_logging("stonith_admin", args->verbosity);
if (name == NULL) {
name = strdup(crm_system_name);
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
pcmk__output_enable_list_element(out);
stonith__register_messages(out);
if (args->version) {
out->version(out, false);
goto done;
}
if (options.validate_cfg) {
required_agent = true;
no_connect = true;
action = 'K';
}
if (options.installed) {
no_connect = true;
action = 'I';
}
if (options.registered) {
action = 'L';
}
if (options.register_dev != NULL) {
required_agent = true;
action = 'R';
device = options.register_dev;
}
if (options.query != NULL) {
action = 'Q';
device = options.query;
}
if (options.unregister_dev != NULL) {
action = 'D';
device = options.unregister_dev;
}
if (options.targets != NULL) {
action = 's';
device = options.targets;
}
if (options.terminate != NULL) {
action = 'L';
target = options.terminate;
}
if (options.metadata) {
no_connect = true;
required_agent = true;
action = 'M';
}
if (options.reboot_host != NULL) {
no_connect = true;
action = 'B';
target = options.reboot_host;
crm_log_args(argc, argv);
}
if (options.fence_host != NULL) {
no_connect = true;
action = 'F';
target = options.fence_host;
crm_log_args(argc, argv);
}
if (options.unfence_host != NULL) {
no_connect = true;
action = 'U';
target = options.unfence_host;
crm_log_args(argc, argv);
}
if (options.confirm_host != NULL) {
action = 'C';
target = options.confirm_host;
crm_log_args(argc, argv);
}
if (options.last_fenced != NULL) {
action = 'h';
target = options.last_fenced;
}
if (options.history != NULL) {
action = 'H';
target = options.history;
}
if (options.register_level != NULL) {
action = 'r';
target = options.register_level;
}
if (options.unregister_level != NULL) {
action = 'd';
target = options.unregister_level;
}
if ((options.timeout > (UINT_MAX / 1000)) || (options.timeout < 0)) {
out->err(out, "Integer value \"%d\" for -t out of range", options.timeout);
exit_code = CRM_EX_USAGE;
goto done;
}
if (action == 0) {
char *help = g_option_context_get_help(context, TRUE, NULL);
out->err(out, "%s", help);
g_free(help);
exit_code = CRM_EX_USAGE;
goto done;
}
if (required_agent && options.agent == NULL) {
char *help = g_option_context_get_help(context, TRUE, NULL);
out->err(out, "Please specify an agent to query using -a,--agent [value]");
out->err(out, "%s", help);
g_free(help);
exit_code = CRM_EX_USAGE;
goto done;
}
out->quiet = args->quiet;
- st = stonith_api_new();
+ st = stonith__api_new();
if (st == NULL) {
rc = -ENOMEM;
} else if (!no_connect) {
rc = st->cmds->connect(st, name, NULL);
}
if (rc < 0) {
out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
exit_code = CRM_EX_DISCONNECT;
goto done;
}
switch (action) {
case 'I':
- rc = pcmk__fence_installed(out, st, options.timeout*1000);
+ rc = pcmk__fence_installed(out, st);
if (rc != pcmk_rc_ok) {
out->err(out, "Failed to list installed devices: %s", pcmk_rc_str(rc));
}
break;
case 'L':
rc = pcmk__fence_registered(out, st, target, options.timeout*1000);
if (rc != pcmk_rc_ok) {
out->err(out, "Failed to list registered devices: %s", pcmk_rc_str(rc));
}
break;
case 'Q':
rc = st->cmds->monitor(st, st_opts, device, options.timeout);
if (rc != pcmk_rc_ok) {
rc = st->cmds->list(st, st_opts, device, NULL, options.timeout);
}
rc = pcmk_legacy2rc(rc);
break;
case 's':
rc = pcmk__fence_list_targets(out, st, device, options.timeout*1000);
if (rc != pcmk_rc_ok) {
out->err(out, "Couldn't list targets: %s", pcmk_rc_str(rc));
}
break;
case 'R': {
/* register_device wants a stonith_key_value_t instead of a GHashTable */
stonith_key_value_t *params = NULL;
GHashTableIter iter;
gpointer key, val;
if (options.params != NULL) {
g_hash_table_iter_init(&iter, options.params);
while (g_hash_table_iter_next(&iter, &key, &val)) {
- params = stonith_key_value_add(params, key, val);
+ params = stonith__key_value_add(params, key, val);
}
}
rc = st->cmds->register_device(st, st_opts, device, NULL, options.agent,
params);
- stonith_key_value_freeall(params, 1, 1);
+ stonith__key_value_freeall(params, true, true);
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
out->err(out, "Can't register device %s using agent %s: %s",
device, options.agent, pcmk_rc_str(rc));
}
break;
}
case 'D':
rc = st->cmds->remove_device(st, st_opts, device);
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
out->err(out, "Can't unregister device %s: %s",
device, pcmk_rc_str(rc));
}
break;
case 'd':
rc = pcmk__fence_unregister_level(st, target, options.fence_level);
if (rc != pcmk_rc_ok) {
out->err(out, "Can't unregister topology level %d for %s: %s",
options.fence_level, target, pcmk_rc_str(rc));
}
break;
case 'r':
rc = pcmk__fence_register_level(st, target, options.fence_level, options.devices);
if (rc != pcmk_rc_ok) {
out->err(out, "Can't register topology level %d for %s: %s",
options.fence_level, target, pcmk_rc_str(rc));
}
break;
case 'M':
rc = pcmk__fence_metadata(out, st, options.agent, options.timeout*1000);
if (rc != pcmk_rc_ok) {
out->err(out, "Can't get fence agent meta-data: %s",
pcmk_rc_str(rc));
}
break;
case 'C':
rc = st->cmds->confirm(st, st_opts, target);
rc = pcmk_legacy2rc(rc);
break;
case 'B':
rc = request_fencing(st, target, PCMK_ACTION_REBOOT, &error);
break;
case 'F':
rc = request_fencing(st, target, PCMK_ACTION_OFF, &error);
break;
case 'U':
rc = request_fencing(st, target, PCMK_ACTION_ON, &error);
break;
case 'h':
rc = pcmk__fence_last(out, target, options.as_nodeid);
break;
case 'H':
rc = pcmk__fence_history(out, st, target, options.timeout*1000, args->verbosity,
options.broadcast, options.cleanup);
break;
case 'K':
device = NULL;
if (options.devices != NULL) {
device = g_list_nth_data(options.devices, 0);
}
rc = pcmk__fence_validate(out, st, options.agent, device, options.params,
options.timeout*1000);
break;
}
crm_info("Command returned: %s (%d)", pcmk_rc_str(rc), rc);
exit_code = pcmk_rc2exitc(rc);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
free(name);
g_list_free_full(options.devices, free);
if (options.params != NULL) {
g_hash_table_destroy(options.params);
}
if (st != NULL) {
st->cmds->disconnect(st);
- stonith_api_delete(st);
+ stonith__api_free(st);
}
return exit_code;
}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Apr 21, 7:13 PM (16 h, 15 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665439
Default Alt Text
(1021 KB)

Event Timeline