Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 6b743e28f6..bb1ee4694b 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1117 +1,1118 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
/* Forward declaration: this handler is registered as a notification callback
 * in controld_timer_fencer_connect() before its definition later in this file.
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and the most we'll restart a
* transition for.
*/
// Per-target failure record, stored as a value in stonith_failures
struct st_fail_rec {
// Fencing failures counted for the target since its count was last reset
int count;
};
// Whether to panic (rather than exit) when notified of our own fencing
static bool fence_reaction_panic = false;
// Failure count at which too_many_st_failures() gives up on a target
static unsigned long int stonith_max_attempts = 10;
// Maps target node name to its struct st_fail_rec; created on first failure
static GHashTable *stonith_failures = NULL;
/*!
 * \internal
 * \brief Update max fencing attempts before giving up
 *
 * \param[in] value  New max fencing attempts (parsed as a score)
 *
 * \note Any value that parses to less than 1 (including negative scores)
 *       selects the default of 10 attempts.
 */
static void
update_stonith_max_attempts(const char *value)
{
    /* Validate the parsed score while it is still signed. Assigning a negative
     * score directly to the unsigned global would wrap it to a huge positive
     * number, which would pass the "less than 1" test and effectively remove
     * the limit on fencing attempts.
     */
    int score = char2score(value);

    if (score < 1) {
        stonith_max_attempts = 10UL;
    } else {
        stonith_max_attempts = (unsigned long int) score;
    }
}
/*!
 * \internal
 * \brief Choose how to react when notified that the local node was fenced
 *
 * \param[in] reaction_s  Configured reaction: "panic" or the stop value
 *
 * \note Anything other than "panic" selects the stop behavior. A value that is
 *       neither "panic" nor the stop value is also logged as invalid.
 */
static void
set_fence_reaction(const char *reaction_s)
{
    const bool want_panic = pcmk__str_eq(reaction_s, "panic", pcmk__str_casei);

    if (!want_panic
        && !pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {

        crm_warn("Invalid value '%s' for %s, using 'stop'",
                 reaction_s, PCMK_OPT_FENCE_REACTION);
    }
    fence_reaction_panic = want_panic;
}
/*!
 * \internal
 * \brief Apply the fencing-related cluster options from the CIB
 *
 * Looks up the fence reaction and the maximum number of fencing attempts in
 * the given option table and hands each value to its setter.
 *
 * \param[in,out] options  Name/value pairs for configured options
 */
void
controld_configure_fencing(GHashTable *options)
{
    const char *reaction = g_hash_table_lookup(options,
                                               PCMK_OPT_FENCE_REACTION);
    const char *max_attempts = NULL;

    set_fence_reaction(reaction);

    max_attempts = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
    update_stonith_max_attempts(max_attempts);
}
/*!
 * \internal
 * \brief Check whether fencing has failed too often to keep retrying
 *
 * \param[in] target  Name of node to check, or NULL to check every node that
 *                    has a failure record
 *
 * \return TRUE if the failure count for \p target (or for any node, if
 *         \p target is NULL) has reached stonith_max_attempts, otherwise FALSE
 */
static gboolean
too_many_st_failures(const char *target)
{
    struct st_fail_rec *rec = NULL;
    bool exceeded = false;

    if (stonith_failures == NULL) {
        return FALSE;
    }

    if (target != NULL) {
        rec = g_hash_table_lookup(stonith_failures, target);
        exceeded = (rec != NULL) && (rec->count >= stonith_max_attempts);

    } else {
        GHashTableIter iter;
        const char *name = NULL;

        g_hash_table_iter_init(&iter, stonith_failures);
        while (!exceeded
               && g_hash_table_iter_next(&iter, (gpointer *) &name,
                                         (gpointer *) &rec)) {
            if (rec->count >= stonith_max_attempts) {
                // Report the first node found at or over the limit
                target = name;
                exceeded = true;
            }
        }
    }

    if (!exceeded) {
        return FALSE;
    }

    crm_warn("Too many failures (%d) to fence %s, giving up",
             rec->count, target);
    return TRUE;
}
/*!
 * \internal
 * \brief Zero the fencing failure count of one node or of all nodes
 *
 * \param[in] target  Name of node to reset, or NULL for all
 *
 * \note Records are kept in the table; only their counts are cleared.
 */
void
st_fail_count_reset(const char *target)
{
    struct st_fail_rec *rec = NULL;
    GHashTableIter iter;

    if (stonith_failures == NULL) {
        return; // Nothing has ever failed, so there is nothing to reset
    }

    if (target != NULL) {
        rec = g_hash_table_lookup(stonith_failures, target);
        if (rec != NULL) {
            rec->count = 0;
        }
        return;
    }

    // No target given: clear every record (the keys are not needed)
    g_hash_table_iter_init(&iter, stonith_failures);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rec)) {
        rec->count = 0;
    }
}
/*!
 * \internal
 * \brief Record one more fencing failure for a node
 *
 * Creates the failure table on first use, and a record with a count of 1 for
 * a node that has not failed before.
 *
 * \param[in] target  Name of node whose fencing failed
 *
 * \note If memory for a new record or its key cannot be allocated, the failure
 *       is silently not recorded.
 */
static void
st_fail_count_increment(const char *target)
{
    struct st_fail_rec *rec = NULL;
    char *key = NULL;

    if (stonith_failures == NULL) {
        // The table is created with free() for both keys and values
        stonith_failures = pcmk__strkey_table(free, free);
    }

    rec = g_hash_table_lookup(stonith_failures, target);
    if (rec != NULL) {
        rec->count++;
        return;
    }

    /* Allocate both the record and its key before inserting, so that a failed
     * strdup() can never put a NULL key into the string-keyed table.
     */
    rec = malloc(sizeof *rec);
    key = strdup(target);
    if ((rec == NULL) || (key == NULL)) {
        free(rec);
        free(key);
        return;
    }

    rec->count = 1;
    g_hash_table_insert(stonith_failures, key, rec);
}
/* end stonith fail count functions */
/*!
 * \internal
 * \brief CIB callback logging the outcome of a fencing-related update
 *
 * A failed update is logged together with the message XML, and the transition
 * is aborted.
 *
 * \param[in] msg        Message XML, logged if the update failed
 * \param[in] call_id    CIB call ID of the update
 * \param[in] rc         Result of the update (negative on failure)
 * \param[in] output     Ignored
 * \param[in] user_data  Target node name (a string)
 */
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
                    void *user_data)
{
    if (rc >= pcmk_ok) {
        crm_info("Fencing update %d for %s: complete",
                 call_id, (char *) user_data);
        return;
    }

    crm_err("Fencing update %d for %s: failed - %s (%d)",
            call_id, (char *) user_data, pcmk_strerror(rc), rc);
    crm_log_xml_warn(msg, "Failed update");
    abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
                     "CIB update failed", NULL);
}
/*!
 * \internal
 * \brief Record a node's fencing in the peer cache and the CIB
 *
 * Marks the peer as down, sends a node state update to the CIB (with
 * cib_fencing_updated() as the callback), and requests deletion of the node's
 * state sections.
 *
 * \param[in] action  Ignored
 * \param[in] target  Name of the node that was fenced
 * \param[in] uuid    ID of the node that was fenced
 */
static void
send_stonith_update(pcmk__graph_action_t *action, const char *target,
                    const char *uuid)
{
    /* We (usually) rely on the membership layer to do node_update_cluster,
     * and the peer status callback to do node_update_peer, because the node
     * might have already rejoined before we get the stonith result here.
     */
    int flags = node_update_join | node_update_expected;
    int call_id = pcmk_ok;
    crm_node_t *peer = NULL;
    xmlNode *state_update = NULL;

    CRM_CHECK(target != NULL, return);
    CRM_CHECK(uuid != NULL, return);

    /* Make sure the membership and join caches are accurate. Search by node
     * uuid as well, in case an existing cache entry has no uname yet.
     */
    peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any);
    CRM_CHECK(peer != NULL, return);

    if (peer->state == NULL) {
        /* The membership layer normally updates the cluster state in the CIB,
         * but a node that has never been seen must be handled here, so that it
         * is not considered unclean.
         */
        flags |= node_update_cluster;
    }

    if (peer->uuid == NULL) {
        crm_info("Recording uuid '%s' for node '%s'", uuid, target);
        peer->uuid = strdup(uuid);
    }

    crmd_peer_down(peer, TRUE);

    // Build the node state update, which zeroes out the node's status
    state_update = create_node_state_update(peer, flags, NULL, __func__);

    // Remote nodes need an explicit record of when they were fenced
    if (peer->flags & crm_remote_node) {
        char *fenced_at = pcmk__ttoa(time(NULL));

        crm_xml_add(state_update, PCMK__XA_NODE_FENCED, fenced_at);
        free(fenced_at);
    }

    // Force our known ID
    crm_xml_add(state_update, PCMK_XA_ID, uuid);

    call_id = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
                                                      PCMK_XE_STATUS,
                                                      state_update,
                                                      cib_scope_local
                                                      |cib_can_create);

    // Delay processing the trigger until the update completes
    crm_debug("Sending fencing update %d for %s", call_id, target);
    fsa_register_cib_callback(call_id, strdup(target), cib_fencing_updated);

    // Make sure it sticks
    /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn,
     *                                             cib_scope_local);
     */

    // Remove all of the node's remaining status info
    controld_delete_node_state(peer->uname, controld_section_all,
                               cib_scope_local);
    free_xml(state_update);
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum pcmk__graph_next abort_action,
const char *target, const xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
abort_action = pcmk__graph_wait;
}
abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
// List of node names (heap-allocated copies made by add_stonith_cleanup())
static GList *stonith_cleanup_list = NULL;
/*!
 * \internal
 * \brief Append a node to the stonith cleanup list
 *
 * The list stores its own copy of the name.
 *
 * \param[in] target  Name of node to add
 */
void
add_stonith_cleanup(const char *target)
{
    char *name_copy = strdup(target);

    stonith_cleanup_list = g_list_append(stonith_cleanup_list, name_copy);
}
/*!
 * \internal
 * \brief Remove every entry for a node from the stonith cleanup list
 *
 * \param[in] target  Name of node to remove
 */
void
remove_stonith_cleanup(const char *target)
{
    GList *entry = stonith_cleanup_list;
    GList *next = NULL;

    for (; entry != NULL; entry = next) {
        char *name = entry->data;

        // Remember the successor first, because this link may be deleted
        next = entry->next;

        if (!pcmk__str_eq(target, name, pcmk__str_casei)) {
            continue;
        }

        crm_trace("Removing %s from the cleanup list", name);
        stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, entry);
        free(name);
    }
}
/*!
 * \internal
 * \brief Discard the whole stonith cleanup list, logging each dropped node
 */
void
purge_stonith_cleanup(void)
{
    GList *entry = stonith_cleanup_list;

    if (entry == NULL) {
        return;
    }

    // Free the stored names first, then the list links themselves
    while (entry != NULL) {
        char *name = entry->data;

        crm_info("Purging %s from stonith cleanup list", name);
        free(name);
        entry = entry->next;
    }

    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;
}
/*!
 * \internal
 * \brief Send a stonith update for each node in the cleanup list, then empty it
 */
void
execute_stonith_cleanup(void)
{
    GList *entry = stonith_cleanup_list;

    while (entry != NULL) {
        char *name = entry->data;
        crm_node_t *node = pcmk__get_node(0, name, NULL,
                                          pcmk__node_search_cluster);
        const char *node_uuid = crm_peer_uuid(node);

        crm_notice("Marking %s, target of a previous stonith action, as clean", name);
        send_stonith_update(NULL, name, node_uuid);

        // The list owns the name, so release it once the update is sent
        free(name);
        entry = entry->next;
    }

    g_list_free(stonith_cleanup_list);
    stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
// Fencer API connection object; allocated on the first connection attempt
static stonith_t *stonith_api = NULL;
// Timer that re-runs controld_timer_fencer_connect() to retry connecting
static mainloop_timer_t *controld_fencer_connect_timer = NULL;
// "<system name>.<pid>" string compared against the client of fence events
static char *te_client_id = NULL;
/*!
 * \internal
 * \brief Fail all unconfirmed fencing actions in a transition graph
 *
 * Called when the fencer connection is lost, since results for pending
 * fencing actions can no longer arrive. If any action was failed, the
 * transition is aborted.
 *
 * \param[in,out] graph  Transition graph to check (may be NULL)
 *
 * \return TRUE if at least one fencing action was failed, otherwise FALSE
 */
static gboolean
fail_incompletable_stonith(pcmk__graph_t *graph)
{
    GList *syn_iter = NULL;
    GList *act_iter = NULL;
    xmlNode *last_failed = NULL;

    if (graph == NULL) {
        return FALSE;
    }

    for (syn_iter = graph->synapses; syn_iter != NULL;
         syn_iter = syn_iter->next) {

        pcmk__graph_synapse_t *synapse = syn_iter->data;

        if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
            continue;
        }

        for (act_iter = synapse->actions; act_iter != NULL;
             act_iter = act_iter->next) {

            pcmk__graph_action_t *action = act_iter->data;
            const char *op = NULL;

            // Only unconfirmed cluster actions can be pending fencing
            if (action->type != pcmk__cluster_graph_action) {
                continue;
            }
            if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
                continue;
            }

            op = crm_element_value(action->xml, PCMK_XA_OPERATION);
            if (!pcmk__str_eq(op, PCMK_ACTION_STONITH, pcmk__str_casei)) {
                continue;
            }

            pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
            last_failed = action->xml;
            pcmk__update_graph(graph, action);
            crm_notice("Failing action %d (%s): fencer terminated",
                       action->id, pcmk__xe_id(action->xml));
        }
    }

    if (last_failed == NULL) {
        return FALSE;
    }

    crm_warn("Fencer failure resulted in unrunnable actions");
    abort_for_stonith_failure(pcmk__graph_restart, NULL, last_failed);
    return TRUE;
}
/*!
 * \internal
 * \brief Handle loss of the fencer connection
 *
 * Stops history sync timers, starts the reconnect timer if fencing is still
 * required, clears registered notifications, and (on the DC) fails fencing
 * actions that can no longer complete.
 *
 * \param[in,out] st  Fencer API connection that was lost
 * \param[in]     e   Ignored
 */
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
    te_cleanup_stonith_history_sync(st, FALSE);

    if (!pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
        crm_info("Disconnected from fencer");

    } else {
        crm_err("Lost fencer connection (will attempt to reconnect)");
        if (!mainloop_timer_running(controld_fencer_connect_timer)) {
            mainloop_timer_start(controld_fencer_connect_timer);
        }
    }

    if (stonith_api != NULL) {
        /* The client API won't properly reconnect notifications if they are
         * still in the table, so remove them.
         */
        if (stonith_api->state != stonith_disconnected) {
            stonith_api->cmds->disconnect(st);
        }
        stonith_api->cmds->remove_notification(stonith_api, NULL);
    }

    if (!AM_I_DC) {
        return;
    }
    fail_incompletable_stonith(controld_globals.transition_graph);
    trigger_graph();
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* Logs the result, updates the local fencing failure counts when not DC, and,
* for a successful fencing of a known node, updates the CIB and peer caches.
* Unfencing ("on") events are only logged. A successful fencing of the local
* node makes this process panic or exit.
*
* \param[in] st Fencing API connection (ignored)
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
// Placeholders used in log messages when the event does not name these
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
// Lazily build our own client ID, compared against the event's client below
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
/* The operation succeeded only if both the exit status and the execution
 * status say so. A bad exit status with a "done" execution status is
 * reported as an execution error.
 */
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded
&& pcmk__str_eq(event->target, controld_globals.our_nodename,
pcmk__str_casei)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic(__func__);
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " CRM_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
if (succeeded) {
crm_node_t *peer = pcmk__search_node_caches(0, event->target,
pcmk__node_search_any
|pcmk__node_search_known);
const char *uuid = NULL;
// Nothing more can be updated for a target missing from the node caches
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
if (AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"External Fencing Operation", NULL);
}
} else if (pcmk__str_eq(controld_globals.dc_name, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
// Assume the target was our DC if we don't currently have one
if (controld_globals.dc_name != NULL) {
crm_notice("Fencing target %s was our DC", event->target);
} else {
crm_notice("Fencing target %s may have been our DC",
event->target);
}
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
pcmk__str_casei)) {
send_stonith_update(NULL, event->target, uuid);
}
// Remember the target so an update can be (re-)sent via the cleanup list
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* Allocates the fencer API object if needed, connects (blocking or
* non-blocking depending on \p user_data), and on success registers the
* notification handlers and schedules a fence history sync.
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer
*
* \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
* \note G_SOURCE_REMOVE is also returned when the API object cannot be
* allocated, when already connected, and when a failed non-blocking
* attempt is ignored because fencing is no longer required.
*/
gboolean
controld_timer_fencer_connect(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return G_SOURCE_REMOVE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return G_SOURCE_REMOVE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
/* Create the retry timer once. Its callback is this function, called with
 * non-NULL user data so that retries also take the non-blocking path.
 */
if (controld_fencer_connect_timer == NULL) {
controld_fencer_connect_timer =
mainloop_timer_add("controld_fencer_connect", 1000,
TRUE, controld_timer_fencer_connect,
GINT_TO_POINTER(TRUE));
}
if (rc != pcmk_ok) {
if (pcmk_is_set(controld_globals.fsa_input_register,
R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
return G_SOURCE_CONTINUE;
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return G_SOURCE_REMOVE;
}
}
// Connected: register notification handlers and schedule a history sync
if (rc == pcmk_ok) {
- stonith_api->cmds->register_notification(stonith_api,
- T_STONITH_NOTIFY_DISCONNECT,
- tengine_stonith_connection_destroy);
- stonith_api->cmds->register_notification(stonith_api,
- T_STONITH_NOTIFY_FENCE,
- handle_fence_notification);
- stonith_api->cmds->register_notification(stonith_api,
- T_STONITH_NOTIFY_HISTORY_SYNCED,
- tengine_stonith_history_synced);
+ stonith_api_operations_t *cmds = stonith_api->cmds;
+
+ cmds->register_notification(stonith_api,
+ PCMK__VALUE_ST_NOTIFY_DISCONNECT,
+ tengine_stonith_connection_destroy);
+ cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
+ handle_fence_notification);
+ cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_HISTORY_SYNCED,
+ tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Disconnect from the fencer, optionally releasing all related state
 *
 * \param[in] destroy  If true, also free the API object, the reconnect timer,
 *                     and the cached client ID
 */
void
controld_disconnect_fencer(bool destroy)
{
    if (stonith_api != NULL) {
        // Prevent fencer connection from coming up again
        controld_clear_fsa_input_flags(R_ST_REQUIRED);

        if (stonith_api->state != stonith_disconnected) {
            stonith_api->cmds->disconnect(stonith_api);
        }
        stonith_api->cmds->remove_notification(stonith_api, NULL);
    }

    if (!destroy) {
        return;
    }

    if (stonith_api != NULL) {
        stonith_api->cmds->free(stonith_api);
        stonith_api = NULL;
    }

    if (controld_fencer_connect_timer != NULL) {
        mainloop_timer_del(controld_fencer_connect_timer);
        controld_fencer_connect_timer = NULL;
    }

    // free(NULL) is a no-op, so no guard is needed here
    free(te_client_id);
    te_client_id = NULL;
}
/*!
 * \internal
 * \brief Trigger callback that requests a broadcast fence history sync
 *
 * \param[in] user_data  Ignored
 *
 * \return TRUE if the sync was requested, or FALSE if it was skipped because
 *         there is no fencer connection
 */
static gboolean
do_stonith_history_sync(gpointer user_data)
{
    stonith_history_t *history = NULL;

    if ((stonith_api == NULL)
        || (stonith_api->state == stonith_disconnected)) {

        crm_info("Skip triggering stonith history-sync as stonith is disconnected");
        return FALSE;
    }

    // Stop pending sync timers, since the sync is being requested right now
    te_cleanup_stonith_history_sync(stonith_api, FALSE);

    stonith_api->cmds->history(stonith_api,
                               st_opt_sync_call | st_opt_broadcast,
                               NULL, &history, 5);

    // Only the request matters here, so discard whatever history came back
    stonith_history_free(history);
    return TRUE;
}
/*!
* \internal
* \brief Handle the result of a fencing operation registered with this callback
*
* Results are only processed on the DC and only if they belong to the current
* transition. On success the graph action is confirmed and node attributes or
* the CIB are updated; on failure the fail count is incremented and the
* transition is aborted.
*
* \param[in] stonith Fencer API connection (not used here)
* \param[in,out] data Callback data; its userdata is the transition key string
*/
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
// UUID decoded from the transition key (freed at bail)
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
pcmk__graph_action_t *action = NULL;
const char *target = NULL;
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence operation %d result: "
"No transition key given (bug?)",
((data == NULL)? -1 : data->call_id));
return;
}
/* NOTE(review): this path returns without freeing data->userdata, unlike the
 * bail path below -- confirm whether that is intended.
 */
if (!AM_I_DC) {
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
reason = pcmk_exec_status_str(stonith__execution_status(data));
}
crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
data->call_id, stonith__exit_status(data), reason,
(const char *) data->userdata);
return;
}
CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
&stonith_id, NULL),
goto bail);
// Ignore results that do not belong to the transition currently in progress
if (controld_globals.transition_graph->complete || (stonith_id < 0)
|| !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
|| (controld_globals.transition_graph->id != transition_id)) {
crm_info("Ignoring fence operation %d result: "
"Not from current transition " CRM_XS
" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
data->call_id,
pcmk__btoa(controld_globals.transition_graph->complete),
stonith_id, uuid, controld_globals.te_uuid, transition_id,
controld_globals.transition_graph->id);
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Ignoring fence operation %d result: "
"Action %d not found in transition graph (bug?) "
CRM_XS " uuid=%s transition=%d",
data->call_id, stonith_id, uuid, transition_id);
goto bail;
}
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
if (target == NULL) {
crm_err("Ignoring fence operation %d result: No target given (bug?)",
data->call_id);
goto bail;
}
stop_te_timer(action);
if (stonith__exit_status(data) == CRM_EX_OK) {
// NOTE: this uuid (the target's node UUID) shadows the outer uuid
const char *uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
const char *op = crm_meta_value(action->params,
PCMK__META_STONITH_ACTION);
crm_info("Fence operation %d for %s succeeded", data->call_id, target);
if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
te_action_confirmed(action, NULL);
// Unfencing ("on") updates node attributes instead of the CIB node state
if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
const char *value = NULL;
char *now = pcmk__ttoa(time(NULL));
gboolean is_remote_node = FALSE;
/* This check is not 100% reliable, since this node is not
* guaranteed to have the remote node cached. However, it
* doesn't have to be reliable, since the attribute manager can
* learn a node's "remoteness" by other means sooner or later.
* This allows it to learn more quickly if this node does have
* the information.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
is_remote_node = TRUE;
}
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
is_remote_node);
free(now);
value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
is_remote_node);
value = crm_meta_value(action->params,
PCMK__META_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
send_stonith_update(action, target, uuid);
pcmk__set_graph_action_flags(action,
pcmk__graph_action_sent_update);
}
}
st_fail_count_reset(target);
} else {
enum pcmk__graph_next abort_action = pcmk__graph_restart;
int status = stonith__execution_status(data);
const char *reason = stonith__exit_reason(data);
// Fall back to a generic description when the fencer gave no exit reason
if (reason == NULL) {
if (status == PCMK_EXEC_DONE) {
reason = "Agent returned error";
} else {
reason = pcmk_exec_status_str(status);
}
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
crm_warn("Fence operation %d for %s failed: %s "
"(aborting transition and giving up for now)",
data->call_id, target, reason);
abort_action = pcmk__graph_wait;
} else {
crm_notice("Fence operation %d for %s failed: %s "
"(aborting transition)", data->call_id, target, reason);
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in
* handle_fence_notification().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
// Common exit for all DC paths: release the transition key and decoded UUID
bail:
free(data->userdata);
free(uuid);
return;
}
/*!
 * \internal
 * \brief Ask the fencer to fence a node, with an optional delay
 *
 * \param[in] target  Name of node to fence
 * \param[in] type    Fencing action to perform
 * \param[in] delay   Delay passed through to the fencer API
 *
 * \return Result of the fencer API's fence_with_delay() call
 */
static int
fence_with_delay(const char *target, const char *type, int delay)
{
    uint32_t options = st_opt_none; // Group of enum stonith_call_options
    const int timeout_sec =
        (int) (controld_globals.transition_graph->stonith_timeout / 1000);
    const bool only_one_confirmed =
        (crmd_join_phase_count(crm_join_confirmed) == 1);

    // With a single confirmed node, allow the fencing of the local node
    if (only_one_confirmed) {
        stonith__set_call_options(options, target, st_opt_allow_suicide);
    }

    return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
                                               type, timeout_sec, 0, delay);
}
/*!
 * \internal
 * \brief Execute a fencing action from a transition graph
 *
 * Validates the action, connects to the fencer (blocking if necessary),
 * requests fencing, and registers tengine_stonith_callback() for the result.
 *
 * \param[in] graph   Transition graph being executed (ignored)
 * \param[in] action  Fencing action to execute
 *
 * \return Standard Pacemaker return code (EPROTO if the action lacks a
 *         required field, otherwise pcmk_rc_ok)
 */
int
controld_execute_fence_action(pcmk__graph_t *graph,
                              pcmk__graph_action_t *action)
{
    int rc = 0;
    const char *id = pcmk__xe_id(action->xml);
    const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
    const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    const char *type = crm_meta_value(action->params,
                                      PCMK__META_STONITH_ACTION);
    char *transition_key = NULL;
    const char *priority_delay = NULL;
    int delay_i = 0;
    gboolean invalid_action = FALSE;
    int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
                                 / 1000);

    // Check every required field, so that each missing one gets reported
    CRM_CHECK(id != NULL, invalid_action = TRUE);
    CRM_CHECK(uuid != NULL, invalid_action = TRUE);
    CRM_CHECK(type != NULL, invalid_action = TRUE);
    CRM_CHECK(target != NULL, invalid_action = TRUE);

    if (invalid_action) {
        crm_log_xml_warn(action->xml, "BadAction");
        return EPROTO;
    }

    priority_delay = crm_meta_value(action->params,
                                    PCMK_OPT_PRIORITY_FENCING_DELAY);

    crm_notice("Requesting fencing (%s) targeting node %s "
               CRM_XS " action=%s timeout=%i%s%s",
               type, target, id, stonith_timeout,
               priority_delay ? " priority_delay=" : "",
               priority_delay ? priority_delay : "");

    /* Passing NULL means block until we can connect... */
    controld_timer_fencer_connect(NULL);

    pcmk__scan_min_int(priority_delay, &delay_i, 0);
    rc = fence_with_delay(target, type, delay_i);

    /* The key is handed to the fencer API as callback user data, so
     * tengine_stonith_callback() receives it as data->userdata.
     *
     * This statement previously ended in a comma, which chained it to the
     * register_callback() call via the comma operator. It is now a statement
     * of its own.
     */
    transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
                                          action->id, 0,
                                          controld_globals.te_uuid);

    // Allow for the fencing delay (if any) on top of the fencing timeout
    stonith_api->cmds->register_callback(stonith_api, rc,
                                         (stonith_timeout
                                          + (delay_i > 0 ? delay_i : 0)),
                                         st_opt_timeout_updates, transition_key,
                                         "tengine_stonith_callback",
                                         tengine_stonith_callback);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Check a stonith watchdog timeout value where validation applies
 *
 * The value is validated when it is unset or parses to 0, or when the
 * connected fencer reports watchdog fencing as enabled for the local node.
 * In every other case it is accepted without validation.
 *
 * \param[in] value  Configured timeout value (may be NULL)
 *
 * \return Result of the validation if it was performed, otherwise true
 */
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
    long long st_timeout = 0;
    bool must_validate = false;

    if (value != NULL) {
        st_timeout = crm_get_msec(value);
    }

    if (st_timeout == 0) {
        must_validate = true;

    } else if ((stonith_api != NULL)
               && (stonith_api->state != stonith_disconnected)) {
        must_validate =
            stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
                                                           controld_globals.our_nodename);
    }

    if (!must_validate) {
        return true;
    }
    return pcmk__valid_stonith_watchdog_timeout(value);
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
// Mainloop trigger that runs do_stonith_history_sync() when set
static crm_trigger_t *stonith_history_sync_trigger = NULL;
// Timer created with a 5000 interval ("within 5 seconds" per the log message)
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
// Timer created with a 30000 interval ("within 30 seconds" per the log message)
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
/*!
 * \internal
 * \brief Stop or free the history sync timers and drop the sync notification
 *
 * \param[in,out] st           Fencer API connection to remove the
 *                             history-synced notification from (may be NULL)
 * \param[in]     free_timers  If true, delete both timers; otherwise only
 *                             stop them so that they can be restarted
 */
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
    if (!free_timers) {
        mainloop_timer_stop(stonith_history_sync_timer_short);
        mainloop_timer_stop(stonith_history_sync_timer_long);

    } else {
        mainloop_timer_del(stonith_history_sync_timer_short);
        stonith_history_sync_timer_short = NULL;

        mainloop_timer_del(stonith_history_sync_timer_long);
        stonith_history_sync_timer_long = NULL;
    }

    if (st != NULL) {
        st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
    }
}
/*!
 * \internal
 * \brief Notification handler called when the fence history has been synced
 *
 * Stops the pending history sync timers, since a sync is no longer needed.
 *
 * \param[in,out] st        Fencer API connection
 * \param[in]     st_event  Ignored
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
    te_cleanup_stonith_history_sync(st, FALSE);

    crm_debug("Fence-history synced - cancel all timers");
}
/*!
 * \internal
 * \brief Timer callback that sets the history sync trigger
 *
 * \param[in] user_data  Ignored
 *
 * \return Always FALSE
 */
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
    mainloop_set_trigger(stonith_history_sync_trigger);

    return FALSE;
}
/*!
 * \internal
 * \brief Schedule a cluster-wide fence history synchronization
 *
 * \param[in] long_timeout  If true, use the 30-second fallback timer,
 *                          otherwise the 5-second timer
 */
void
te_trigger_stonith_history_sync(bool long_timeout)
{
    /* The short (5s) delay gives more nodes the chance to show up, so that we
     * don't create unnecessary stonith-history-sync traffic.
     *
     * The long (30s) delay is a fallback: after a successful connection to
     * fenced, we wait that long for the DC to trigger a history sync, and
     * trigger one locally if that doesn't happen (e.g. fenced segfaults and is
     * restarted by pacemakerd).
     *
     * Since do_stonith_history_sync() checks the fencer connection itself, it
     * is fine to leave the timers and the trigger around once created.
     */
    if (stonith_history_sync_trigger == NULL) {
        stonith_history_sync_trigger =
            mainloop_add_trigger(G_PRIORITY_LOW,
                                 do_stonith_history_sync, NULL);
    }

    if (!long_timeout) {
        if (stonith_history_sync_timer_short == NULL) {
            stonith_history_sync_timer_short =
                mainloop_timer_add("history_sync_short", 5000,
                                   FALSE, stonith_history_sync_set_trigger,
                                   NULL);
        }
        crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
        mainloop_timer_start(stonith_history_sync_timer_short);
        return;
    }

    if (stonith_history_sync_timer_long == NULL) {
        stonith_history_sync_timer_long =
            mainloop_timer_add("history_sync_long", 30000,
                               FALSE, stonith_history_sync_set_trigger,
                               NULL);
    }
    crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
    mainloop_timer_start(stonith_history_sync_timer_long);
}
/* end stonith history synchronization functions */
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
index 96ea9b83bb..f7af6623a1 100644
--- a/daemons/execd/pacemaker-execd.c
+++ b/daemons/execd/pacemaker-execd.c
@@ -1,585 +1,583 @@
/*
* Copyright 2012-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <signal.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/services.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/output_internal.h>
#include <crm/common/remote_internal.h>
#include <crm/lrmd_internal.h>
#include "pacemaker-execd.h"
// Identity strings depend on whether this is built with PCMK__COMPILE_REMOTE
#ifdef PCMK__COMPILE_REMOTE
# define EXECD_TYPE "remote"
# define EXECD_NAME "pacemaker-remoted"
# define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes"
#else
# define EXECD_TYPE "local"
# define EXECD_NAME "pacemaker-execd"
# define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes"
#endif
static GMainLoop *mainloop = NULL;
static qb_ipcs_service_t *ipcs = NULL;
// Fencer connection handed out (and re-created) by get_stonith_connection()
static stonith_t *stonith_api = NULL;
int lrmd_call_id = 0;
time_t start_time;
// NOTE(review): presumably holds parsed command-line options -- confirm
static struct {
gchar **log_files;
#ifdef PCMK__COMPILE_REMOTE
gchar *port;
#endif // PCMK__COMPILE_REMOTE
} options;
#ifdef PCMK__COMPILE_REMOTE
/* whether shutdown request has been sent */
static gboolean shutting_down = FALSE;
/* timer for waiting for acknowledgment of shutdown request */
static guint shutdown_ack_timer = 0;
static gboolean lrmd_exit(gpointer data);
#endif
/*!
 * \internal
 * \brief Handle notification that the fencer connection was lost
 *
 * \param[in] st  Fencer connection the notification arrived on (unused)
 * \param[in] e   Notification event (unused)
 */
static void
stonith_connection_destroy_cb(stonith_t *st, stonith_event_t *e)
{
    // Flag the cached connection so get_stonith_connection() replaces it
    stonith_api->state = stonith_disconnected;

    stonith_connection_failed();
}
/*!
 * \internal
 * \brief Get the executor's cached fencer connection, (re)connecting if needed
 *
 * \return Fencer API object, or NULL if it could not be allocated or the
 *         connection attempts failed
 */
stonith_t *
get_stonith_connection(void)
{
// Discard a cached connection that has been marked as disconnected
if (stonith_api && stonith_api->state == stonith_disconnected) {
stonith_api_delete(stonith_api);
stonith_api = NULL;
}
if (stonith_api == NULL) {
int rc = pcmk_ok;
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return NULL;
}
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 10 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
stonith_api_delete(stonith_api);
stonith_api = NULL;
} else {
// Connected: ask to be told when the connection drops
- stonith_api->cmds->register_notification(stonith_api,
- T_STONITH_NOTIFY_DISCONNECT,
- stonith_connection_destroy_cb);
+ stonith_api_operations_t *cmds = stonith_api->cmds;
+
+ cmds->register_notification(stonith_api,
+ PCMK__VALUE_ST_NOTIFY_DISCONNECT,
+ stonith_connection_destroy_cb);
}
}
return stonith_api;
}
/*!
 * \internal
 * \brief IPC server callback: accept a new client connection
 *
 * \param[in] c    New connection
 * \param[in] uid  User ID passed through to pcmk__new_client()
 * \param[in] gid  Group ID passed through to pcmk__new_client()
 *
 * \return 0 if a client object was created, otherwise -ENOMEM
 */
static int32_t
lrmd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    pcmk__client_t *created = NULL;

    crm_trace("Connection %p", c);

    created = pcmk__new_client(c, uid, gid);
    return (created == NULL)? -ENOMEM : 0;
}
/*!
 * \internal
 * \brief IPC server callback: a client connection is fully established
 *
 * \param[in] c  Newly established connection
 */
static void
lrmd_ipc_created(qb_ipcs_connection_t *c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    crm_trace("Connection %p", c);
    CRM_ASSERT(client != NULL);

    // The connection is official now, so tell the other clients about it
    notify_of_new_client(client);
}
/*!
 * \internal
 * \brief IPC server callback: process a message received from a client
 *
 * Parse the message, record the client's name if not yet known, stamp the
 * request with the client ID, client name, and a fresh call ID, and hand it
 * to process_lrmd_message().
 *
 * \param[in] c     Connection the message arrived on
 * \param[in] data  Raw message data
 * \param[in] size  Size of \p data (unused)
 *
 * \return 0 (always)
 */
static int32_t
lrmd_ipc_dispatch(qb_ipcs_connection_t *c, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    xmlNode *request = NULL;
    pcmk__client_t *client = pcmk__find_client(c);

    // Validate the client before handing it to the message parser
    CRM_CHECK(client != NULL, crm_err("Invalid client");
              return 0);
    CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
              return 0);

    request = pcmk__client_data2xml(client, data, &id, &flags);

    // Don't leak the parsed request if it is rejected
    CRM_CHECK(flags & crm_ipc_client_response,
              crm_err("Invalid client request: %p", client);
              free_xml(request);
              return 0);

    if (request == NULL) {
        return 0;
    }

    if (client->name == NULL) {
        // First request from this client: remember what it calls itself
        const char *value = crm_element_value(request,
                                              PCMK__XA_LRMD_CLIENTNAME);

        if (value == NULL) {
            client->name = pcmk__itoa(pcmk__client_pid(c));
        } else {
            client->name = strdup(value);
        }
    }

    // Call IDs are always positive, so restart at 1 if the counter wraps
    lrmd_call_id++;
    if (lrmd_call_id < 1) {
        lrmd_call_id = 1;
    }

    crm_xml_add(request, PCMK__XA_LRMD_CLIENTID, client->id);
    crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, client->name);
    crm_xml_add_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id);

    process_lrmd_message(client, id, request);

    free_xml(request);
    return 0;
}
/*!
 * \internal
 * \brief Release a client connection, exiting afterward if a pending
 *        shutdown can now proceed
 *
 * \param[in,out] client  Client connection to free
 */
void
lrmd_client_destroy(pcmk__client_t *client)
{
    pcmk__free_client(client);

#ifdef PCMK__COMPILE_REMOTE
    // A pending shutdown may complete once no proxied IPC providers remain
    if (!shutting_down) {
        return;
    }
    if (ipc_proxy_get_provider() == NULL) {
        lrmd_exit(NULL);
    }
#endif
}
/*!
 * \internal
 * \brief IPC server callback: a client connection was closed
 *
 * \param[in] c  Connection that was closed
 *
 * \return 0 (always)
 */
static int32_t
lrmd_ipc_closed(qb_ipcs_connection_t *c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Connection %p", c);
        client_disconnect_cleanup(client->id);
#ifdef PCMK__COMPILE_REMOTE
        ipc_proxy_remove_provider(client);
#endif
        lrmd_client_destroy(client);
    }
    return 0;
}
/*!
 * \internal
 * \brief IPC server callback: a client connection is being destroyed
 *
 * \param[in] c  Connection being destroyed
 */
static void
lrmd_ipc_destroy(qb_ipcs_connection_t *c)
{
    // Run the same cleanup as for a closed connection
    lrmd_ipc_closed(c);

    crm_trace("Connection %p", c);
}
// Handlers given to the IPC server that main() creates for executor clients
static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = {
.connection_accept = lrmd_ipc_accept,
.connection_created = lrmd_ipc_created,
.msg_process = lrmd_ipc_dispatch,
.connection_closed = lrmd_ipc_closed,
.connection_destroyed = lrmd_ipc_destroy
};
/*!
 * \internal
 * \brief Send a reply to a client over whichever transport it is using
 *
 * \param[in] client  Client to reply to
 * \param[in] id      Message ID the reply is for
 * \param[in] reply   Reply XML to send
 *
 * \return Standard Pacemaker return code (ENOTCONN if the client's transport
 *         type is not handled)
 */
int
lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply)
{
    int rc = ENOTCONN;

    crm_trace("Sending reply (%d) to client (%s)", id, client->id);

    switch (PCMK__CLIENT_TYPE(client)) {
        case pcmk__client_ipc:
            rc = pcmk__ipc_send_xml(client, id, reply, FALSE);
            break;

#ifdef PCMK__COMPILE_REMOTE
        case pcmk__client_tls:
            rc = lrmd__remote_send_xml(client->remote, reply, id, "reply");
            break;
#endif

        default:
            crm_err("Could not send reply: unknown type for client %s "
                    CRM_XS " flags=%#llx",
                    pcmk__client_name(client), client->flags);
            break;
    }
    return rc;
}
/*!
 * \internal
 * \brief Send a notification to a client over whichever transport it is using
 *
 * \param[in] client  Client to notify
 * \param[in] msg     Notification XML to send
 *
 * \return Standard Pacemaker return code (ENOTCONN if the client is
 *         disconnected or its transport type is not handled)
 */
int
lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg)
{
    crm_trace("Sending notification to client (%s)", client->id);

    switch (PCMK__CLIENT_TYPE(client)) {
        case pcmk__client_ipc:
            if (client->ipcs != NULL) {
                return pcmk__ipc_send_xml(client, 0, msg,
                                          crm_ipc_server_event);
            }
            crm_trace("Could not notify local client: disconnected");
            return ENOTCONN;

#ifdef PCMK__COMPILE_REMOTE
        case pcmk__client_tls:
            if (client->remote != NULL) {
                return lrmd__remote_send_xml(client->remote, msg, 0,
                                             "notify");
            }
            crm_trace("Could not notify remote client: disconnected");
            return ENOTCONN;
#endif

        default:
            crm_err("Could not notify client %s with unknown transport "
                    CRM_XS " flags=%#llx",
                    pcmk__client_name(client), client->flags);
            break;
    }
    return ENOTCONN;
}
/*!
 * \internal
 * \brief Clean up and exit immediately
 *
 * Releases the fencer connection, IPC server, client table, and resource list
 * (plus the TLS server and IPC proxy in the remote build), drains pending
 * alerts if the main loop exists, then exits with CRM_EX_OK.
 *
 * \param[in] data Ignored
 *
 * \return Doesn't return
 * \note This can be used as a timer callback.
 */
static gboolean
lrmd_exit(gpointer data)
{
crm_info("Terminating with %d clients", pcmk__ipc_client_count());
// Release the cached fencer connection
- if (stonith_api) {
- stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
- stonith_api->cmds->disconnect(stonith_api);
- stonith_api_delete(stonith_api);
- }
+ stonith_api_delete(stonith_api);
if (ipcs) {
mainloop_del_ipc_server(ipcs);
}
#ifdef PCMK__COMPILE_REMOTE
execd_stop_tls_server();
ipc_proxy_cleanup();
#endif
pcmk__client_cleanup();
g_hash_table_destroy(rsc_list);
// mainloop is NULL if we are called before main() created it
if (mainloop) {
lrmd_drain_alerts(mainloop);
}
crm_exit(CRM_EX_OK);
// Only present to satisfy the timer callback signature
return FALSE;
}
/*!
 * \internal
 * \brief Handle a shutdown signal: ask the cluster to shut us down if
 *        appropriate, otherwise exit right away
 *
 * \param[in] nsig  Signal that triggered the call (ignored)
 */
static void
lrmd_shutdown(int nsig)
{
#ifdef PCMK__COMPILE_REMOTE
    pcmk__client_t *ipc_proxy = ipc_proxy_get_provider();

    /* An active proxied IPC provider means resources may be running here, so
     * the cluster has to be told that we want to shut down.
     */
    if (ipc_proxy != NULL) {
        if (shutting_down) {
            crm_notice("Waiting for cluster to stop resources before exiting");
            return;
        }

        crm_info("Sending shutdown request to cluster");
        if (ipc_proxy_shutdown_req(ipc_proxy) >= 0) {
            /* The request is on its way. Next we wait for the proxy host to
             * acknowledge it (proving that the host supports shutdown
             * requests), and then for every proxy host to disconnect (proving
             * that all resources have been stopped).
             */
            shutting_down = TRUE;

            /* Stop accepting new proxy connections */
            execd_stop_tls_server();

            /* Older controllers never acknowledge the request, so arm a
             * fairly short timer to exit quickly in that case. Receiving the
             * ack defuses it.
             */
            shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL);

            /* For now the OS is left to kill us if clients don't disconnect
             * in a reasonable time. A long timer (shorter than what the OS
             * is likely to use) could be set here instead, exiting
             * immediately when it pops.
             */
            return;
        }
        crm_crit("Shutdown request failed, exiting immediately");
    }
#endif

    lrmd_exit(NULL);
}
/*!
 * \internal
 * \brief Handle a shutdown acknowledgement by cancelling the short exit timer
 *
 * The acknowledgement is ignored (with a debug message) unless a shutdown
 * request is outstanding.
 */
void
handle_shutdown_ack(void)
{
#ifdef PCMK__COMPILE_REMOTE
    if (shutting_down) {
        crm_info("Received shutdown ack");

        if (shutdown_ack_timer != 0) {
            g_source_remove(shutdown_ack_timer);
            shutdown_ack_timer = 0;
        }
        return;
    }
#endif

    crm_debug("Ignoring unexpected shutdown ack");
}
/*!
 * \internal
 * \brief Handle a shutdown rejection by making the short exit timer fire
 *        immediately
 *
 * The rejection is ignored (with a debug message) unless a shutdown request
 * is outstanding.
 */
void
handle_shutdown_nack(void)
{
#ifdef PCMK__COMPILE_REMOTE
    if (shutting_down) {
        crm_info("Received shutdown nack");

        if (shutdown_ack_timer != 0) {
            // Swap the pending timer for one that expires right away
            g_source_remove(shutdown_ack_timer);
            shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL);
        }
        return;
    }
#endif

    crm_debug("Ignoring unexpected shutdown nack");
}
// Daemon-specific command-line options; parsed values land in `options`
static GOptionEntry entries[] = {
{ "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
&options.log_files, "Send logs to the additional named logfile", NULL },
#ifdef PCMK__COMPILE_REMOTE
{ "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port,
"Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL },
#endif // PCMK__COMPILE_REMOTE
// Terminator
{ NULL }
};
// Output formats registered by main() via pcmk__register_formats()
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
// Terminator
{ NULL, NULL, NULL }
};
/*!
 * \internal
 * \brief Create the command-line option context for this daemon
 *
 * \param[in,out] args   Common argument storage handed to the context builder
 * \param[out]    group  Passed through to pcmk__build_arg_context()
 *
 * \return Option context with the daemon-specific entries added
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionContext *ctx = pcmk__build_arg_context(args, "text (default), xml",
                                                  group, NULL);

    pcmk__add_main_args(ctx, entries);
    return ctx;
}
/*!
 * \internal
 * \brief Executor daemon entry point
 *
 * Parses the command line, sets up output and logging, exports logging
 * settings to the environment, creates the IPC server (and the TLS listener
 * in the remote build), then runs the main loop.
 *
 * \param[in] argc  Argument count
 * \param[in] argv  Argument vector
 * \param[in] envp  Environment (only passed to remoted_spawn_pidone())
 *
 * \return Does not return normally; exits via crm_exit()
 */
int
main(int argc, char **argv, char **envp)
{
int rc = pcmk_rc_ok;
crm_exit_t exit_code = CRM_EX_OK;
const char *option = NULL;
pcmk__output_t *out = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
#ifdef PCMK__COMPILE_REMOTE
gchar **processed_args = pcmk__cmdline_preproc(argv, "lp");
#else
gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
#endif // PCMK__COMPILE_REMOTE
GOptionContext *context = build_arg_context(args, &output_group);
#ifdef PCMK__COMPILE_REMOTE
// If necessary, create PID 1 now before any file descriptors are opened
remoted_spawn_pidone(argc, argv, envp);
#endif
crm_log_preinit(EXECD_NAME, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
// --version: print the version and exit successfully
if (args->version) {
out->version(out, false);
goto done;
}
// Open additional log files
if (options.log_files != NULL) {
for (gchar **fname = options.log_files; *fname != NULL; fname++) {
rc = pcmk__add_logfile(*fname);
// A log file that can't be opened is reported but is not fatal
if (rc != pcmk_rc_ok) {
out->err(out, "Logging to %s is disabled: %s",
*fname, pcmk_rc_str(rc));
}
}
}
pcmk__cli_init_logging(EXECD_NAME, args->verbosity);
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
// ocf_log() (in resource-agents) uses the capitalized env options below
option = pcmk__env_option(PCMK__ENV_LOGFACILITY);
if (!pcmk__str_eq(option, PCMK_VALUE_NONE,
pcmk__str_casei|pcmk__str_null_matches)
&& !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) {
pcmk__set_env_option("LOGFACILITY", option, true);
}
option = pcmk__env_option(PCMK__ENV_LOGFILE);
if (!pcmk__str_eq(option, PCMK_VALUE_NONE,
pcmk__str_casei|pcmk__str_null_matches)) {
pcmk__set_env_option("LOGFILE", option, true);
if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) {
pcmk__set_env_option("DEBUGLOG", option, true);
}
}
#ifdef PCMK__COMPILE_REMOTE
// A port given on the command line is stored in the environment option
if (options.port != NULL) {
pcmk__set_env_option(PCMK__ENV_REMOTE_PORT, options.port, false);
}
#endif // PCMK__COMPILE_REMOTE
start_time = time(NULL);
crm_notice("Starting Pacemaker " EXECD_TYPE " executor");
/* The presence of this variable allegedly controls whether child
* processes like httpd will try and use Systemd's sd_notify
* API
*/
unsetenv("NOTIFY_SOCKET");
{
// Temporary directory for resource agent use (leave owned by root)
// NOTE(review): this rc shadows the function-scope rc declared above
int rc = pcmk__build_path(CRM_RSCTMP_DIR, 0755);
if (rc != pcmk_rc_ok) {
crm_warn("Could not create resource agent temporary directory "
CRM_RSCTMP_DIR ": %s", pcmk_rc_str(rc));
}
}
rsc_list = pcmk__strkey_table(NULL, free_rsc);
ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
exit_code = CRM_EX_FATAL;
goto done;
}
#ifdef PCMK__COMPILE_REMOTE
if (lrmd_init_remote_tls_server() < 0) {
crm_err("Failed to create TLS listener: shutting down and staying down");
exit_code = CRM_EX_FATAL;
goto done;
}
ipc_proxy_init();
#endif
// SIGTERM starts a shutdown via lrmd_shutdown()
mainloop_add_signal(SIGTERM, lrmd_shutdown);
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections");
crm_notice("OCF resource agent search path is %s", OCF_RA_PATH);
g_main_loop_run(mainloop);
/* should never get here */
lrmd_exit(NULL);
// Shared cleanup for early exits (usage error, version request, failures)
done:
g_strfreev(options.log_files);
#ifdef PCMK__COMPILE_REMOTE
g_free(options.port);
#endif // PCMK__COMPILE_REMOTE
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__output_and_clear_error(&error, out);
// out is still NULL if option parsing or output creation failed
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
index efdc219132..378b3e250c 100644
--- a/daemons/fenced/cts-fence-helper.c
+++ b/daemons/fenced/cts-fence-helper.c
@@ -1,691 +1,693 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/agents.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#define SUMMARY "cts-fence-helper - inject commands into the Pacemaker fencer and watch for events"
// Main loop used by the mainloop API tests (created in mainloop_tests())
static GMainLoop *mainloop = NULL;
// Trigger that schedules the next mainloop test iteration
static crm_trigger_t *trig = NULL;
// Index of the mainloop test to run next (advanced by mainloop_test_done())
static int mainloop_iter = 0;
// Most recent asynchronous result, recorded by mainloop_callback()
static pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
// Signature of one mainloop test step (see iterate_mainloop_tests())
typedef void (*mainloop_test_iteration_cb) (int check_event);
// Timeout passed to asynchronous API calls (presumably seconds -- confirm)
#define MAINLOOP_DEFAULT_TIMEOUT 2
// Test mode selected on the command line (see mode_cb())
enum test_modes {
test_standard = 0, // test using a specific developer environment
test_passive, // watch notifications only
test_api_sanity, // sanity-test stonith client API using fence_dummy
test_api_mainloop, // sanity-test mainloop code with async responses
};
// Command-line selections; the mode defaults to test_standard
struct {
enum test_modes mode;
} options = {
.mode = test_standard
};
static gboolean
mode_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (pcmk__str_any_of(option_name, "--mainloop_api_test", "-m", NULL)) {
options.mode = test_api_mainloop;
} else if (pcmk__str_any_of(option_name, "--api_test", "-t", NULL)) {
options.mode = test_api_sanity;
} else if (pcmk__str_any_of(option_name, "--passive", "-p", NULL)) {
options.mode = test_passive;
}
return TRUE;
}
// Mode-selection options; each is a no-argument flag handled by mode_cb()
static GOptionEntry entries[] = {
{ "mainloop_api_test", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
NULL, NULL,
},
{ "api_test", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
NULL, NULL,
},
{ "passive", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb,
NULL, NULL,
},
// Terminator
{ NULL }
};
// Fencer API connection shared by all tests (allocated in main())
static stonith_t *st = NULL;
// Poll descriptor for the connection; its fd is filled in by connect()
static struct pollfd pollfd;
// Call options used for the synchronous API tests
static const int st_opts = st_opt_sync_call;
// Notifications still awaited by the current single_test() invocation
static int expected_notifications = 0;
// Selects info- vs. debug-level success messages in single_test()
static int verbose = 0;
/*!
 * \internal
 * \brief Record the outcome of one mainloop test step
 *
 * On success, advance to the next test, schedule it, and reset the stored
 * result; on failure, log the stored result and exit with CRM_EX_ERROR.
 *
 * \param[in] origin  Name of the test reporting its outcome
 * \param[in] pass    Whether the test passed
 */
static void
mainloop_test_done(const char *origin, bool pass)
{
    if (!pass) {
        crm_err("FAILURE - %s (%d: %s)", origin, result.exit_status,
                pcmk_exec_status_str(result.execution_status));
        crm_exit(CRM_EX_ERROR);

    } else {
        crm_info("SUCCESS - %s", origin);
        mainloop_iter++;
        mainloop_set_trigger(trig);
        result.execution_status = PCMK_EXEC_DONE;
        result.exit_status = CRM_EX_OK;
    }
}
/*!
 * \internal
 * \brief Dispatch fencer messages until none arrive within the timeout
 *
 * Keeps polling the connection and dispatching; stops as soon as poll()
 * reports no ready descriptor (timeout or error) or stonith_dispatch()
 * returns false.
 *
 * \param[in] timeout  Timeout handed to each poll() call
 */
static void
dispatch_helper(int timeout)
{
    int ready = 0;

    crm_debug("Looking for notification");
    pollfd.events = POLLIN;

    do {
        ready = poll(&pollfd, 1, timeout);
    } while ((ready > 0) && stonith_dispatch(st));
}
/*!
 * \internal
 * \brief Notification callback: log the event and count it as received
 *
 * Exits with CRM_EX_DISCONNECT if the connection is in the disconnected state.
 *
 * \param[in] st  Fencer connection the notification arrived on
 * \param[in] e   Notification event
 */
static void
st_callback(stonith_t *st, stonith_event_t *e)
{
    char *description = NULL;

    if (st->state == stonith_disconnected) {
        crm_exit(CRM_EX_DISCONNECT);
    }

    description = stonith__event_description(e);
    crm_notice("%s", description);
    free(description);

    // One fewer notification left to wait for
    if (expected_notifications != 0) {
        --expected_notifications;
    }
}
/*!
 * \internal
 * \brief Global operation callback: log the result of a completed call
 *
 * \param[in] stonith  Fencer connection (unused)
 * \param[in] data     Result data of the completed call
 */
static void
st_global_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    /* The execution status is a status code, not a string (it is stored in
     * result.execution_status elsewhere in this file), so it must be
     * converted before being printed with %s.
     */
    crm_notice("Call %d exited %d: %s (%s)",
               data->call_id, stonith__exit_status(data),
               pcmk_exec_status_str(stonith__execution_status(data)),
               pcmk__s(stonith__exit_reason(data), "unspecified reason"));
}
/*!
 * \internal
 * \brief Passive mode: connect, register for notifications, and log whatever
 *        arrives until dispatch_helper() returns
 *
 * Exits with CRM_EX_DISCONNECT if the connection cannot be made.
 */
static void
passive_test(void)
{
int rc = 0;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
if (rc != pcmk_ok) {
stonith_api_delete(st);
crm_exit(CRM_EX_DISCONNECT);
}
// Log every notification type of interest through st_callback()
- st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
+ st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT,
+ st_callback);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback);
st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback",
st_global_callback);
// poll() timeout of 600 * 1000 (milliseconds, i.e. 10 minutes) per wait
dispatch_helper(600 * 1000);
}
/*
 * Run one synchronous fencer API call and verify its outcome.
 *
 * cmd               - expression performing the call; its value is the
 *                     call's return code
 * str               - description of the call, used in log messages
 * num_notifications - number of notifications the call should generate; if
 *                     nonzero, events are dispatched (500ms poll timeout) so
 *                     that st_callback() can count them down
 * expected_rc       - return code the call is expected to produce
 *
 * Exits with CRM_EX_ERROR if the return code differs from expected_rc or if
 * fewer notifications than expected arrived.
 *
 * The body is wrapped in do/while(0) so that an invocation followed by a
 * semicolon is exactly one statement, arguments are parenthesized, and the
 * local variable has a name unlikely to clash with the caller's. The
 * definition ends without a trailing line continuation so that it cannot
 * absorb whatever line follows it.
 */
#define single_test(cmd, str, num_notifications, expected_rc) \
    do { \
        int single_test_rc_ = 0; \
        single_test_rc_ = (cmd); \
        expected_notifications = 0; \
        if (num_notifications) { \
            expected_notifications = (num_notifications); \
            dispatch_helper(500); \
        } \
        if (single_test_rc_ != (expected_rc)) { \
            crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s", (expected_rc), single_test_rc_, pcmk_strerror(single_test_rc_), (str)); \
            crm_exit(CRM_EX_ERROR); \
        } else if (expected_notifications) { \
            crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s", \
                    (num_notifications), (num_notifications) - expected_notifications, (str)); \
            crm_exit(CRM_EX_ERROR); \
        } else { \
            if (verbose) { \
                crm_info("SUCCESS - %s: %d", (str), single_test_rc_); \
            } else { \
                crm_debug("SUCCESS - %s: %d", (str), single_test_rc_); \
            } \
        } \
    } while (0)
/*!
 * \internal
 * \brief Sanity test: fencing through a single device configured to fail
 *
 * Registers one fence_dummy device in "fail" mode, expects both an "off" and
 * a "reboot" request to return -ENODATA, then removes the device.
 */
static void
run_fence_failure_test(void)
{
    const char *host_map = "false_1_node1=1,2 false_1_node2=3,4";
    stonith_key_value_t *params = NULL;

    params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, host_map);
    params = stonith_key_value_add(params, "mode", "fail");

    single_test(st->cmds->register_device(st, st_opts, "test-id1",
                                          "stonith-ng", "fence_dummy", params),
                "Register device1 for failure test", 1, 0);

    single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF,
                                3, 0),
                "Fence failure results off", 1, -ENODATA);

    single_test(st->cmds->fence(st, st_opts, "false_1_node2",
                                PCMK_ACTION_REBOOT, 3, 0),
                "Fence failure results reboot", 1, -ENODATA);

    single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
                "Remove device1 for failure test", 1, 0);

    stonith_key_value_freeall(params, 1, 1);
}
/*!
 * \internal
 * \brief Sanity test: fencing with one failing and one passing device
 *
 * Registers a fence_dummy device in "fail" mode and another in "pass" mode
 * for the same hosts, expects "off" to succeed and "on" to return -ENODEV,
 * then removes both devices.
 */
static void
run_fence_failure_rollover_test(void)
{
    const char *host_map = "false_1_node1=1,2 false_1_node2=3,4";
    stonith_key_value_t *params = NULL;

    // First device: always fails
    params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, host_map);
    params = stonith_key_value_add(params, "mode", "fail");

    single_test(st->cmds->register_device(st, st_opts, "test-id1",
                                          "stonith-ng", "fence_dummy", params),
                "Register device1 for rollover test", 1, 0);
    stonith_key_value_freeall(params, 1, 1);
    params = NULL;

    // Second device: always passes
    params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, host_map);
    params = stonith_key_value_add(params, "mode", "pass");

    single_test(st->cmds->register_device(st, st_opts, "test-id2",
                                          "stonith-ng", "fence_dummy", params),
                "Register device2 for rollover test", 1, 0);

    single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF,
                                3, 0),
                "Fence rollover results off", 1, 0);

    /* -ENODEV is expected: fence_dummy requires 'on' to be executed on the
     * target
     */
    single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_ON,
                                3, 0),
                "Fence rollover results on", 1, -ENODEV);

    single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
                "Remove device1 for rollover tests", 1, 0);

    single_test(st->cmds->remove_device(st, st_opts, "test-id2"),
                "Remove device2 for rollover tests", 1, 0);

    stonith_key_value_freeall(params, 1, 1);
}
/*!
 * \internal
 * \brief Sanity test: exercise the basic API calls against a passing device
 *
 * Registers a fence_dummy device in "pass" mode, then runs list, monitor,
 * status, fence, unfence, and an invalid level registration against it,
 * checking each return code, and finally removes the device.
 */
static void
run_standard_test(void)
{
    stonith_key_value_t *params = NULL;

    params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
                                   "false_1_node1=1,2 false_1_node2=3,4");
    params = stonith_key_value_add(params, "mode", "pass");
    params = stonith_key_value_add(params, "mock_dynamic_hosts",
                                   "false_1_node1 false_1_node2");

    single_test(st->cmds->register_device(st, st_opts, "test-id", "stonith-ng",
                                          "fence_dummy", params),
                "Register", 1, 0);
    stonith_key_value_freeall(params, 1, 1);
    params = NULL;

    single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1),
                PCMK_ACTION_LIST, 1, 0);

    single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0);

    single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node2", 1),
                "Status false_1_node2", 1, 0);

    single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1),
                "Status false_1_node1", 1, 0);

    single_test(st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF,
                                1, 0),
                "Fence unknown-host (expected failure)", 0, -ENODEV);

    single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF,
                                1, 0),
                "Fence false_1_node1", 1, 0);

    /* -ENODEV is expected: fence_dummy requires 'on' to be executed on the
     * target
     */
    single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON,
                                1, 0),
                "Unfence false_1_node1", 1, -ENODEV);

    /* An invalid level index must be rejected */
    single_test(st->cmds->register_level(st, st_opts, "node1", 999, params),
                "Attempt to register an invalid level index", 0, -EINVAL);

    single_test(st->cmds->remove_device(st, st_opts, "test-id"),
                "Remove test-id", 1, 0);

    stonith_key_value_freeall(params, 1, 1);
}
/*!
 * \internal
 * \brief API sanity mode: connect, register callbacks, and run the
 *        synchronous test suites in order
 *
 * Exits with CRM_EX_DISCONNECT if the connection cannot be made.
 */
static void
sanity_tests(void)
{
int rc = 0;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
if (rc != pcmk_ok) {
stonith_api_delete(st);
crm_exit(CRM_EX_DISCONNECT);
}
// st_callback() counts notifications on behalf of single_test()
- st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
+ st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT,
+ st_callback);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback);
st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback",
st_global_callback);
crm_info("Starting API Sanity Tests");
run_standard_test();
run_fence_failure_test();
run_fence_failure_rollover_test();
crm_info("Sanity Tests Passed");
}
/*!
 * \internal
 * \brief Standard mode: run a fixed sequence of API calls against a
 *        fence_xvm device and log each return code
 *
 * No return code is checked apart from the initial connection; results are
 * only logged at debug level. Exits with CRM_EX_DISCONNECT if the connection
 * cannot be made.
 */
static void
standard_dev_test(void)
{
    int rc = 0;
    char *tmp = NULL;
    stonith_key_value_t *params = NULL;

    rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
    if (rc != pcmk_ok) {
        stonith_api_delete(st);
        crm_exit(CRM_EX_DISCONNECT);
    }

    params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
                                   "some-host=pcmk-7 true_1_node1=3,4");

    rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_xvm", params);
    crm_debug("Register: %d", rc);

    rc = st->cmds->list(st, st_opts, "test-id", &tmp, 10);
    crm_debug("List: %d output: %s", rc, tmp ? tmp : "<none>");

    // The list output is handed to us through tmp; release it once logged
    free(tmp);
    tmp = NULL;

    rc = st->cmds->monitor(st, st_opts, "test-id", 10);
    crm_debug("Monitor: %d", rc);

    rc = st->cmds->status(st, st_opts, "test-id", "false_1_node2", 10);
    crm_debug("Status false_1_node2: %d", rc);

    rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
    crm_debug("Status false_1_node1: %d", rc);

    rc = st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, 60, 0);
    crm_debug("Fence unknown-host: %d", rc);

    rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
    crm_debug("Status false_1_node1: %d", rc);

    rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, 60, 0);
    crm_debug("Fence false_1_node1: %d", rc);

    rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
    crm_debug("Status false_1_node1: %d", rc);

    rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0);
    crm_debug("Unfence false_1_node1: %d", rc);

    rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10);
    crm_debug("Status false_1_node1: %d", rc);

    rc = st->cmds->fence(st, st_opts, "some-host", PCMK_ACTION_OFF, 10, 0);
    crm_debug("Fence alias: %d", rc);

    rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10);
    crm_debug("Status alias: %d", rc);

    rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0);
    crm_debug("Unfence false_1_node1: %d", rc);

    rc = st->cmds->remove_device(st, st_opts, "test-id");
    crm_debug("Remove test-id: %d", rc);

    stonith_key_value_freeall(params, 1, 1);
}
static void
iterate_mainloop_tests(gboolean event_ready);
/*!
 * \internal
 * \brief Operation callback for mainloop tests: store the call's result and
 *        let the current test check it
 *
 * \param[in] stonith  Fencer connection (unused)
 * \param[in] data     Result data of the completed call
 */
static void
mainloop_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
    int exit_status = stonith__exit_status(data);
    int exec_status = stonith__execution_status(data);
    const char *reason = stonith__exit_reason(data);

    pcmk__set_result(&result, exit_status, exec_status, reason);

    // Re-enter the current test with the event flag set
    iterate_mainloop_tests(TRUE);
}
static int
register_callback_helper(int callid)
{
return st->cmds->register_callback(st,
callid,
MAINLOOP_DEFAULT_TIMEOUT,
st_opt_timeout_updates, NULL, "callback", mainloop_callback);
}
/*!
 * \internal
 * \brief Mainloop test: an asynchronous fence request that should succeed
 *
 * \param[in] check_event  If nonzero, evaluate the stored result; otherwise
 *                         issue the request and register for its result
 */
static void
test_async_fence_pass(int check_event)
{
    int call_id = 0;

    if (check_event) {
        bool passed = (result.exit_status == CRM_EX_OK);

        mainloop_test_done(__func__, passed);
        return;
    }

    call_id = st->cmds->fence(st, 0, "true_1_node1", PCMK_ACTION_OFF,
                              MAINLOOP_DEFAULT_TIMEOUT, 0);
    if (call_id < 0) {
        crm_err("fence failed with rc %d", call_id);
        mainloop_test_done(__func__, false);
    }

    // The result arrives later via mainloop_callback()
    register_callback_helper(call_id);
}
#define CUSTOM_TIMEOUT_ADDITION 10
/*!
 * \internal
 * \brief Mainloop test: a fence request whose callback timeout should be
 *        extended by the device's custom timeout
 *
 * The test passes only if the stored result is a timeout and at least
 * CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT elapsed since the
 * request was issued (as measured with time()).
 *
 * \param[in] check_event  If nonzero, evaluate the stored result; otherwise
 *                         issue the request and register for its result
 */
static void
test_async_fence_custom_timeout(int check_event)
{
    static time_t begin = 0;
    int call_id = 0;

    if (check_event) {
        bool passed = true;
        uint32_t diff = (time(NULL) - begin);

        if (result.execution_status != PCMK_EXEC_TIMEOUT) {
            passed = false;

        } else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) {
            crm_err
                ("Custom timeout test failed, callback expiration should be updated to %d, actual timeout was %d",
                 CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT, diff);
            passed = false;
        }
        mainloop_test_done(__func__, passed);
        return;
    }

    begin = time(NULL);

    call_id = st->cmds->fence(st, 0, "custom_timeout_node1", PCMK_ACTION_OFF,
                              MAINLOOP_DEFAULT_TIMEOUT, 0);
    if (call_id < 0) {
        crm_err("fence failed with rc %d", call_id);
        mainloop_test_done(__func__, false);
    }

    // The result arrives later via mainloop_callback()
    register_callback_helper(call_id);
}
/*!
 * \internal
 * \brief Mainloop test: a fence request expected to end with the
 *        "no fence device" execution status
 *
 * \param[in] check_event  If nonzero, evaluate the stored result; otherwise
 *                         issue the request and register for its result
 */
static void
test_async_fence_timeout(int check_event)
{
    int call_id = 0;

    if (check_event) {
        bool passed = (result.execution_status == PCMK_EXEC_NO_FENCE_DEVICE);

        mainloop_test_done(__func__, passed);
        return;
    }

    call_id = st->cmds->fence(st, 0, "false_1_node2", PCMK_ACTION_OFF,
                              MAINLOOP_DEFAULT_TIMEOUT, 0);
    if (call_id < 0) {
        crm_err("fence failed with rc %d", call_id);
        mainloop_test_done(__func__, false);
    }

    // The result arrives later via mainloop_callback()
    register_callback_helper(call_id);
}
/*!
 * \internal
 * \brief Mainloop test: an asynchronous monitor of device "false_1" that
 *        should succeed
 *
 * \param[in] check_event  If nonzero, evaluate the stored result; otherwise
 *                         issue the request and register for its result
 */
static void
test_async_monitor(int check_event)
{
    int call_id = 0;

    if (check_event) {
        bool passed = (result.exit_status == CRM_EX_OK);

        mainloop_test_done(__func__, passed);
        return;
    }

    call_id = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT);
    if (call_id < 0) {
        crm_err("monitor failed with rc %d", call_id);
        mainloop_test_done(__func__, false);
    }

    // The result arrives later via mainloop_callback()
    register_callback_helper(call_id);
}
/*!
 * \internal
 * \brief Mainloop test: register the devices used by the asynchronous tests
 *
 * Registers a failing device, a passing device, and a failing device with a
 * delay and a custom "off" timeout. Registration results are not checked; the
 * step always reports success.
 *
 * \param[in] check_event  Unused
 */
static void
test_register_async_devices(int check_event)
{
    char timeout_s[16] = { 0, };
    stonith_key_value_t *params = NULL;

    // "false_1": always fails
    params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
                                   "false_1_node1=1,2");
    params = stonith_key_value_add(params, "mode", "fail");
    st->cmds->register_device(st, st_opts, "false_1", "stonith-ng",
                              "fence_dummy", params);
    stonith_key_value_freeall(params, 1, 1);
    params = NULL;

    // "true_1": always passes
    params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
                                   "true_1_node1=1,2");
    params = stonith_key_value_add(params, "mode", "pass");
    st->cmds->register_device(st, st_opts, "true_1", "stonith-ng",
                              "fence_dummy", params);
    stonith_key_value_freeall(params, 1, 1);
    params = NULL;

    // "false_custom_timeout": fails, with a delay and a longer "off" timeout
    params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP,
                                   "custom_timeout_node1=1,2");
    params = stonith_key_value_add(params, "mode", "fail");
    params = stonith_key_value_add(params, "delay", "1000");
    snprintf(timeout_s, sizeof(timeout_s) - 1, "%d",
             MAINLOOP_DEFAULT_TIMEOUT + CUSTOM_TIMEOUT_ADDITION);
    params = stonith_key_value_add(params, "pcmk_off_timeout", timeout_s);
    st->cmds->register_device(st, st_opts, "false_custom_timeout",
                              "stonith-ng", "fence_dummy", params);
    stonith_key_value_freeall(params, 1, 1);

    mainloop_test_done(__func__, true);
}
/*!
 * \internal
 * \brief Mainloop test: connect to the fencer, retrying up to 10 times
 *
 * \param[in] check_event  Unused
 */
static void
try_mainloop_connect(int check_event)
{
    int rc = stonith_api_connect_retry(st, crm_system_name, 10);

    if (rc != pcmk_ok) {
        crm_err("API CONNECTION FAILURE");
        mainloop_test_done(__func__, false);
        return;
    }
    mainloop_test_done(__func__, true);
}
/*!
 * \internal
 * \brief Run the current mainloop test step, or exit successfully once every
 *        step has passed
 *
 * \param[in] event_ready  Passed to the step as its check_event argument
 */
static void
iterate_mainloop_tests(gboolean event_ready)
{
    // Steps run in this order; mainloop_iter selects the current one
    static mainloop_test_iteration_cb callbacks[] = {
        try_mainloop_connect,
        test_register_async_devices,
        test_async_monitor,
        test_async_fence_pass,
        test_async_fence_timeout,
        test_async_fence_custom_timeout,
    };
    const size_t n_tests = sizeof(callbacks) / sizeof(callbacks[0]);

    if (mainloop_iter == n_tests) {
        /* every step has run and passed */
        crm_info("ALL MAINLOOP TESTS PASSED!");
        crm_exit(CRM_EX_OK);
    }

    callbacks[mainloop_iter](event_ready);
}
/*!
 * \internal
 * \brief Trigger callback: start the current mainloop test step
 *
 * \param[in] user_data  Unused
 *
 * \return TRUE (always)
 */
static gboolean
trigger_iterate_mainloop_tests(gpointer user_data)
{
    // No event is pending yet, so the step should issue its request
    iterate_mainloop_tests(FALSE);

    return TRUE;
}
/*!
 * \internal
 * \brief Disconnect from the fencer and free the API object
 *
 * Used both as the SIGTERM handler and for final cleanup in main(), so it may
 * be called more than once; the shared pointer is cleared after the object is
 * freed so that later calls (or later test code) cannot touch freed memory.
 *
 * Exits with CRM_EX_ERROR if the disconnect reports an error.
 *
 * \param[in] nsig  Signal that triggered the call (ignored)
 */
static void
test_shutdown(int nsig)
{
    int rc = 0;

    if (st != NULL) {
        rc = st->cmds->disconnect(st);
        crm_info("Disconnect: %d", rc);

        crm_debug("Destroy");
        stonith_api_delete(st);
        st = NULL;
    }

    if (rc) {
        crm_exit(CRM_EX_ERROR);
    }
}
/*!
 * \internal
 * \brief Mainloop mode: schedule the first test step and run the main loop
 *
 * The test steps themselves end the program through crm_exit().
 */
static void
mainloop_tests(void)
{
    // Queue the first step so it runs once the loop starts
    trig = mainloop_add_trigger(G_PRIORITY_HIGH,
                                trigger_iterate_mainloop_tests, NULL);
    mainloop_set_trigger(trig);

    mainloop_add_signal(SIGTERM, test_shutdown);

    crm_info("Starting");
    mainloop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(mainloop);
}
/*!
 * \internal
 * \brief Create the command-line option context for this tool
 *
 * \param[in,out] args   Common argument storage handed to the context builder
 * \param[out]    group  Passed through to pcmk__build_arg_context()
 *
 * \return Option context with the mode-selection entries added
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionContext *ctx = pcmk__build_arg_context(args, NULL, group, NULL);

    pcmk__add_main_args(ctx, entries);
    return ctx;
}
/*!
 * \internal
 * \brief Test helper entry point
 *
 * Parses the command line, initializes logging, allocates the fencer API
 * object, runs the test selected by options.mode, and then disconnects.
 *
 * \param[in] argc  Argument count
 * \param[in] argv  Argument vector
 *
 * \return Does not return normally; exits via crm_exit()
 */
int
main(int argc, char **argv)
{
GError *error = NULL;
crm_exit_t exit_code = CRM_EX_OK;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
GOptionContext *context = build_arg_context(args, NULL);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
/* We have to use crm_log_init here to set up the logging because there's
* different handling for daemons vs. command line programs, and
* pcmk__cli_init_logging is set up to only handle the latter.
*/
crm_log_init(NULL, LOG_INFO, TRUE, (verbose? TRUE : FALSE), argc, argv,
FALSE);
// Raise the log level once per requested level of verbosity
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
st = stonith_api_new();
if (st == NULL) {
exit_code = CRM_EX_DISCONNECT;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Could not connect to fencer: API memory allocation failed");
goto done;
}
// Run whichever test the command line selected (standard by default)
switch (options.mode) {
case test_standard:
standard_dev_test();
break;
case test_passive:
passive_test();
break;
case test_api_sanity:
sanity_tests();
break;
case test_api_mainloop:
mainloop_tests();
break;
}
// Disconnect and free the API object
test_shutdown(0);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__output_and_clear_error(&error, NULL);
crm_exit(exit_code);
}
diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h
index 6b414b03b4..b47e23ec00 100644
--- a/include/crm/common/options_internal.h
+++ b/include/crm/common/options_internal.h
@@ -1,231 +1,232 @@
/*
* Copyright 2006-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__OPTIONS_INTERNAL__H
# define PCMK__OPTIONS_INTERNAL__H
# ifndef PCMK__CONFIG_H
# define PCMK__CONFIG_H
# include <config.h> // _Noreturn
# endif
# include <glib.h> // GHashTable
# include <stdbool.h> // bool
#include <crm/common/util.h> // pcmk_parse_interval_spec()
_Noreturn void pcmk__cli_help(char cmd);
/*
* Environment variable option handling
*/
const char *pcmk__env_option(const char *option);
void pcmk__set_env_option(const char *option, const char *value, bool compat);
bool pcmk__env_option_enabled(const char *daemon, const char *option);
/*
* Cluster option handling
*/
/*!
 * \internal
 * \enum pcmk__opt_context
 * \brief Context flags for options
 *
 * Every nonzero value is a distinct single bit. Bit 0 (1 << 0) is not assigned
 * to any flag; the first flag starts at bit 1.
 */
enum pcmk__opt_context {
// @COMPAT Used only for daemon metadata
pcmk__opt_context_none = 0, //!< No additional context
pcmk__opt_context_based = (1 << 1), //!< CIB manager metadata
pcmk__opt_context_controld = (1 << 2), //!< Controller metadata
pcmk__opt_context_schedulerd = (1 << 3), //!< Scheduler metadata
};
/*!
 * \internal
 * \brief Description of a single cluster option
 *
 * NOTE(review): field meanings below are inferred from the field names and
 * types only; confirm against the code that fills in these tables.
 */
typedef struct pcmk__cluster_option_s {
const char *name; // Option name
const char *alt_name; // Presumably an alternate (legacy) name -- confirm
const char *type; // Option type, as a string
const char *values; // Presumably a description of allowed values -- confirm
const char *default_value; // Default value, as a string
bool (*is_valid)(const char *); // Validator: takes a string value, returns bool
// @COMPAT context is used only for daemon meta-data
enum pcmk__opt_context context;
const char *description_short; // Short description
const char *description_long; // Long description
} pcmk__cluster_option_t;
const char *pcmk__cluster_option(GHashTable *options, const char *name);
char *pcmk__format_option_metadata(const char *name, const char *desc_short,
const char *desc_long,
enum pcmk__opt_context filter,
pcmk__cluster_option_t *option_list,
int len);
char *pcmk__cluster_option_metadata(const char *name, const char *desc_short,
const char *desc_long,
enum pcmk__opt_context filter);
void pcmk__validate_cluster_options(GHashTable *options);
bool pcmk__valid_interval_spec(const char *value);
bool pcmk__valid_boolean(const char *value);
bool pcmk__valid_int(const char *value);
bool pcmk__valid_positive_int(const char *value);
bool pcmk__valid_no_quorum_policy(const char *value);
bool pcmk__valid_percentage(const char *value);
bool pcmk__valid_script(const char *value);
bool pcmk__valid_placement_strategy(const char *value);
// from watchdog.c
long pcmk__get_sbd_watchdog_timeout(void);
bool pcmk__get_sbd_sync_resource_startup(void);
long pcmk__auto_stonith_watchdog_timeout(void);
bool pcmk__valid_stonith_watchdog_timeout(const char *value);
// Constants for environment variable names
#define PCMK__ENV_AUTHKEY_LOCATION "authkey_location"
#define PCMK__ENV_BLACKBOX "blackbox"
#define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled"
#define PCMK__ENV_CLUSTER_TYPE "cluster_type"
#define PCMK__ENV_DEBUG "debug"
#define PCMK__ENV_DH_MAX_BITS "dh_max_bits"
#define PCMK__ENV_DH_MIN_BITS "dh_min_bits"
#define PCMK__ENV_FAIL_FAST "fail_fast"
#define PCMK__ENV_IPC_BUFFER "ipc_buffer"
#define PCMK__ENV_IPC_TYPE "ipc_type"
#define PCMK__ENV_LOGFACILITY "logfacility"
#define PCMK__ENV_LOGFILE "logfile"
#define PCMK__ENV_LOGFILE_MODE "logfile_mode"
#define PCMK__ENV_LOGPRIORITY "logpriority"
#define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit"
#define PCMK__ENV_NODE_START_STATE "node_start_state"
#define PCMK__ENV_PANIC_ACTION "panic_action"
#define PCMK__ENV_REMOTE_ADDRESS "remote_address"
#define PCMK__ENV_REMOTE_SCHEMA_DIRECTORY "remote_schema_directory"
#define PCMK__ENV_REMOTE_PID1 "remote_pid1"
#define PCMK__ENV_REMOTE_PORT "remote_port"
#define PCMK__ENV_RESPAWNED "respawned"
#define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory"
#define PCMK__ENV_SERVICE "service"
#define PCMK__ENV_STDERR "stderr"
#define PCMK__ENV_TLS_PRIORITIES "tls_priorities"
#define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox"
#define PCMK__ENV_TRACE_FILES "trace_files"
#define PCMK__ENV_TRACE_FORMATS "trace_formats"
#define PCMK__ENV_TRACE_FUNCTIONS "trace_functions"
#define PCMK__ENV_TRACE_TAGS "trace_tags"
#define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled"
// @COMPAT Drop at 3.0.0; default is plenty
#define PCMK__ENV_CIB_TIMEOUT "cib_timeout"
// @COMPAT Drop at 3.0.0; likely last used in 1.1.24
#define PCMK__ENV_MCP "mcp"
// @COMPAT Drop at 3.0.0; added unused in 1.1.9
#define PCMK__ENV_QUORUM_TYPE "quorum_type"
/* @COMPAT Drop at 3.0.0; added to debug shutdown issues when Pacemaker is
* managed by systemd, but no longer useful.
*/
#define PCMK__ENV_SHUTDOWN_DELAY "shutdown_delay"
// @COMPAT Deprecated since 2.1.0
#define PCMK__OPT_REMOVE_AFTER_STOP "remove-after-stop"
// Constants for meta-attribute names
#define PCMK__META_CLONE "clone"
#define PCMK__META_CONTAINER "container"
#define PCMK__META_DIGESTS_ALL "digests-all"
#define PCMK__META_DIGESTS_SECURE "digests-secure"
#define PCMK__META_INTERNAL_RSC "internal_rsc"
#define PCMK__META_MIGRATE_SOURCE "migrate_source"
#define PCMK__META_MIGRATE_TARGET "migrate_target"
#define PCMK__META_ON_NODE "on_node"
#define PCMK__META_ON_NODE_UUID "on_node_uuid"
#define PCMK__META_OP_NO_WAIT "op_no_wait"
#define PCMK__META_OP_TARGET_RC "op_target_rc"
#define PCMK__META_PHYSICAL_HOST "physical-host"
#define PCMK__META_STONITH_ACTION "stonith_action"
/* @TODO Plug these in. Currently, they're never set. These are op attrs for use
* with https://projects.clusterlabs.org/T382.
*/
#define PCMK__META_CLEAR_FAILURE_OP "clear_failure_op"
#define PCMK__META_CLEAR_FAILURE_INTERVAL "clear_failure_interval"
// @COMPAT Deprecated meta-attribute since 2.1.0
#define PCMK__META_CAN_FAIL "can_fail"
// @COMPAT Deprecated alias for PCMK__META_PROMOTED_MAX since 2.0.0
#define PCMK__META_PROMOTED_MAX_LEGACY "master-max"
// @COMPAT Deprecated alias for PCMK__META_PROMOTED_NODE_MAX since 2.0.0
#define PCMK__META_PROMOTED_NODE_MAX_LEGACY "master-node-max"
// @COMPAT Deprecated meta-attribute since 2.0.0
#define PCMK__META_RESTART_TYPE "restart-type"
// @COMPAT Deprecated meta-attribute since 2.0.0
#define PCMK__META_ROLE_AFTER_FAILURE "role_after_failure"
// Constants for enumerated values
#define PCMK__VALUE_ATTRD "attrd"
#define PCMK__VALUE_BOLD "bold"
#define PCMK__VALUE_BROADCAST "broadcast"
#define PCMK__VALUE_CIB "cib"
#define PCMK__VALUE_CIB_DIFF_NOTIFY "cib_diff_notify"
#define PCMK__VALUE_CIB_NOTIFY "cib_notify"
#define PCMK__VALUE_CIB_POST_NOTIFY "cib_post_notify"
#define PCMK__VALUE_CIB_PRE_NOTIFY "cib_pre_notify"
#define PCMK__VALUE_CIB_UPDATE_CONFIRMATION "cib_update_confirmation"
#define PCMK__VALUE_CLUSTER "cluster"
#define PCMK__VALUE_CRMD "crmd"
#define PCMK__VALUE_EN "en"
#define PCMK__VALUE_EPOCH "epoch"
#define PCMK__VALUE_HEALTH_RED "health_red"
#define PCMK__VALUE_HEALTH_YELLOW "health_yellow"
#define PCMK__VALUE_INIT "init"
#define PCMK__VALUE_LOCAL "local"
#define PCMK__VALUE_LRMD "lrmd"
#define PCMK__VALUE_MAINT "maint"
#define PCMK__VALUE_OUTPUT "output"
#define PCMK__VALUE_PASSWORD "password"
#define PCMK__VALUE_PING "ping"
#define PCMK__VALUE_REFRESH "refresh"
#define PCMK__VALUE_REQUEST "request"
#define PCMK__VALUE_RESPONSE "response"
#define PCMK__VALUE_RSC_FAILED "rsc-failed"
#define PCMK__VALUE_RSC_FAILURE_IGNORED "rsc-failure-ignored"
#define PCMK__VALUE_RSC_MANAGED "rsc-managed"
#define PCMK__VALUE_RSC_MULTIPLE "rsc-multiple"
#define PCMK__VALUE_RSC_OK "rsc-ok"
#define PCMK__VALUE_RUNNING "running"
#define PCMK__VALUE_SHUTDOWN_COMPLETE "shutdown_complete"
#define PCMK__VALUE_SHUTTING_DOWN "shutting_down"
#define PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE "st-async-timeout-value"
#define PCMK__VALUE_ST_NOTIFY "st_notify"
+#define PCMK__VALUE_ST_NOTIFY_DISCONNECT "st_notify_disconnect"
#define PCMK__VALUE_STARTING_DAEMONS "starting_daemons"
#define PCMK__VALUE_STONITH_NG "stonith-ng"
#define PCMK__VALUE_WAIT_FOR_PING "wait_for_ping"
#define PCMK__VALUE_WARNING "warning"
/* @COMPAT Deprecated since 2.1.7 (used with PCMK__XA_ORDERING attribute of
* resource sets)
*/
#define PCMK__VALUE_GROUP "group"
#endif // PCMK__OPTIONS_INTERNAL__H
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 5fdfbdc025..0f2d99d29a 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,2720 +1,2720 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <inttypes.h>
#include <sys/types.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include "fencing_private.h"
CRM_TRACE_INIT_DATA(stonith);
// Used as stonith_t:st_private
typedef struct stonith_private_s {
// Freed and reset to NULL whenever the connection is dropped
char *token;
// IPC connection; reset to NULL on disconnect
crm_ipc_t *ipc;
// Mainloop source; non-NULL only while attached to a mainloop
mainloop_io_t *source;
// Per-call callback records, keyed by integer call ID
GHashTable *stonith_op_callback_table;
// List whose items are stonith_notify_client_t entries
GList *notify_list;
// Number of iterations over notify_list currently in progress
int notify_refcnt;
// Whether notify_list may hold entries marked for deferred deletion
bool notify_deletes;
// Global callback, invoked (if set) for every fence action result
void (*op_callback) (stonith_t * st, stonith_callback_data_t * data);
} stonith_private_t;
// Used as stonith_event_t:opaque
struct event_private {
// Action result carried with the event (filled in outside this chunk)
pcmk__action_result_t result;
};
// One registered notification callback (an item of stonith_private_t:notify_list)
typedef struct stonith_notify_client_s {
// Event name; entries are matched by string comparison of this field
const char *event;
const char *obj_id; /* implement one day */
const char *obj_type; /* implement one day */
// Function to call when a matching event arrives
void (*notify) (stonith_t * st, stonith_event_t * e);
// Marked for removal; the entry is freed once no list iteration is running
bool delete;
} stonith_notify_client_t;
// One per-call callback record (a value in stonith_op_callback_table)
typedef struct stonith_callback_client_s {
// Function to invoke with the result of the call
void (*callback) (stonith_t * st, stonith_callback_data_t * data);
// Optional label; used in trace logging when the callback is invoked
const char *id;
// Opaque pointer handed back to the callback
void *user_data;
// If set, the callback is skipped when the result is not OK
gboolean only_success;
// NOTE(review): not referenced in the visible code; presumably controls
// whether the timer may be adjusted after registration -- confirm
gboolean allow_timeout_updates;
// Timeout timer for the call, or NULL; freed together with this record
struct timer_rec_s *timer;
} stonith_callback_client_t;
// User data passed to stonith_send_notification() for each notify_list entry
struct notify_blob_s {
// Fencer API object the notification belongs to
stonith_t *stonith;
// Notification message
xmlNode *xml;
};
// Timeout bookkeeping for one asynchronous call
struct timer_rec_s {
// Call ID reported to callbacks when the timer fires
int call_id;
// Timeout value; logged as milliseconds when the timer fires
int timeout;
// GLib source ID, removed with g_source_remove() when greater than 0
guint ref;
// Fencer API object whose callbacks are invoked on timeout
stonith_t *stonith;
};
typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *,
xmlNode *, xmlNode *, xmlNode **, xmlNode **);
bool stonith_dispatch(stonith_t * st);
xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
static int stonith_send_command(stonith_t *stonith, const char *op,
xmlNode *data, xmlNode **output_data,
int call_options, int timeout);
static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int stonith_api_del_notification(stonith_t *stonith,
const char *event);
/*!
 * \brief Convert an agent namespace name to its enum value
 *
 * \param[in] namespace_s  Namespace name to look up
 *
 * \return Enum value for \p namespace_s, or st_namespace_invalid if the name
 *         is not one of the recognized names
 */
enum stonith_namespace
stonith_text2namespace(const char *namespace_s)
{
    /* The "any" comparison is made with pcmk__str_null_matches, so it is the
     * only comparison that is handed a possibly-NULL name
     */
    if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) {
        return st_namespace_any;
    }

    // "redhat" and "stonith-ng" are two names for the same namespace
    if ((strcmp(namespace_s, "redhat") == 0)
        || (strcmp(namespace_s, "stonith-ng") == 0)) {
        return st_namespace_rhcs;
    }

    if (strcmp(namespace_s, "internal") == 0) {
        return st_namespace_internal;
    }

    if (strcmp(namespace_s, "heartbeat") == 0) {
        return st_namespace_lha;
    }

    return st_namespace_invalid;
}
/*!
 * \brief Convert an agent namespace enum value to its name
 *
 * \param[in] st_namespace  Namespace as enum value
 *
 * \return Name of \p st_namespace as a constant string ("unsupported" for any
 *         value without a name)
 */
const char *
stonith_namespace2text(enum stonith_namespace st_namespace)
{
    const char *text = "unsupported";

    switch (st_namespace) {
        case st_namespace_any:
            text = "any";
            break;

        case st_namespace_rhcs:
            text = "stonith-ng";
            break;

        case st_namespace_internal:
            text = "internal";
            break;

        case st_namespace_lha:
            text = "heartbeat";
            break;

        default:
            break;
    }
    return text;
}
/*!
* \brief Determine namespace of a fence agent
*
* \param[in] agent Fence agent type
* \param[in] namespace_s Name of agent namespace as string, if known
*
* \return Namespace of specified agent, as enum value
*/
enum stonith_namespace
stonith_get_namespace(const char *agent, const char *namespace_s)
{
if (pcmk__str_eq(namespace_s, "internal", pcmk__str_none)) {
return st_namespace_internal;
}
if (stonith__agent_is_rhcs(agent)) {
return st_namespace_rhcs;
}
#if HAVE_STONITH_STONITH_H
if (stonith__agent_is_lha(agent)) {
return st_namespace_lha;
}
#endif
crm_err("Unknown fence agent: %s", agent);
return st_namespace_invalid;
}
/*!
 * \internal
 * \brief Check whether a node is covered by the watchdog fencing device
 *
 * Ask the fencer for the target list of the device STONITH_WATCHDOG_ID. The
 * answer is TRUE if the list is an empty string, or if \p node is found in the
 * parsed list (compared case-insensitively).
 *
 * \param[in,out] st    Fencer API object to use; if NULL, a temporary one is
 *                      created, connected, and deleted again before returning
 * \param[in]     node  Node name to look for
 *
 * \return TRUE as described above, FALSE otherwise (including when the API
 *         object cannot be created or connected, or the list call fails)
 */
gboolean
stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
{
gboolean rv = FALSE;
stonith_t *stonith_api = st?st:stonith_api_new();
char *list = NULL;
if(stonith_api) {
// Connect only if needed; a caller-provided object may already be connected
if (stonith_api->state == stonith_disconnected) {
int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL);
if (rc != pcmk_ok) {
crm_err("Failed connecting to Stonith-API for watchdog-fencing-query.");
}
}
if (stonith_api->state != stonith_disconnected) {
/* caveat!!!
* this might fail when stonithd is just updating the device-list
* probably something we should fix as well for other api-calls */
int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0);
if ((rc != pcmk_ok) || (list == NULL)) {
/* due to the race described above it can happen that
* we drop in here - so as not to make remote nodes
* panic on that answer
*/
if (rc == -ENODEV) {
crm_notice("Cluster does not have watchdog fencing device");
} else {
crm_warn("Could not check for watchdog fencing device: %s",
pcmk_strerror(rc));
}
} else if (list[0] == '\0') {
// An empty target list counts as enabled
rv = TRUE;
} else {
GList *targets = stonith__parse_targets(list);
rv = pcmk__str_in_list(node, targets, pcmk__str_casei);
g_list_free_full(targets, free);
}
free(list);
if (!st) {
/* if we're provided the api we still might have done the
* connection - but let's assume the caller won't bother
*/
stonith_api->cmds->disconnect(stonith_api);
}
}
// Only delete the API object if it was created here
if (!st) {
stonith_api_delete(stonith_api);
}
} else {
crm_err("Stonith-API for watchdog-fencing-query couldn't be created.");
}
crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.",
node, rv?"":"not ");
return rv;
}
/*!
 * \internal
 * \brief Check whether a node is covered by the watchdog fencing device
 *
 * Convenience wrapper that passes NULL as the fencer API object to
 * stonith__watchdog_fencing_enabled_for_node_api().
 *
 * \param[in] node  Node name to look for
 *
 * \return Result of stonith__watchdog_fencing_enabled_for_node_api()
 */
gboolean
stonith__watchdog_fencing_enabled_for_node(const char *node)
{
return stonith__watchdog_fencing_enabled_for_node_api(NULL, node);
}
/*!
 * \internal
 * \brief Call a function for every entry of the notification list
 *
 * Entries must not be removed from the list while it is being walked, so
 * removal is deferred: entries are only marked for deletion, and the marked
 * ones are freed here once the outermost walk has finished.
 *
 * \param[in,out] private    Private fencer API data holding the list
 * \param[in]     func       Function to call for each list entry
 * \param[in,out] user_data  Second argument to pass to \p func
 */
static void
foreach_notify_entry (stonith_private_t *private,
                      GFunc func,
                      gpointer user_data)
{
    GList *iter = NULL;
    GList *following = NULL;

    // The reference count tracks walks that are currently in progress
    private->notify_refcnt++;
    g_list_foreach(private->notify_list, func, user_data);
    private->notify_refcnt--;

    if ((private->notify_refcnt != 0) || !private->notify_deletes) {
        // Either another walk is still running, or there is nothing to purge
        return;
    }

    private->notify_deletes = FALSE;
    for (iter = private->notify_list; iter != NULL; iter = following) {
        stonith_notify_client_t *entry = iter->data;

        // Fetch the successor first, because the current link may be deleted
        following = g_list_next(iter);

        if (entry->delete) {
            free(entry);
            private->notify_list = g_list_delete_link(private->notify_list,
                                                      iter);
        }
    }
}
/*!
 * \internal
 * \brief React to the fencer connection having gone away
 *
 * Reset the connection-related private data, mark the API object as
 * disconnected, and send a disconnect notification to every entry of the
 * notification list.
 *
 * \param[in,out] user_data  Fencer API object (stonith_t *)
 */
static void
stonith_connection_destroy(gpointer user_data)
{
stonith_t *stonith = user_data;
stonith_private_t *native = NULL;
struct notify_blob_s blob;
crm_trace("Sending destroyed notification");
blob.stonith = stonith;
blob.xml = create_xml_node(NULL, PCMK__XE_NOTIFY);
native = stonith->st_private;
// The pointers are only reset here; nothing is closed or destroyed
native->ipc = NULL;
native->source = NULL;
free(native->token); native->token = NULL;
stonith->state = stonith_disconnected;
crm_xml_add(blob.xml, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
- crm_xml_add(blob.xml, PCMK__XA_SUBT, T_STONITH_NOTIFY_DISCONNECT);
+ crm_xml_add(blob.xml, PCMK__XA_SUBT, PCMK__VALUE_ST_NOTIFY_DISCONNECT);
foreach_notify_entry(native, stonith_send_notification, &blob);
free_xml(blob.xml);
}
xmlNode *
create_device_registration_xml(const char *id, enum stonith_namespace namespace,
const char *agent,
const stonith_key_value_t *params,
const char *rsc_provides)
{
xmlNode *data = create_xml_node(NULL, PCMK__XE_ST_DEVICE_ID);
xmlNode *args = create_xml_node(data, PCMK__XE_ATTRIBUTES);
#if HAVE_STONITH_STONITH_H
if (namespace == st_namespace_any) {
namespace = stonith_get_namespace(agent, NULL);
}
if (namespace == st_namespace_lha) {
hash2field((gpointer) "plugin", (gpointer) agent, args);
agent = "fence_legacy";
}
#endif
crm_xml_add(data, PCMK_XA_ID, id);
crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__);
crm_xml_add(data, PCMK_XA_AGENT, agent);
if ((namespace != st_namespace_any) && (namespace != st_namespace_invalid)) {
crm_xml_add(data, PCMK__XA_NAMESPACE,
stonith_namespace2text(namespace));
}
if (rsc_provides) {
crm_xml_add(data, PCMK__XA_RSC_PROVIDES, rsc_provides);
}
for (; params; params = params->next) {
hash2field((gpointer) params->key, (gpointer) params->value, args);
}
return data;
}
/*!
 * \internal
 * \brief Ask the fencer to register a fence device
 *
 * \param[in,out] st            Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     id            Device ID
 * \param[in]     namespace_s   Agent namespace name
 * \param[in]     agent         Fence agent name
 * \param[in]     params        Linked list of device parameters
 *
 * \return Return value of stonith_send_command()
 */
static int
stonith_api_register_device(stonith_t *st, int call_options,
                            const char *id, const char *namespace_s,
                            const char *agent,
                            const stonith_key_value_t *params)
{
    enum stonith_namespace ns = stonith_text2namespace(namespace_s);
    xmlNode *request = create_device_registration_xml(id, ns, agent, params,
                                                      NULL);
    int rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, request, NULL,
                                  call_options, 0);

    free_xml(request);
    return rc;
}
/*!
 * \internal
 * \brief Ask the fencer to remove a registered fence device
 *
 * \param[in,out] st            Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     name          ID of the device to remove
 *
 * \return Return value of stonith_send_command()
 */
static int
stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
{
    int rc = 0;
    xmlNode *request = create_xml_node(NULL, PCMK__XE_ST_DEVICE_ID);

    crm_xml_add(request, PCMK__XA_ST_ORIGIN, __func__);
    crm_xml_add(request, PCMK_XA_ID, name);

    rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, request, NULL,
                              call_options, 0);
    free_xml(request);
    return rc;
}
/*!
 * \internal
 * \brief Ask the fencer to remove a fencing topology level
 *
 * The target is selected by exactly one of \p node, \p pattern, or the
 * \p attr / \p value pair, checked in that order.
 *
 * \param[in,out] st       Fencer connection
 * \param[in]     options  Call options to send with the request
 * \param[in]     node     If not NULL, target by this node name
 * \param[in]     pattern  If not NULL, target by this node name pattern
 * \param[in]     attr     If not NULL, target by this node attribute
 * \param[in]     value    If not NULL, target by this node attribute value
 * \param[in]     level    Index of the level to remove
 *
 * \return -EINVAL if no target was given, otherwise the return value of
 *         stonith_send_command()
 */
static int
stonith_api_remove_level_full(stonith_t *st, int options,
                              const char *node, const char *pattern,
                              const char *attr, const char *value, int level)
{
    int rc = 0;
    xmlNode *request = NULL;

    CRM_CHECK(node || pattern || (attr && value), return -EINVAL);

    request = create_xml_node(NULL, PCMK_XE_FENCING_LEVEL);
    crm_xml_add(request, PCMK__XA_ST_ORIGIN, __func__);

    if (node != NULL) {
        crm_xml_add(request, PCMK_XA_TARGET, node);

    } else if (pattern != NULL) {
        crm_xml_add(request, PCMK_XA_TARGET_PATTERN, pattern);

    } else {
        crm_xml_add(request, PCMK_XA_TARGET_ATTRIBUTE, attr);
        crm_xml_add(request, PCMK_XA_TARGET_VALUE, value);
    }

    crm_xml_add_int(request, PCMK_XA_INDEX, level);

    rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, request, NULL, options,
                              0);
    free_xml(request);
    return rc;
}
/*!
 * \internal
 * \brief Ask the fencer to remove a fencing topology level for a node name
 *
 * Wrapper around stonith_api_remove_level_full() that targets by node name
 * only (pattern, attribute and value are passed as NULL).
 *
 * \param[in,out] st       Fencer connection
 * \param[in]     options  Call options to send with the request
 * \param[in]     node     Target node name
 * \param[in]     level    Index of the level to remove
 *
 * \return Return value of stonith_api_remove_level_full()
 */
static int
stonith_api_remove_level(stonith_t * st, int options, const char *node, int level)
{
return stonith_api_remove_level_full(st, options, node,
NULL, NULL, NULL, level);
}
/*!
 * \internal
 * \brief Build the XML for a fencing topology level registration request
 *
 * \param[in] node         If not NULL, target level by this node name
 * \param[in] pattern      If not NULL, target by node name using this regex
 * \param[in] attr         If not NULL, target by this node attribute
 * \param[in] value        If not NULL, target by this node attribute value
 * \param[in] level        Index number of level to register
 * \param[in] device_list  List of devices in level
 *
 * \return Newly allocated XML tree on success, NULL otherwise
 *
 * \note The caller should set only one of node, pattern or attr/value. If
 *       more than one is set, node takes precedence over pattern, which takes
 *       precedence over attr/value.
 */
xmlNode *
create_level_registration_xml(const char *node, const char *pattern,
                              const char *attr, const char *value,
                              int level, const stonith_key_value_t *device_list)
{
    GString *devices = NULL;
    xmlNode *data = NULL;
    const stonith_key_value_t *device = NULL;

    CRM_CHECK(node || pattern || (attr && value), return NULL);

    data = create_xml_node(NULL, PCMK_XE_FENCING_LEVEL);
    CRM_CHECK(data, return NULL);

    crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__);

    // The level number serves as both the ID and the index
    crm_xml_add_int(data, PCMK_XA_ID, level);
    crm_xml_add_int(data, PCMK_XA_INDEX, level);

    if (node != NULL) {
        crm_xml_add(data, PCMK_XA_TARGET, node);

    } else if (pattern != NULL) {
        crm_xml_add(data, PCMK_XA_TARGET_PATTERN, pattern);

    } else {
        crm_xml_add(data, PCMK_XA_TARGET_ATTRIBUTE, attr);
        crm_xml_add(data, PCMK_XA_TARGET_VALUE, value);
    }

    // Join the device names into one comma-separated attribute value
    for (device = device_list; device != NULL; device = device->next) {
        pcmk__add_separated_word(&devices, 1024, device->value, ",");
    }

    if (devices != NULL) {
        crm_xml_add(data, PCMK_XA_DEVICES, (const char *) devices->str);
        g_string_free(devices, TRUE);
    }

    return data;
}
/*!
 * \internal
 * \brief Ask the fencer to register a fencing topology level
 *
 * \param[in,out] st           Fencer connection
 * \param[in]     options      Call options to send with the request
 * \param[in]     node         If not NULL, target level by this node name
 * \param[in]     pattern      If not NULL, target by this node name pattern
 * \param[in]     attr         If not NULL, target by this node attribute
 * \param[in]     value        If not NULL, target by this attribute value
 * \param[in]     level        Index number of level to register
 * \param[in]     device_list  List of devices in level
 *
 * \return -EINVAL if the request XML could not be created, otherwise the
 *         return value of stonith_send_command()
 */
static int
stonith_api_register_level_full(stonith_t *st, int options, const char *node,
const char *pattern, const char *attr,
const char *value, int level,
const stonith_key_value_t *device_list)
{
int rc = 0;
xmlNode *data = create_level_registration_xml(node, pattern, attr, value,
level, device_list);
// The builder returns NULL when no target was given
CRM_CHECK(data != NULL, return -EINVAL);
rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0);
free_xml(data);
return rc;
}
/*!
 * \internal
 * \brief Ask the fencer to register a fencing topology level for a node name
 *
 * Wrapper around stonith_api_register_level_full() that targets by node name
 * only (pattern, attribute and value are passed as NULL).
 *
 * \param[in,out] st           Fencer connection
 * \param[in]     options      Call options to send with the request
 * \param[in]     node         Target node name
 * \param[in]     level        Index number of level to register
 * \param[in]     device_list  List of devices in level
 *
 * \return Return value of stonith_api_register_level_full()
 */
static int
stonith_api_register_level(stonith_t * st, int options, const char *node, int level,
const stonith_key_value_t * device_list)
{
return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL,
level, device_list);
}
/*!
 * \internal
 * \brief List installed fence agents
 *
 * The listing is done locally through the agent helpers; the fencer
 * connection, call options and timeout arguments are not used.
 *
 * \param[in]     stonith       Ignored
 * \param[in]     call_options  Ignored
 * \param[in]     namespace_s   Namespace name selecting which agents to list
 * \param[in,out] devices       Where the agent helpers add their results
 * \param[in]     timeout       Ignored
 *
 * \return -EFAULT if \p devices is NULL, otherwise the sum of the values
 *         returned by the agent listing helpers
 */
static int
stonith_api_device_list(stonith_t *stonith, int call_options,
                        const char *namespace_s, stonith_key_value_t **devices,
                        int timeout)
{
    const enum stonith_namespace ns = stonith_text2namespace(namespace_s);
    const bool any = (ns == st_namespace_any);
    int total = 0;

    if (devices == NULL) {
        crm_err("Parameter error: stonith_api_device_list");
        return -EFAULT;
    }

#if HAVE_STONITH_STONITH_H
    // Include Linux-HA agents if requested
    if (any || (ns == st_namespace_lha)) {
        total += stonith__list_lha_agents(devices);
    }
#endif

    // Include Red Hat agents if requested
    if (any || (ns == st_namespace_rhcs)) {
        total += stonith__list_rhcs_agents(devices);
    }

    return total;
}
// See stonith_api_operations_t:metadata() documentation
/*!
 * \internal
 * \brief Get a fence agent's metadata
 *
 * \param[in]  stonith       Not used by this function
 * \param[in]  call_options  Not used by this function
 * \param[in]  agent         Fence agent to get metadata for
 * \param[in]  namespace_s   Name of agent namespace, if known
 * \param[out] output        Passed to the namespace-specific helper, which
 *                           stores the metadata there
 * \param[in]  timeout_sec   Timeout in seconds; if not positive, the default
 *                           metadata timeout is used
 *
 * \return Return value of the namespace-specific metadata helper, or -ENODEV
 *         if the agent's namespace has no such helper
 */
static int
stonith_api_device_metadata(stonith_t *stonith, int call_options,
                            const char *agent, const char *namespace_s,
                            char **output, int timeout_sec)
{
    /* By executing meta-data directly, we can get it from stonith_admin when
     * the cluster is not running, which is important for higher-level tools.
     */

    enum stonith_namespace ns = stonith_get_namespace(agent, namespace_s);

    if (timeout_sec <= 0) {
        /* The default is defined in milliseconds, but this function and the
         * helpers it calls take seconds. Convert, rounding up, so that the
         * default is not 1000 times longer than intended.
         */
        timeout_sec = (PCMK_DEFAULT_METADATA_TIMEOUT_MS + 999) / 1000;
    }

    crm_trace("Looking up metadata for %s agent %s",
              stonith_namespace2text(ns), agent);

    switch (ns) {
        case st_namespace_rhcs:
            return stonith__rhcs_metadata(agent, timeout_sec, output);

#if HAVE_STONITH_STONITH_H
        case st_namespace_lha:
            return stonith__lha_metadata(agent, timeout_sec, output);
#endif

        default:
            crm_err("Can't get fence agent '%s' meta-data: No such agent",
                    agent);
            break;
    }
    return -ENODEV;
}
/*!
 * \internal
 * \brief Ask the fencer which devices can fence a target
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     target        Name of the node to query devices for
 * \param[in,out] devices       List that the ID of each match is added to
 * \param[in]     timeout       Timeout passed to stonith_send_command()
 *
 * \return -EINVAL if \p devices is NULL, a negative value from
 *         stonith_send_command() on failure, otherwise the number of matches
 *         found in the reply
 */
static int
stonith_api_query(stonith_t * stonith, int call_options, const char *target,
                  stonith_key_value_t ** devices, int timeout)
{
    int rc = 0, lpc = 0, max = 0;

    xmlNode *data = NULL;
    xmlNode *output = NULL;
    xmlXPathObjectPtr xpathObj = NULL;

    CRM_CHECK(devices != NULL, return -EINVAL);

    data = create_xml_node(NULL, PCMK__XE_ST_DEVICE_ID);
    crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__);
    crm_xml_add(data, PCMK__XA_ST_TARGET, target);
    crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, PCMK_ACTION_OFF);
    rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout);

    // The request is no longer needed, whether or not the command succeeded
    free_xml(data);

    if (rc < 0) {
        // Don't leak a reply that may have been set despite the failure
        free_xml(output);
        return rc;
    }

    xpathObj = xpath_search(output, "//@agent");
    if (xpathObj) {
        max = numXpathResults(xpathObj);

        for (lpc = 0; lpc < max; lpc++) {
            xmlNode *match = getXpathResult(xpathObj, lpc);

            CRM_LOG_ASSERT(match != NULL);
            if(match != NULL) {
                xmlChar *match_path = xmlGetNodePath(match);

                crm_info("%s[%d] = %s", "//@agent", lpc, match_path);
                free(match_path);
                *devices = stonith_key_value_add(*devices, NULL,
                                                 crm_element_value(match,
                                                                   PCMK_XA_ID));
            }
        }

        freeXpathObject(xpathObj);
    }

    free_xml(output);
    return max;
}
/*!
 * \internal
 * \brief Send a STONITH_OP_EXEC request to the fencer
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Bitmask of \c stonith_call_options
 * \param[in]     id            Fence device ID that request is for
 * \param[in]     action        Agent action to request (list, status, monitor)
 * \param[in]     target        Name of target node for requested action
 * \param[in]     timeout_sec   Error if not completed within this many seconds
 * \param[out]    output        Where to set agent output
 *
 * \return Return value of stonith_send_command()
 */
static int
stonith_api_call(stonith_t *stonith, int call_options, const char *id,
                 const char *action, const char *target, int timeout_sec,
                 xmlNode **output)
{
    int rc = 0;
    xmlNode *request = create_xml_node(NULL, PCMK__XE_ST_DEVICE_ID);

    crm_xml_add(request, PCMK__XA_ST_ORIGIN, __func__);
    crm_xml_add(request, PCMK__XA_ST_DEVICE_ID, id);
    crm_xml_add(request, PCMK__XA_ST_DEVICE_ACTION, action);
    crm_xml_add(request, PCMK__XA_ST_TARGET, target);

    rc = stonith_send_command(stonith, STONITH_OP_EXEC, request, output,
                              call_options, timeout_sec);
    free_xml(request);
    return rc;
}
/*!
 * \internal
 * \brief Run a fence device's list action and return its output
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     id            Fence device ID
 * \param[out]    list_info     If not NULL, set to a newly allocated copy of
 *                              the output when the reply contains one (left
 *                              untouched otherwise)
 * \param[in]     timeout       Timeout passed on to stonith_api_call()
 *
 * \return Return value of stonith_api_call()
 */
static int
stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info,
                 int timeout)
{
    xmlNode *reply = NULL;
    int rc = stonith_api_call(stonith, call_options, id, PCMK_ACTION_LIST,
                              NULL, timeout, &reply);

    if (reply == NULL) {
        return rc;
    }

    if (list_info != NULL) {
        const char *text = crm_element_value(reply, PCMK__XA_ST_OUTPUT);

        if (text != NULL) {
            *list_info = strdup(text);
        }
    }

    free_xml(reply);
    return rc;
}
/*!
 * \internal
 * \brief Run a fence device's monitor action
 *
 * Calls stonith_api_call() with PCMK_ACTION_MONITOR, no target, and no output
 * destination.
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     id            Fence device ID
 * \param[in]     timeout       Timeout passed on to stonith_api_call()
 *
 * \return Return value of stonith_api_call()
 */
static int
stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout)
{
return stonith_api_call(stonith, call_options, id, PCMK_ACTION_MONITOR,
NULL, timeout, NULL);
}
/*!
 * \internal
 * \brief Run a fence device's status action
 *
 * Calls stonith_api_call() with PCMK_ACTION_STATUS and no output destination.
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     id            Fence device ID
 * \param[in]     port          Passed to stonith_api_call() as the target
 * \param[in]     timeout       Timeout passed on to stonith_api_call()
 *
 * \return Return value of stonith_api_call()
 */
static int
stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port,
int timeout)
{
return stonith_api_call(stonith, call_options, id, PCMK_ACTION_STATUS, port,
timeout, NULL);
}
/*!
 * \internal
 * \brief Ask the fencer to perform a fencing action on a node
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     node          Name of the node to fence
 * \param[in]     action        Fencing action to perform
 * \param[in]     timeout       Timeout, recorded in the request and also
 *                              passed to stonith_send_command()
 * \param[in]     tolerance     Tolerance value recorded in the request
 * \param[in]     delay         Delay value recorded in the request
 *
 * \return Return value of stonith_send_command()
 */
static int
stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node,
                             const char *action, int timeout, int tolerance, int delay)
{
    int rc = 0;

    // The request element is named after this function
    xmlNode *request = create_xml_node(NULL, __func__);

    crm_xml_add(request, PCMK__XA_ST_TARGET, node);
    crm_xml_add(request, PCMK__XA_ST_DEVICE_ACTION, action);
    crm_xml_add_int(request, PCMK__XA_ST_TIMEOUT, timeout);
    crm_xml_add_int(request, PCMK__XA_ST_TOLERANCE, tolerance);
    crm_xml_add_int(request, PCMK__XA_ST_DELAY, delay);

    rc = stonith_send_command(stonith, STONITH_OP_FENCE, request, NULL,
                              call_options, timeout);
    free_xml(request);
    return rc;
}
/*!
 * \internal
 * \brief Ask the fencer to perform a fencing action on a node, without delay
 *
 * Wrapper around stonith_api_fence_with_delay() that passes a delay of 0.
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     node          Name of the node to fence
 * \param[in]     action        Fencing action to perform
 * \param[in]     timeout       Timeout for the request
 * \param[in]     tolerance     Tolerance value recorded in the request
 *
 * \return Return value of stonith_api_fence_with_delay()
 */
static int
stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action,
int timeout, int tolerance)
{
return stonith_api_fence_with_delay(stonith, call_options, node, action,
timeout, tolerance, 0);
}
/*!
 * \internal
 * \brief Send a manually acknowledged "off" fencing request for a target
 *
 * Sets st_opt_manual_ack in the call options, then requests PCMK_ACTION_OFF
 * for \p target through stonith_api_fence() with timeout and tolerance of 0.
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     target        Name of the target node
 *
 * \return Return value of stonith_api_fence()
 */
static int
stonith_api_confirm(stonith_t * stonith, int call_options, const char *target)
{
stonith__set_call_options(call_options, target, st_opt_manual_ack);
return stonith_api_fence(stonith, call_options, target, PCMK_ACTION_OFF, 0,
0);
}
/*!
 * \internal
 * \brief Get the fencing history from the fencer
 *
 * The request is always made as a synchronous call (st_opt_sync_call is added
 * to the call options).
 *
 * \param[in,out] stonith       Fencer connection
 * \param[in]     call_options  Call options to send with the request
 * \param[in]     node          If not NULL, only request history for this
 *                              target
 * \param[out]    history       Where to store the head of a newly allocated
 *                              list of history entries, in reply order (set to
 *                              NULL if there are none)
 * \param[in]     timeout       Timeout passed to stonith_send_command()
 *
 * \return -EINVAL if \p history is NULL, -ENOMEM if an entry could not be
 *         allocated, otherwise the return value of stonith_send_command()
 *
 * \note If -ENOMEM is returned, the entries parsed so far remain in
 *       \p *history. In every case the caller is responsible for freeing
 *       \p *history with stonith_history_free().
 */
static int
stonith_api_history(stonith_t * stonith, int call_options, const char *node,
                    stonith_history_t ** history, int timeout)
{
    int rc = 0;
    xmlNode *data = NULL;
    xmlNode *output = NULL;
    stonith_history_t *last = NULL;

    CRM_CHECK(history != NULL, return -EINVAL);
    *history = NULL;

    if (node) {
        data = create_xml_node(NULL, __func__);
        crm_xml_add(data, PCMK__XA_ST_TARGET, node);
    }

    stonith__set_call_options(call_options, node, st_opt_sync_call);
    rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output,
                              call_options, timeout);
    free_xml(data);

    if (rc == 0) {
        xmlNode *op = NULL;
        xmlNode *reply = get_xpath_object("//" PCMK__XE_ST_HISTORY, output,
                                          LOG_NEVER);

        for (op = pcmk__xml_first_child(reply); op != NULL;
             op = pcmk__xml_next(op)) {
            stonith_history_t *kvp;

            /* Both start at 0 so that an entry without the corresponding
             * attribute gets a defined value rather than an indeterminate one
             */
            long long completed = 0LL;
            long long completed_nsec = 0LL;

            kvp = calloc(1, sizeof(stonith_history_t));
            if (kvp == NULL) {
                rc = -ENOMEM;
                break;
            }
            kvp->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET);
            kvp->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION);
            kvp->origin = crm_element_value_copy(op, PCMK__XA_ST_ORIGIN);
            kvp->delegate = crm_element_value_copy(op, PCMK__XA_ST_DELEGATE);
            kvp->client = crm_element_value_copy(op, PCMK__XA_ST_CLIENTNAME);
            crm_element_value_ll(op, PCMK__XA_ST_DATE, &completed);
            kvp->completed = (time_t) completed;
            crm_element_value_ll(op, PCMK__XA_ST_DATE_NSEC, &completed_nsec);
            kvp->completed_nsec = completed_nsec;
            crm_element_value_int(op, PCMK__XA_ST_STATE, &kvp->state);
            kvp->exit_reason = crm_element_value_copy(op, PCMK_XA_EXIT_REASON);

            // Append, so the list keeps the order of the reply
            if (last) {
                last->next = kvp;
            } else {
                *history = kvp;
            }
            last = kvp;
        }
    }

    free_xml(output);

    return rc;
}
/*!
 * \brief Free a list of fencing history entries
 *
 * Frees every entry reachable from \p history through the next pointers,
 * along with the strings each entry owns.
 *
 * \param[in,out] history  Head of the list to free (may be NULL)
 */
void stonith_history_free(stonith_history_t *history)
{
    stonith_history_t *entry = history;

    while (entry != NULL) {
        // Remember the successor before the entry itself is released
        stonith_history_t *following = entry->next;

        free(entry->target);
        free(entry->action);
        free(entry->origin);
        free(entry->delegate);
        free(entry->client);
        free(entry->exit_reason);
        free(entry);

        entry = following;
    }
}
/*!
 * \internal
 * \brief Compare two notification clients (GCompareFunc)
 *
 * \param[in] a  First stonith_notify_client_t to compare
 * \param[in] b  Second stonith_notify_client_t to compare
 *
 * \return 0 if the clients are considered equal (same event, and either the
 *         same callback or at least one callback unset), nonzero otherwise
 */
static gint
stonithlib_GCompareFunc(gconstpointer a, gconstpointer b)
{
    const stonith_notify_client_t *a_client = a;
    const stonith_notify_client_t *b_client = b;
    int rc = 0;

    if (a_client->delete || b_client->delete) {
        /* make entries marked for deletion not findable */
        return -1;
    }

    CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0);

    rc = strcmp(a_client->event, b_client->event);
    if (rc != 0) {
        return rc;
    }

    // Same event: an unset callback on either side matches any callback
    if ((a_client->notify == NULL) || (b_client->notify == NULL)
        || (a_client->notify == b_client->notify)) {
        return 0;
    }

    crm_err("callbacks for %s are not equal: %p vs. %p",
            a_client->event, a_client->notify, b_client->notify);

    // Order differing callbacks by their address
    return (((long) a_client->notify) < ((long) b_client->notify))? -1 : 1;
}
/*!
 * \brief Build the XML for a fencer command
 *
 * \param[in] call_id       Call ID to record in the command
 * \param[in] token         Must not be NULL; it is only checked here and is
 *                          not added to the message
 * \param[in] op            Name of the fencer operation
 * \param[in] data          If not NULL, XML to attach as the call data
 * \param[in] call_options  Call options to record in the command
 *
 * \return Newly created command XML, or NULL if \p token is NULL or the
 *         element could not be created
 */
xmlNode *
stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options)
{
    xmlNode *op_msg = NULL;

    // Validate the argument before allocating, so a failure leaks nothing
    CRM_CHECK(token != NULL, return NULL);

    op_msg = create_xml_node(NULL, PCMK__XE_STONITH_COMMAND);
    CRM_CHECK(op_msg != NULL, return NULL);

    crm_xml_add(op_msg, PCMK__XA_T, PCMK__VALUE_STONITH_NG);

    crm_xml_add(op_msg, PCMK__XA_ST_OP, op);
    crm_xml_add_int(op_msg, PCMK__XA_ST_CALLID, call_id);
    crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options);
    crm_xml_add_int(op_msg, PCMK__XA_ST_CALLOPT, call_options);

    if (data != NULL) {
        add_message_xml(op_msg, PCMK__XA_ST_CALLDATA, data);
    }

    return op_msg;
}
/*!
 * \internal
 * \brief Free a per-call callback record
 *
 * Used as the value destructor of the per-call callback table.
 *
 * \param[in,out] data  Callback record (stonith_callback_client_t *) to free
 */
static void
stonith_destroy_op_callback(gpointer data)
{
    stonith_callback_client_t *record = data;
    struct timer_rec_s *timer = record->timer;

    // Remove a timer source that has been set, so it can't use freed memory
    if ((timer != NULL) && (timer->ref > 0)) {
        g_source_remove(timer->ref);
    }
    free(timer);
    free(record);
}
/*!
 * \internal
 * \brief Disconnect from the fencer
 *
 * Tear down whichever form of connection exists (mainloop-attached or plain
 * IPC), free the token, and mark the API object as disconnected.
 *
 * \param[in,out] stonith  Fencer API object to disconnect
 *
 * \return pcmk_ok (always)
 */
static int
stonith_api_signoff(stonith_t * stonith)
{
stonith_private_t *native = stonith->st_private;
crm_debug("Disconnecting from the fencer");
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
native->ipc = NULL;
} else if (native->ipc) {
/* Not attached to mainloop */
crm_ipc_t *ipc = native->ipc;
// Clear the stored pointer before closing and destroying the connection
native->ipc = NULL;
crm_ipc_close(ipc);
crm_ipc_destroy(ipc);
}
free(native->token); native->token = NULL;
stonith->state = stonith_disconnected;
return pcmk_ok;
}
/*!
 * \internal
 * \brief Unregister fence action callbacks
 *
 * \param[in,out] stonith        Fencer API object
 * \param[in]     call_id        If \p all_callbacks is false: 0 to unregister
 *                               the global callback, otherwise the call ID
 *                               whose callback should be unregistered
 * \param[in]     all_callbacks  If true, unregister the global callback and
 *                               every per-call callback
 *
 * \return pcmk_ok (always)
 */
static int
stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
{
    stonith_private_t *private = stonith->st_private;

    if (!all_callbacks && (call_id != 0)) {
        // Only the callback for one specific call is affected
        pcmk__intkey_table_remove(private->stonith_op_callback_table, call_id);
        return pcmk_ok;
    }

    // Both remaining cases drop the global callback
    private->op_callback = NULL;

    if (all_callbacks) {
        // Replace the whole table with a fresh, empty one
        g_hash_table_destroy(private->stonith_op_callback_table);
        private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback);
    }
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Call one fence action callback with a given result
 *
 * \param[in,out] st        Fencer API connection
 * \param[in]     call_id   If positive, call ID of completed fence action,
 *                          otherwise legacy return code for early failure
 * \param[in,out] result    Full result for action
 * \param[in,out] userdata  User data to pass to callback
 * \param[in]     callback  Fence action callback to invoke
 */
static void
invoke_fence_action_callback(stonith_t *st, int call_id,
                             pcmk__action_result_t *result,
                             void *userdata,
                             void (*callback) (stonith_t *st,
                                               stonith_callback_data_t *data))
{
    // Members not named here are zero-initialized
    stonith_callback_data_t data = {
        .call_id = call_id,
        .rc = pcmk_rc2legacy(stonith__result2rc(result)),
        .userdata = userdata,
        .opaque = (void *) result,
    };

    callback(st, &data);
}
/*!
 * \internal
 * \brief Invoke any callbacks registered for a specified fence action result
 *
 * Given a fence action result from the fencer, invoke any callback registered
 * for that action, as well as any global callback registered. A callback
 * registered for the action is unregistered afterward.
 *
 * \param[in,out] stonith  Fencer API connection
 * \param[in]     msg      If non-NULL, fencer reply
 * \param[in]     call_id  If \p msg is NULL, call ID of action that timed out
 */
static void
invoke_registered_callbacks(stonith_t *stonith, const xmlNode *msg, int call_id)
{
stonith_private_t *private = NULL;
stonith_callback_client_t *cb_info = NULL;
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
CRM_CHECK(stonith != NULL, return);
CRM_CHECK(stonith->st_private != NULL, return);
private = stonith->st_private;
if (msg == NULL) {
// Fencer didn't reply in time
pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
"Fencer accepted request but did not reply in time");
CRM_LOG_ASSERT(call_id > 0);
} else {
// We have the fencer reply
// (the call ID from the reply overwrites the call_id argument)
if ((crm_element_value_int(msg, PCMK__XA_ST_CALLID, &call_id) != 0)
|| (call_id <= 0)) {
crm_log_xml_warn(msg, "Bad fencer reply");
}
stonith__xe_get_result(msg, &result);
}
// Only a positive call ID can have a per-call callback registered
if (call_id > 0) {
cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table,
call_id);
}
// A per-call callback registered with only_success is skipped on failure
if ((cb_info != NULL) && (cb_info->callback != NULL)
&& (pcmk__result_ok(&result) || !(cb_info->only_success))) {
crm_trace("Invoking callback %s for call %d",
pcmk__s(cb_info->id, "without ID"), call_id);
invoke_fence_action_callback(stonith, call_id, &result,
cb_info->user_data, cb_info->callback);
} else if ((private->op_callback == NULL) && !pcmk__result_ok(&result)) {
// No callback of either kind is invoked for this failure, so log it
crm_warn("Fencing action without registered callback failed: %d (%s%s%s)",
result.exit_status,
pcmk_exec_status_str(result.execution_status),
((result.exit_reason == NULL)? "" : ": "),
((result.exit_reason == NULL)? "" : result.exit_reason));
crm_log_xml_debug(msg, "Failed fence update");
}
if (private->op_callback != NULL) {
crm_trace("Invoking global callback for call %d", call_id);
invoke_fence_action_callback(stonith, call_id, &result, NULL,
private->op_callback);
}
// The per-call record is removed even if its callback was skipped above
if (cb_info != NULL) {
stonith_api_del_callback(stonith, call_id, FALSE);
}
pcmk__reset_result(&result);
}
/*!
 * \internal
 * \brief Timer callback for an asynchronous call that got no fencer reply
 *
 * \param[in,out] data  Timer record (struct timer_rec_s) for the call
 *
 * \return TRUE (always), so this function never removes the timer source
 *         itself
 */
static gboolean
stonith_async_timeout_handler(gpointer data)
{
    struct timer_rec_s *rec = data;

    crm_err("Async call %d timed out after %dms", rec->call_id, rec->timeout);

    // A NULL reply tells the callback machinery that this is a timeout
    invoke_registered_callbacks(rec->stonith, NULL, rec->call_id);

    /* Always return TRUE, never remove the handler
     * We do that in stonith_del_callback()
     */
    return TRUE;
}
/*!
 * \internal
 * \brief Arm (or re-arm) the client-side fallback timer for an async call
 *
 * \param[in,out] callback  Callback registration that owns the timer record
 * \param[in,out] stonith   Fencer API connection stored in the timer record
 * \param[in]     call_id   Call ID the timer is for
 * \param[in]     timeout   Action timeout in seconds; nothing is done if this
 *                          is not positive
 */
static void
set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id,
                     int timeout)
{
    // Largest value (in seconds) that survives the conversion to milliseconds
    const int max_timeout = (G_MAXINT / 1000) - 60;
    struct timer_rec_s *async_timer = callback->timer;

    if (timeout <= 0) {
        return;
    }
    if (timeout > max_timeout) {
        // Avoid signed overflow in the millisecond computation below
        timeout = max_timeout;
    }

    if (async_timer == NULL) {
        async_timer = calloc(1, sizeof(struct timer_rec_s));
        CRM_ASSERT(async_timer != NULL); // don't dereference a failed allocation
        callback->timer = async_timer;
    }

    async_timer->stonith = stonith;
    async_timer->call_id = call_id;

    /* Allow a fair bit of grace to allow the server to tell us of a timeout
     * This is only a fallback
     */
    async_timer->timeout = (timeout + 60) * 1000;

    // Replace any timer that is already pending for this record
    if (async_timer->ref) {
        g_source_remove(async_timer->ref);
    }
    async_timer->ref =
        g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer);
}
/*!
 * \internal
 * \brief Re-arm the fallback timer of a registered per-call callback
 *
 * Nothing happens unless a callback is registered for \p call_id and that
 * registration allows timeout updates.
 *
 * \param[in]     call_id  Call ID whose timer should be updated
 * \param[in]     timeout  New timeout (passed on to set_callback_timeout())
 * \param[in,out] st       Fencer API connection
 */
static void
update_callback_timeout(int call_id, int timeout, stonith_t * st)
{
    stonith_private_t *priv = st->st_private;
    stonith_callback_client_t *client =
        pcmk__intkey_table_lookup(priv->stonith_op_callback_table, call_id);

    if ((client != NULL) && client->allow_timeout_updates) {
        set_callback_timeout(client, st, call_id, timeout);
    }
}
/*!
 * \internal
 * \brief Parse one message from the fencer and route it by message type
 *
 * \param[in]     buffer    Message text (parsed as XML)
 * \param[in]     length    Unused
 * \param[in,out] userdata  Fencer API connection (stonith_t *)
 *
 * \return 0 if the message could not be parsed, otherwise 1
 */
static int
stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
{
    stonith_t *api = userdata;
    stonith_private_t *priv = NULL;
    struct notify_blob_s blob;
    const char *msg_type = NULL;

    CRM_ASSERT(api != NULL);
    priv = api->st_private;

    blob.stonith = api;
    blob.xml = string2xml(buffer);
    if (blob.xml == NULL) {
        crm_warn("Received malformed message from fencer: %s", buffer);
        return 0;
    }

    msg_type = crm_element_value(blob.xml, PCMK__XA_T);
    crm_trace("Activating %s callbacks...", msg_type);

    if (pcmk__str_eq(msg_type, PCMK__VALUE_STONITH_NG, pcmk__str_none)) {
        // Action result: the call ID is read from the reply itself
        invoke_registered_callbacks(api, blob.xml, 0);

    } else if (pcmk__str_eq(msg_type, PCMK__VALUE_ST_NOTIFY, pcmk__str_none)) {
        // Notification: offer it to every registered notification client
        foreach_notify_entry(priv, stonith_send_notification, &blob);

    } else if (pcmk__str_eq(msg_type, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE,
                            pcmk__str_none)) {
        // Timeout update for a pending call
        int new_timeout = 0;
        int target_call = 0;

        crm_element_value_int(blob.xml, PCMK__XA_ST_TIMEOUT, &new_timeout);
        crm_element_value_int(blob.xml, PCMK__XA_ST_CALLID, &target_call);
        update_callback_timeout(target_call, new_timeout, api);

    } else {
        crm_err("Unknown message type: %s", msg_type);
        crm_log_xml_warn(blob.xml, "BadReply");
    }

    free_xml(blob.xml);
    return 1;
}
/*!
* \internal
* \brief Connect to the fencer over IPC and register as a client
*
* \param[in,out] stonith Fencer API connection
* \param[in] name Client name sent in the registration request ("client" is
* used in log messages if NULL)
* \param[in,out] stonith_fd If non-NULL, connect without a mainloop and pass
* this to pcmk__ipc_fd() to receive the IPC file descriptor; if NULL, attach
* the connection to the mainloop
*
* \return pcmk_ok on success, otherwise -EINVAL (NULL \p stonith), -ENOTCONN
* (no IPC connection), -ECOMM (registration could not be sent), or -EPROTO
* (missing or invalid registration reply)
*/
static int
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
{
int rc = pcmk_ok;
stonith_private_t *native = NULL;
const char *display_name = name? name : "client";
struct ipc_client_callbacks st_callbacks = {
.dispatch = stonith_dispatch_internal,
.destroy = stonith_connection_destroy
};
CRM_CHECK(stonith != NULL, return -EINVAL);
native = stonith->st_private;
CRM_ASSERT(native != NULL);
crm_debug("Attempting fencer connection by %s with%s mainloop",
display_name, (stonith_fd? "out" : ""));
// State is set before connecting; every failure below ends in cmds->disconnect()
stonith->state = stonith_connected_command;
if (stonith_fd) {
/* No mainloop */
native->ipc = crm_ipc_new("stonith-ng", 0);
if (native->ipc != NULL) {
// Note: rc holds a standard (pcmk_rc_*) code within this branch
rc = pcmk__connect_generic_ipc(native->ipc);
if (rc == pcmk_rc_ok) {
rc = pcmk__ipc_fd(native->ipc, stonith_fd);
if (rc != pcmk_rc_ok) {
crm_debug("Couldn't get file descriptor for IPC: %s",
pcmk_rc_str(rc));
}
}
// On any failure, discard the IPC object so the NULL check below reports -ENOTCONN
if (rc != pcmk_rc_ok) {
crm_ipc_close(native->ipc);
crm_ipc_destroy(native->ipc);
native->ipc = NULL;
}
}
} else {
/* With mainloop */
native->source =
mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
rc = -ENOTCONN;
} else {
// Send a registration request and wait for the fencer's reply
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, PCMK__XE_STONITH_COMMAND);
crm_xml_add(hello, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(hello, PCMK__XA_ST_OP, CRM_OP_REGISTER);
crm_xml_add(hello, PCMK__XA_ST_CLIENTNAME, name);
rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
if (rc < 0) {
crm_debug("Couldn't register with the fencer: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
rc = -ECOMM;
} else if (reply == NULL) {
crm_debug("Couldn't register with the fencer: no reply");
rc = -EPROTO;
} else {
// A valid reply has the registration op type and carries our client ID
const char *msg_type = crm_element_value(reply, PCMK__XA_ST_OP);
native->token = crm_element_value_copy(reply, PCMK__XA_ST_CLIENTID);
if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_none)) {
crm_debug("Couldn't register with the fencer: invalid reply type '%s'",
(msg_type? msg_type : "(missing)"));
crm_log_xml_debug(reply, "Invalid fencer reply");
rc = -EPROTO;
} else if (native->token == NULL) {
crm_debug("Couldn't register with the fencer: no token in reply");
crm_log_xml_debug(reply, "Invalid fencer reply");
rc = -EPROTO;
} else {
crm_debug("Connection to fencer by %s succeeded (registration token: %s)",
display_name, native->token);
rc = pcmk_ok;
}
}
free_xml(reply);
free_xml(hello);
}
if (rc != pcmk_ok) {
crm_debug("Connection attempt to fencer by %s failed: %s "
CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc);
stonith->cmds->disconnect(stonith);
}
return rc;
}
/*!
 * \internal
 * \brief Ask the fencer to start or stop sending a type of notification
 *
 * No request is sent if the connection is in the disconnected state.
 *
 * \param[in,out] stonith   Fencer API connection
 * \param[in]     callback  Notification type to activate or deactivate
 * \param[in]     enabled   Nonzero to activate, zero to deactivate
 *
 * \return pcmk_ok if the request was sent (or the connection is
 *         disconnected), otherwise -ECOMM
 */
static int
stonith_set_notification(stonith_t * stonith, const char *callback, int enabled)
{
    int rc = pcmk_ok;
    xmlNode *request = create_xml_node(NULL, __func__);
    stonith_private_t *priv = stonith->st_private;

    if (stonith->state != stonith_disconnected) {
        const char *attr = enabled? PCMK__XA_ST_NOTIFY_ACTIVATE
                                  : PCMK__XA_ST_NOTIFY_DEACTIVATE;

        crm_xml_add(request, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
        crm_xml_add(request, attr, callback);

        rc = crm_ipc_send(priv->ipc, request, crm_ipc_client_response, -1, NULL);
        if (rc >= 0) {
            rc = pcmk_ok;
        } else {
            crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc);
            rc = -ECOMM;
        }
    }

    free_xml(request);
    return rc;
}
/*!
 * \internal
 * \brief Register a callback for a type of fencer notification
 *
 * \param[in,out] stonith   Fencer API connection
 * \param[in]     event     Notification type; the pointer itself is stored, so
 *                          the string must outlive the registration
 * \param[in]     callback  Function to call for matching notifications
 *
 * \return pcmk_ok on success, or -ENOTUNIQ if stonithlib_GCompareFunc() finds
 *         an equivalent entry already in the list
 */
static int
stonith_api_add_notification(stonith_t * stonith, const char *event,
                             void (*callback) (stonith_t * stonith, stonith_event_t * e))
{
    GList *list_item = NULL;
    stonith_notify_client_t *new_client = NULL;
    stonith_private_t *private = NULL;

    private = stonith->st_private;
    crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list));

    new_client = calloc(1, sizeof(stonith_notify_client_t));
    CRM_ASSERT(new_client != NULL); // don't dereference a failed allocation
    new_client->event = event;
    new_client->notify = callback;

    list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
    if (list_item != NULL) {
        crm_warn("Callback already present");
        free(new_client);
        return -ENOTUNIQ;
    }

    private->notify_list = g_list_append(private->notify_list, new_client);

    // The result of telling the fencer is not checked; the callback stays registered
    stonith_set_notification(stonith, event, 1);

    crm_trace("Callback added (%d)", g_list_length(private->notify_list));
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Remove one notification registration (list iteration helper)
 *
 * \param[in,out] data       Notification client entry
 * \param[in,out] user_data  Fencer API connection (stonith_t *)
 */
static void
del_notify_entry(gpointer data, gpointer user_data)
{
    stonith_notify_client_t *client = data;
    stonith_t *api = user_data;

    if (client->delete) {
        return; // already marked for deletion
    }

    crm_debug("Removing callback for %s events", client->event);
    stonith_api_del_notification(api, client->event);
}
/*!
 * \internal
 * \brief Unregister the callback for one notification type, or for all types
 *
 * \param[in,out] stonith  Fencer API connection
 * \param[in]     event    Notification type to unregister, or NULL for every
 *                         registered type
 *
 * \return pcmk_ok (always)
 */
static int
stonith_api_del_notification(stonith_t * stonith, const char *event)
{
    GList *list_item = NULL;
    stonith_private_t *private = stonith->st_private;

    /* Search key only: it lives on the stack, so no allocation can fail and
     * nothing needs to be freed
     */
    stonith_notify_client_t key = { 0, };

    if (event == NULL) {
        foreach_notify_entry(private, del_notify_entry, stonith);
        crm_trace("Removed callback");
        return pcmk_ok;
    }

    crm_debug("Removing callback for %s events", event);

    key.event = event;
    key.notify = NULL;
    list_item = g_list_find_custom(private->notify_list, &key, stonithlib_GCompareFunc);

    // The fencer is told to stop even if no local registration is found
    stonith_set_notification(stonith, event, 0);

    if (list_item != NULL) {
        stonith_notify_client_t *list_client = list_item->data;

        if (private->notify_refcnt) {
            // The reference count is nonzero, so only mark the entry for deletion
            list_client->delete = TRUE;
            private->notify_deletes = TRUE;

        } else {
            private->notify_list = g_list_remove(private->notify_list, list_client);
            free(list_client);
        }
        crm_trace("Removed callback");

    } else {
        crm_trace("Callback not present");
    }
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Register a callback for a fence action (or the global callback)
 *
 * \param[in,out] stonith        Fencer API connection
 * \param[in]     call_id        Positive call ID to register for, 0 to set the
 *                               global callback, or a negative legacy return
 *                               code if the call already failed
 * \param[in]     timeout        If positive, arm a fallback timer with this
 *                               timeout
 * \param[in]     options        Bitmask; st_opt_report_only_success and
 *                               st_opt_timeout_updates are examined here
 * \param[in,out] user_data      User data to pass to callback
 * \param[in]     callback_name  Name used for the callback in log messages
 *                               (pointer is stored, not copied)
 * \param[in]     callback       Function to invoke
 *
 * \return TRUE if a callback was registered, FALSE if \p call_id was negative
 *         (the callback is invoked immediately unless only successes were
 *         requested), or -EINVAL if \p stonith is invalid
 */
static int
stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options,
                         void *user_data, const char *callback_name,
                         void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
    stonith_callback_client_t *blob = NULL;
    stonith_private_t *private = NULL;

    CRM_CHECK(stonith != NULL, return -EINVAL);
    CRM_CHECK(stonith->st_private != NULL, return -EINVAL);
    private = stonith->st_private;

    if (call_id == 0) { // Add global callback
        /* NOTE(review): execution continues below, so a table entry is also
         * added under key 0 -- confirm that this is intended
         */
        private->op_callback = callback;

    } else if (call_id < 0) { // Call failed immediately, so call callback now
        if (!(options & st_opt_report_only_success)) {
            pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;

            crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
            pcmk__set_result(&result, CRM_EX_ERROR,
                             stonith__legacy2status(call_id), NULL);
            invoke_fence_action_callback(stonith, call_id, &result,
                                         user_data, callback);
        } else {
            crm_warn("Fencer call failed: %s", pcmk_strerror(call_id));
        }
        return FALSE;
    }

    blob = calloc(1, sizeof(stonith_callback_client_t));
    CRM_ASSERT(blob != NULL); // don't dereference a failed allocation
    blob->id = callback_name;
    blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE;
    blob->user_data = user_data;
    blob->callback = callback;
    blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE;

    if (timeout > 0) {
        set_callback_timeout(blob, stonith, call_id, timeout);
    }

    pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id,
                              blob);
    crm_trace("Added callback to %s for call %d", callback_name, call_id);

    return TRUE;
}
/*!
 * \internal
 * \brief Log one pending callback registration (hash table iteration helper)
 *
 * \param[in] key        Call ID, encoded as a pointer
 * \param[in] value      Callback registration (stonith_callback_client_t *)
 * \param[in] user_data  Unused
 */
static void
stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data)
{
    const stonith_callback_client_t *client = value;

    crm_debug("Call %d (%s): pending",
              GPOINTER_TO_INT(key), pcmk__s(client->id, "no ID"));
}
/*!
 * \brief Log every fence action callback that is still registered
 *
 * \param[in] stonith  Fencer API connection
 */
void
stonith_dump_pending_callbacks(stonith_t * stonith)
{
    stonith_private_t *private = stonith->st_private;

    if (private->stonith_op_callback_table == NULL) {
        return;
    }

    // Plain call: ISO C does not allow "return <expression>;" in a void function
    g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL);
}
/*!
 * \internal
 * \brief Find the data section of a fencer notification
 *
 * The section is located with the XPath expression "//<ntype>".
 *
 * \param[in] msg    Notification XML
 * \param[in] ntype  Notification type (used as the element name to search for)
 *
 * \return Whatever get_xpath_object() finds for that expression
 */
static xmlNode *
get_event_data_xml(xmlNode *msg, const char *ntype)
{
    xmlNode *match = NULL;
    char *xpath = crm_strdup_printf("//%s", ntype);

    match = get_xpath_object(xpath, msg, LOG_DEBUG);
    free(xpath);
    return match;
}
/*
<notify t="st_notify" subt="st_device_register" st_op="st_device_register" st_rc="0" >
<st_calldata >
<stonith_command t="stonith-ng" st_async_id="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_op="st_device_register" st_callid="2" st_callopt="4096" st_timeout="0" st_clientid="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_clientname="cts-fence-helper" >
<st_calldata >
<st_device_id id="test-id" origin="create_device_registration_xml" agent="fence_virsh" namespace="stonith-ng" >
<attributes ipaddr="localhost" pcmk-portmal="some-host=pcmk-1 pcmk-3=3,4" login="root" identity_file="/root/.ssh/id_dsa" />
</st_device_id>
</st_calldata>
</stonith_command>
</st_calldata>
</notify>
<notify t="st_notify" subt="st_notify_fence" st_op="st_notify_fence" st_rc="0" >
<st_calldata >
<st_notify_fence st_rc="0" st_target="some-host" st_op="st_fence" st_delegate="test-id" st_origin="61dd7759-e229-4be7-b1f8-ef49dd14d9f0" />
</st_calldata>
</notify>
*/
/*!
 * \internal
 * \brief Build an event object from a fencer notification
 *
 * \param[in] msg  Notification XML
 *
 * \return Newly allocated event (asserts on allocation failure); event_free()
 *         releases it
 */
static stonith_event_t *
xml_to_event(xmlNode *msg)
{
    stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
    struct event_private *priv = NULL;
    xmlNode *data = NULL;
    bool fence_event = false;

    CRM_ASSERT(event != NULL);

    event->opaque = calloc(1, sizeof(struct event_private));
    CRM_ASSERT(event->opaque != NULL);
    priv = (struct event_private *) event->opaque;

    crm_log_xml_trace(msg, "stonith_notify");

    // All notification types have the operation result and notification subtype
    stonith__xe_get_result(msg, &priv->result);
    event->operation = crm_element_value_copy(msg, PCMK__XA_ST_OP);

    // @COMPAT The API originally provided the result as a legacy return code
    event->result = pcmk_rc2legacy(stonith__result2rc(&priv->result));

    // Only fence and device/level add/delete notifications carry more data
    fence_event = pcmk__str_eq(event->operation, T_STONITH_NOTIFY_FENCE,
                               pcmk__str_none);
    if (!fence_event
        && !pcmk__str_any_of(event->operation,
                             STONITH_OP_DEVICE_ADD, STONITH_OP_DEVICE_DEL,
                             STONITH_OP_LEVEL_ADD, STONITH_OP_LEVEL_DEL,
                             NULL)) {
        return event;
    }

    data = get_event_data_xml(msg, event->operation);
    if (data == NULL) {
        crm_err("No data for %s event", event->operation);
        crm_log_xml_notice(msg, "BadEvent");
        return event;
    }

    if (fence_event) {
        event->origin = crm_element_value_copy(data, PCMK__XA_ST_ORIGIN);
        event->action = crm_element_value_copy(data,
                                               PCMK__XA_ST_DEVICE_ACTION);
        event->target = crm_element_value_copy(data, PCMK__XA_ST_TARGET);
        event->executioner = crm_element_value_copy(data,
                                                    PCMK__XA_ST_DELEGATE);
        event->id = crm_element_value_copy(data, PCMK__XA_ST_REMOTE_OP);
        event->client_origin =
            crm_element_value_copy(data, PCMK__XA_ST_CLIENTNAME);
    }

    // The device ID is copied for both kinds of notification
    event->device = crm_element_value_copy(data, PCMK__XA_ST_DEVICE_ID);
    return event;
}
/*!
 * \internal
 * \brief Free an event object and everything it owns
 *
 * \param[in,out] event  Event to free (must have non-NULL private data)
 */
static void
event_free(stonith_event_t * event)
{
    struct event_private *priv = event->opaque;

    // Private data: reset the stored result, then release the container
    pcmk__reset_result(&priv->result);
    free(priv);

    // Strings owned by the event
    free(event->client_origin);
    free(event->device);
    free(event->executioner);
    free(event->target);
    free(event->action);
    free(event->origin);
    free(event->operation);
    free(event->message);
    free(event->type);
    free(event->id);

    free(event);
}
/*!
 * \internal
 * \brief Deliver a notification to one client if it registered for that type
 *
 * \param[in,out] data       Notification client entry
 * \param[in,out] user_data  Notification blob (struct notify_blob_s *) holding
 *                           the API connection and the notification XML
 */
static void
stonith_send_notification(gpointer data, gpointer user_data)
{
    struct notify_blob_s *blob = user_data;
    stonith_notify_client_t *client = data;
    stonith_event_t *st_event = NULL;
    const char *subtype = NULL;

    if (blob->xml == NULL) {
        crm_warn("Skipping callback - NULL message");
        return;
    }
    subtype = crm_element_value(blob->xml, PCMK__XA_SUBT);

    if (client == NULL) {
        crm_warn("Skipping callback - NULL callback client");
        return;
    }
    if (client->delete) {
        crm_trace("Skipping callback - marked for deletion");
        return;
    }
    if (client->notify == NULL) {
        crm_warn("Skipping callback - NULL callback");
        return;
    }
    if (!pcmk__str_eq(client->event, subtype, pcmk__str_none)) {
        crm_trace("Skipping callback - event mismatch %p/%s vs. %s", client, client->event, subtype);
        return;
    }

    // The event object exists only for the duration of the callback
    st_event = xml_to_event(blob->xml);

    crm_trace("Invoking callback for %p/%s event...", client, subtype);
    client->notify(blob->stonith, st_event);
    crm_trace("Callback invoked...");

    event_free(st_event);
}
/*!
 * \internal
 * \brief Create and send an API request
 *
 * \param[in,out] stonith       Stonith connection
 * \param[in]     op            API operation to request
 * \param[in]     data          Data to attach to request
 * \param[out]    output_data   If not NULL, will be set to reply if synchronous
 * \param[in]     call_options  Bitmask of stonith_call_options to use
 * \param[in]     timeout       Error if not completed within this many seconds
 *
 * \return pcmk_ok (for synchronous requests) or positive call ID
 *         (for asynchronous requests) on success, -errno otherwise
 * \note When a reply is stored in \p output_data, this function does not free
 *       it.
 */
static int
stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data,
                     int call_options, int timeout)
{
    int rc = 0;
    int reply_id = -1;
    enum crm_ipc_flags ipc_flags = crm_ipc_flags_none;

    xmlNode *op_msg = NULL;
    xmlNode *op_reply = NULL;
    stonith_private_t *native = NULL;

    CRM_ASSERT(stonith && stonith->st_private && op);
    native = stonith->st_private;

    if (output_data != NULL) {
        *output_data = NULL;
    }

    if ((stonith->state == stonith_disconnected) || (native->token == NULL)) {
        return -ENOTCONN;
    }

    /* Advance the call ID, which must be positive to avoid conflicting with
     * error codes. Restart at 1 if the client mucked with it or the counter
     * is about to wrap (incrementing the maximum int would be undefined).
     */
    if ((stonith->call_id < 0) || (stonith->call_id == G_MAXINT)) {
        stonith->call_id = 1;
    } else {
        stonith->call_id++;
    }

    op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options);
    if (op_msg == NULL) {
        return -EINVAL;
    }

    crm_xml_add_int(op_msg, PCMK__XA_ST_TIMEOUT, timeout);
    crm_trace("Sending %s message to fencer with timeout %ds", op, timeout);

    // Copy any requested delay from the data onto the request itself
    if (data) {
        const char *delay_s = crm_element_value(data, PCMK__XA_ST_DELAY);

        if (delay_s) {
            crm_xml_add(op_msg, PCMK__XA_ST_DELAY, delay_s);
        }
    }

    // The client-response flag is set only for synchronous calls
    if (call_options & st_opt_sync_call) {
        pcmk__set_ipc_flags(ipc_flags, "stonith command",
                            crm_ipc_client_response);
    }
    rc = crm_ipc_send(native->ipc, op_msg, ipc_flags,
                      1000 * (timeout + 60), &op_reply);
    free_xml(op_msg);

    if (rc < 0) {
        crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc);
        rc = -ECOMM;
        goto done;
    }

    crm_log_xml_trace(op_reply, "Reply");

    if (!(call_options & st_opt_sync_call)) {
        crm_trace("Async call %d, returning", stonith->call_id);
        free_xml(op_reply);
        return stonith->call_id;
    }

    crm_element_value_int(op_reply, PCMK__XA_ST_CALLID, &reply_id);

    if (reply_id == stonith->call_id) {
        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;

        crm_trace("Synchronous reply %d received", reply_id);

        stonith__xe_get_result(op_reply, &result);
        rc = pcmk_rc2legacy(stonith__result2rc(&result));
        pcmk__reset_result(&result);

        if ((call_options & st_opt_discard_reply) || output_data == NULL) {
            crm_trace("Discarding reply");

        } else {
            *output_data = op_reply;
            op_reply = NULL;    /* Prevent subsequent free */
        }

    } else if (reply_id <= 0) {
        // The reply is freed exactly once, at "done" below
        crm_err("Received bad reply: No id set");
        crm_log_xml_err(op_reply, "Bad reply");
        rc = -ENOMSG;

    } else {
        // The reply is freed exactly once, at "done" below
        crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id);
        crm_log_xml_err(op_reply, "Old reply");
        rc = -ENOMSG;
    }

  done:
    if (!crm_ipc_connected(native->ipc)) {
        crm_err("Fencer disconnected");
        free(native->token); native->token = NULL;
        stonith->state = stonith_disconnected;
    }

    free_xml(op_reply);
    return rc;
}
/*!
 * \brief Process all fencer messages that are ready to be read
 *
 * Not used with mainloop.
 *
 * \param[in,out] st  Fencer API connection
 *
 * \return true if the connection is still usable, false if it was found closed
 */
bool
stonith_dispatch(stonith_t * st)
{
    gboolean stay_connected = TRUE;
    stonith_private_t *private = NULL;

    CRM_ASSERT(st != NULL);
    private = st->st_private;

    // Any nonzero result (including a negative error code) enters the loop
    while (crm_ipc_ready(private->ipc)) {

        if (crm_ipc_read(private->ipc) > 0) {
            const char *msg = crm_ipc_buffer(private->ipc);

            stonith_dispatch_internal(msg, strlen(msg), st);
        }

        if (!crm_ipc_connected(private->ipc)) {
            crm_err("Connection closed");
            stay_connected = FALSE;

            /* Stop polling a closed connection: an error result from
             * crm_ipc_ready() is nonzero and would keep this loop spinning
             */
            break;
        }
    }

    return stay_connected;
}
/*!
 * \internal
 * \brief Disconnect (if needed) and free a fencer API connection object
 *
 * The object is freed only if it is in the disconnected state after the
 * disconnect attempt.
 *
 * \param[in,out] stonith  Fencer API connection to free
 *
 * \return pcmk_ok if no disconnect was needed, otherwise the result of the
 *         disconnect attempt
 */
static int
stonith_api_free(stonith_t * stonith)
{
    int rc = pcmk_ok;
    stonith_private_t *priv = NULL;

    crm_trace("Destroying %p", stonith);

    if (stonith->state != stonith_disconnected) {
        crm_trace("Unregistering notifications and disconnecting %p first",
                  stonith);
        stonith->cmds->remove_notification(stonith, NULL);
        rc = stonith->cmds->disconnect(stonith);
    }

    if (stonith->state != stonith_disconnected) {
        // Still connected, so the object may remain in use
        crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc);
        return rc;
    }

    priv = stonith->st_private;

    crm_trace("Removing %d callbacks", g_hash_table_size(priv->stonith_op_callback_table));
    g_hash_table_destroy(priv->stonith_op_callback_table);

    crm_trace("Destroying %d notification clients", g_list_length(priv->notify_list));
    g_list_free_full(priv->notify_list, free);

    free(stonith->st_private);
    free(stonith->cmds);
    free(stonith);
    return rc;
}
/*!
 * \brief Free a fencer API connection object via its own free method
 *
 * \param[in,out] stonith  Fencer API connection to free (may be NULL)
 */
void
stonith_api_delete(stonith_t * stonith)
{
    crm_trace("Destroying %p", stonith);

    if (stonith == NULL) {
        return;
    }
    stonith->cmds->free(stonith);
}
/*!
 * \internal
 * \brief Validate a fence device configuration directly via its agent
 *
 * \param[in,out] st            Fencer API connection
 * \param[in]     call_options  Call options passed on to the agent validation
 * \param[in]     rsc_id        Resource ID used for secret substitution
 * \param[in]     namespace_s   Agent namespace as a string
 * \param[in]     agent         Fence agent name
 * \param[in]     params        Device parameters to validate
 * \param[in]     timeout_sec   Timeout in seconds; if not positive, the
 *                              default metadata timeout is used
 * \param[out]    output        If not NULL, reset to NULL and then passed to
 *                              the agent validation
 * \param[out]    error_output  If not NULL, reset to NULL and then passed to
 *                              the agent validation, or set to a newly
 *                              allocated message if the agent is unsupported
 *
 * \return Result of the namespace-specific validation, -ENOENT if the agent's
 *         namespace is invalid, or -EOPNOTSUPP if the namespace has no
 *         validation support
 */
static int
stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
                     const char *namespace_s, const char *agent,
                     const stonith_key_value_t *params, int timeout_sec,
                     char **output, char **error_output)
{
    /* Validation should be done directly via the agent, so we can get it from
     * stonith_admin when the cluster is not running, which is important for
     * higher-level tools.
     */

    int rc = pcmk_ok;

    /* Use a dummy node name in case the agent requires a target. We assume the
     * actual target doesn't matter for validation purposes (if in practice,
     * that is incorrect, we will need to allow the caller to pass the target).
     */
    const char *target = "node1";
    const char *host_arg = NULL;

    GHashTable *params_table = pcmk__strkey_table(free, free);

    // Convert parameter list to a hash table
    for (; params; params = params->next) {
        if (pcmk__str_eq(params->key, PCMK_STONITH_HOST_ARGUMENT,
                         pcmk__str_none)) {
            host_arg = params->value;
        }
        if (!pcmk_stonith_param(params->key)) {
            pcmk__insert_dup(params_table, params->key, params->value);
        }
    }

#if SUPPORT_CIBSECRETS
    rc = pcmk__substitute_secrets(rsc_id, params_table);
    if (rc != pcmk_rc_ok) {
        crm_warn("Could not replace secret parameters for validation of %s: %s",
                 agent, pcmk_rc_str(rc));
        // rc is standard return value, don't return it in this function
    }
#endif

    if (output) {
        *output = NULL;
    }
    if (error_output) {
        *error_output = NULL;
    }

    if (timeout_sec <= 0) {
        // The default is defined in milliseconds, but this timeout is in seconds
        timeout_sec = (PCMK_DEFAULT_METADATA_TIMEOUT_MS) / 1000;
    }

    // Every case below sets rc, replacing any value left by secret substitution
    switch (stonith_get_namespace(agent, namespace_s)) {
        case st_namespace_rhcs:
            rc = stonith__rhcs_validate(st, call_options, target, agent,
                                        params_table, host_arg, timeout_sec,
                                        output, error_output);
            break;

#if HAVE_STONITH_STONITH_H
        case st_namespace_lha:
            rc = stonith__lha_validate(st, call_options, target, agent,
                                       params_table, timeout_sec, output,
                                       error_output);
            break;
#endif

        case st_namespace_invalid:
            errno = ENOENT;
            rc = -errno;

            if (error_output) {
                *error_output = crm_strdup_printf("Agent %s not found", agent);
            } else {
                crm_err("Agent %s not found", agent);
            }

            break;

        default:
            errno = EOPNOTSUPP;
            rc = -errno;

            if (error_output) {
                *error_output = crm_strdup_printf("Agent %s does not support validation",
                                                  agent);
            } else {
                crm_err("Agent %s does not support validation", agent);
            }

            break;
    }

    g_hash_table_destroy(params_table);
    return rc;
}
/*!
 * \brief Allocate and initialize a new, disconnected fencer API object
 *
 * \return Newly allocated object with its method table filled in, or NULL if
 *         memory allocation failed
 * \note stonith_api_delete() releases the object through its own free method.
 */
stonith_t *
stonith_api_new(void)
{
    stonith_t *new_stonith = NULL;
    stonith_private_t *private = NULL;

    new_stonith = calloc(1, sizeof(stonith_t));
    if (new_stonith == NULL) {
        return NULL;
    }

    private = calloc(1, sizeof(stonith_private_t));
    if (private == NULL) {
        free(new_stonith);
        return NULL;
    }
    new_stonith->st_private = private;

    private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback);
    private->notify_list = NULL;
    private->notify_refcnt = 0;
    private->notify_deletes = FALSE;

    new_stonith->call_id = 1;
    new_stonith->state = stonith_disconnected;

    new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t));
    if (new_stonith->cmds == NULL) {
        // The callback table was already created, so release it as well
        g_hash_table_destroy(private->stonith_op_callback_table);
        free(new_stonith->st_private);
        free(new_stonith);
        return NULL;
    }

/* *INDENT-OFF* */
    new_stonith->cmds->free       = stonith_api_free;
    new_stonith->cmds->connect    = stonith_api_signon;
    new_stonith->cmds->disconnect = stonith_api_signoff;

    new_stonith->cmds->list       = stonith_api_list;
    new_stonith->cmds->monitor    = stonith_api_monitor;
    new_stonith->cmds->status     = stonith_api_status;
    new_stonith->cmds->fence      = stonith_api_fence;
    new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay;
    new_stonith->cmds->confirm    = stonith_api_confirm;
    new_stonith->cmds->history    = stonith_api_history;

    new_stonith->cmds->list_agents  = stonith_api_device_list;
    new_stonith->cmds->metadata     = stonith_api_device_metadata;

    new_stonith->cmds->query           = stonith_api_query;
    new_stonith->cmds->remove_device   = stonith_api_remove_device;
    new_stonith->cmds->register_device = stonith_api_register_device;

    new_stonith->cmds->remove_level          = stonith_api_remove_level;
    new_stonith->cmds->remove_level_full     = stonith_api_remove_level_full;
    new_stonith->cmds->register_level        = stonith_api_register_level;
    new_stonith->cmds->register_level_full   = stonith_api_register_level_full;

    new_stonith->cmds->remove_callback       = stonith_api_del_callback;
    new_stonith->cmds->register_callback     = stonith_api_add_callback;
    new_stonith->cmds->remove_notification   = stonith_api_del_notification;
    new_stonith->cmds->register_notification = stonith_api_add_notification;

    new_stonith->cmds->validate              = stonith_api_validate;
/* *INDENT-ON* */

    return new_stonith;
}
/*!
 * \brief Make a blocking connection attempt to the fencer
 *
 * Up to \p max_attempts connections are tried, sleeping two seconds between
 * consecutive attempts.
 *
 * \param[in,out] st            Fencer API object
 * \param[in]     name          Client name to use with fencer
 * \param[in]     max_attempts  Return error if this many attempts fail
 *
 * \return pcmk_ok on success, result of last attempt otherwise (-EINVAL if
 *         \p max_attempts is not positive)
 */
int
stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts)
{
    int rc = -EINVAL; // if max_attempts is not positive
    int attempt = 1;

    while (attempt <= max_attempts) {
        rc = st->cmds->connect(st, name, NULL);
        if (rc == pcmk_ok) {
            return pcmk_ok;
        }

        // Pause before every attempt except after the final one
        if (attempt < max_attempts) {
            crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s "
                       CRM_XS " rc=%d",
                       attempt, max_attempts, pcmk_strerror(rc), rc);
            sleep(2);
        }
        attempt++;
    }

    crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d",
               pcmk_strerror(rc), rc);
    return rc;
}
/*!
 * \brief Append a key/value pair to a list
 *
 * \param[in,out] head   Head of the list to append to, or NULL to start a new
 *                       list
 * \param[in]     key    Key for the new entry
 * \param[in]     value  Value for the new entry
 *
 * \return Head of the list (the new entry itself if \p head was NULL)
 * \note stonith_key_value_freeall() frees the list.
 */
stonith_key_value_t *
stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
{
    stonith_key_value_t *p, *end;

    p = calloc(1, sizeof(stonith_key_value_t));
    CRM_ASSERT(p != NULL); // don't dereference a failed allocation
    pcmk__str_update(&p->key, key);
    pcmk__str_update(&p->value, value);

    // Walk to the last existing entry, if there is one
    end = head;
    while (end && end->next) {
        end = end->next;
    }

    if (end) {
        end->next = p;
    } else {
        head = p;
    }

    return head;
}
/*!
 * \brief Free every entry of a key/value list
 *
 * \param[in,out] head    Head of the list to free (may be NULL)
 * \param[in]     keys    If nonzero, also free each entry's key
 * \param[in]     values  If nonzero, also free each entry's value
 */
void
stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
{
    for (stonith_key_value_t *next = NULL; head != NULL; head = next) {
        // Remember the successor before this entry is released
        next = head->next;

        if (keys) {
            free(head->key);
        }
        if (values) {
            free(head->value);
        }
        free(head);
    }
}
// Open a syslog connection identified as "stonith-api" (daemon facility)
#define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON)
/* Log a printf-style message to syslog, prefixed with the name of the calling
 * function. This uses a named variadic parameter without comma elision, so at
 * least one argument must follow the format string.
 */
#define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args)
/*!
 * \brief Fence a node using a temporary fencer connection
 *
 * \param[in] nodeid   Node ID of the target (used as the target name if
 *                     \p uname is NULL)
 * \param[in] uname    Name of the target node, or NULL to go by \p nodeid
 * \param[in] timeout  Timeout passed on to the fence request
 * \param[in] off      If true, request the "off" action, otherwise "reboot"
 *
 * \return pcmk_ok on success, -EPROTO if the API object could not be created,
 *         otherwise the result of the failed connection or fence request
 */
int
stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
{
    const char *action = off? PCMK_ACTION_OFF : PCMK_ACTION_REBOOT;
    stonith_t *api = stonith_api_new();
    char *target = NULL;
    int call_opts = 0;
    int rc = pcmk_ok;

    api_log_open();

    if (api == NULL) {
        api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s",
                action, nodeid, uname);
        return -EPROTO;
    }

    rc = api->cmds->connect(api, "stonith-api", NULL);
    if (rc != pcmk_ok) {
        api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)",
                action, nodeid, uname, pcmk_strerror(rc), rc);
        stonith_api_delete(api);
        return rc;
    }

    // Without a node name, the node ID (as a string) identifies the target
    target = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname);

    stonith__set_call_options(call_opts, target,
                              st_opt_sync_call|st_opt_allow_suicide);
    if ((uname == NULL) && (nodeid > 0)) {
        stonith__set_call_options(call_opts, target, st_opt_cs_nodeid);
    }

    rc = api->cmds->fence(api, call_opts, target, action, timeout, 0);
    free(target);

    if (rc == pcmk_ok) {
        api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action);
    } else {
        api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)",
                action, nodeid, uname, pcmk_strerror(rc), rc);
    }

    stonith_api_delete(api);
    return rc;
}
/*!
 * \brief Look up when a node was last fenced, or whether fencing is pending
 *
 * \param[in] nodeid       Node ID of the target (used as the target name if
 *                         \p uname is NULL)
 * \param[in] uname        Name of the target node, or NULL to go by \p nodeid
 * \param[in] in_progress  If true, look for fencing that has neither completed
 *                         nor failed; otherwise look for completed fencing
 *
 * \return If \p in_progress is true, the current time if any matching history
 *         entry is still pending; otherwise the latest completion time among
 *         entries in the done state. 0 if nothing matched or the history could
 *         not be retrieved.
 */
time_t
stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
{
    int rc = pcmk_ok;
    time_t when = 0;
    stonith_t *st = stonith_api_new();
    stonith_history_t *history = NULL, *hp = NULL;

    if (st == NULL) {
        api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: "
                "API initialization failed", nodeid, uname);
        return when;
    }

    rc = st->cmds->connect(st, "stonith-api", NULL);
    if (rc != pcmk_ok) {
        api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);

    } else {
        int entries = 0;
        int progress = 0;
        int completed = 0;
        int opts = 0;

        // Without a node name, the node ID (as a string) identifies the target
        char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname);

        stonith__set_call_options(opts, name, st_opt_sync_call);
        if ((uname == NULL) && (nodeid > 0)) {
            stonith__set_call_options(opts, name, st_opt_cs_nodeid);
        }
        rc = st->cmds->history(st, opts, name, &history, 120);
        free(name);

        for (hp = history; hp; hp = hp->next) {
            entries++;
            if (in_progress) {
                if (hp->state != st_done && hp->state != st_failed) {
                    // Count only entries that really are still pending
                    progress++;
                    when = time(NULL);
                }

            } else if (hp->state == st_done) {
                completed++;
                if (hp->completed > when) {
                    when = hp->completed;
                }
            }
        }

        stonith_history_free(history);

        if(rc == pcmk_ok) {
            api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed);
        } else {
            api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc);
        }
    }

    stonith_api_delete(st);

    if(when) {
        api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when);
    }
    return when;
}
/*!
 * \brief Check whether a fence agent appears in the list of available agents
 *
 * \param[in] agent    Name of the fence agent to look for
 * \param[in] timeout  Timeout for listing agents (120 is used if this is 0)
 *
 * \return true if \p agent is in the list, otherwise false (including when
 *         \p agent is NULL or the API object could not be created)
 */
bool
stonith_agent_exists(const char *agent, int timeout)
{
    stonith_t *api = NULL;
    stonith_key_value_t *agents = NULL;
    bool found = FALSE;
    int list_timeout = 0;

    if (agent == NULL) {
        return found;
    }

    api = stonith_api_new();
    if (api == NULL) {
        crm_err("Could not list fence agents: API memory allocation failed");
        return FALSE;
    }

    list_timeout = (timeout == 0)? 120 : timeout;
    api->cmds->list_agents(api, st_opt_sync_call, NULL, &agents, list_timeout);

    // Stop scanning at the first entry whose value matches the agent name
    for (const stonith_key_value_t *iter = agents;
         (iter != NULL) && !found; iter = iter->next) {
        found = pcmk__str_eq(iter->value, agent, pcmk__str_none);
    }

    stonith_key_value_freeall(agents, 1, 1);
    stonith_api_delete(api);
    return found;
}
/*!
 * \brief Get a description of a fence action suitable for messages
 *
 * \param[in] action  Fence action name, or NULL
 *
 * \return "fencing" if \p action is NULL, "unfencing" for the "on" action,
 *         "turning off" for the "off" action, otherwise \p action itself
 */
const char *
stonith_action_str(const char *action)
{
    if (action == NULL) {
        return "fencing";
    }
    if (strcmp(action, PCMK_ACTION_ON) == 0) {
        return "unfencing";
    }
    if (strcmp(action, PCMK_ACTION_OFF) == 0) {
        return "turning off";
    }

    // Any other action name is used as its own description
    return action;
}
/*!
 * \internal
 * \brief Parse a target name from one line of a target list string
 *
 * \param[in]     line    One line of a target list string (must be readable
 *                        through index \p len)
 * \param[in]     len     String length of line
 * \param[in,out] output  List to add newly allocated target name to
 */
static void
parse_list_line(const char *line, int len, GList **output)
{
    size_t i = 0;
    size_t entry_start = 0;
    size_t line_len = 0;

    if ((line == NULL) || (len < 0)) {
        // A negative length would otherwise become a huge unsigned loop bound
        return;
    }
    line_len = (size_t) len;

    /* Skip complaints about additional parameters device doesn't understand
     *
     * @TODO Document or eliminate the implied restriction of target names
     */
    if (strstr(line, "invalid") || strstr(line, "variable")) {
        crm_debug("Skipping list output line: %s", line);
        return;
    }

    // Process line content, character by character
    for (i = 0; i <= line_len; i++) {

        // <ctype.h> functions need a value representable as unsigned char
        if (isspace((unsigned char) line[i]) || (line[i] == ',')
            || (line[i] == ';') || (line[i] == '\0')) {
            // We've found a separator (i.e. the end of an entry)

            int rc = 0;
            char *entry = NULL;

            if (i == entry_start) {
                // Skip leading and sequential separators
                entry_start = i + 1;
                continue;
            }

            // The entry cannot be longer than the span up to the separator
            entry = calloc(i - entry_start + 1, sizeof(char));
            CRM_ASSERT(entry != NULL);

            /* Read entry, stopping at first separator
             *
             * The dash is last in the scanset so that it is always taken
             * literally rather than as a range.
             *
             * @TODO Document or eliminate these character restrictions
             */
            rc = sscanf(line + entry_start, "%[a-zA-Z0-9_.-]", entry);
            if (rc != 1) {
                // The positions are bounded by len, so they fit in an int
                crm_warn("Could not parse list output entry: %s "
                         CRM_XS " entry_start=%d position=%d",
                         line + entry_start, (int) entry_start, (int) i);
                free(entry);

            } else if (pcmk__strcase_any_of(entry, PCMK_ACTION_ON,
                                            PCMK_ACTION_OFF, NULL)) {
                /* Some agents print the target status in the list output,
                 * though none are known now (the separate list-status command
                 * is used for this, but it can also print "UNKNOWN"). To handle
                 * this possibility, skip such entries.
                 *
                 * @TODO Document or eliminate the implied restriction of target
                 * names.
                 */
                free(entry);

            } else {
                // We have a valid entry
                *output = g_list_append(*output, entry);
            }
            entry_start = i + 1;
        }
    }
}
/*!
 * \internal
 * \brief Parse a list of targets from a string
 *
 * \param[in] target_spec  Target list as a string (may be NULL)
 *
 * \return List of target names (NULL if \p target_spec is NULL or contains no
 *         usable names)
 * \note The target list string format is flexible, to allow for user-specified
 *       lists such as pcmk_host_list and the output of an agent's list action
 *       (whether direct or via the API, which escapes newlines). There may be
 *       multiple lines, separated by either a newline or an escaped newline
 *       (backslash n). Each line may have one or more target names, separated
 *       by any combination of whitespace, commas, and semi-colons. Lines
 *       containing "invalid" or "variable" will be ignored entirely. Target
 *       names "on" or "off" (case-insensitive) will be ignored. Target names
 *       may contain only alphanumeric characters, underbars (_), dashes (-),
 *       and dots (.) (if any other character occurs in the name, it and all
 *       subsequent characters in the name will be ignored).
 * \note The caller is responsible for freeing the result with
 *       g_list_free_full(result, free).
 */
GList *
stonith__parse_targets(const char *target_spec)
{
    GList *targets = NULL;

    if (target_spec != NULL) {
        size_t out_len = strlen(target_spec);
        size_t line_start = 0; // Starting index of line being processed

        for (size_t i = 0; i <= out_len; ++i) {
            if ((target_spec[i] == '\n') || (target_spec[i] == '\0')
                || ((target_spec[i] == '\\') && (target_spec[i + 1] == 'n'))) {
                // We've reached the end of one line of output

                int len = (int) (i - line_start);

                if (len > 0) {
                    // strndup() copies only the line and NUL-terminates it
                    char *line = strndup(target_spec + line_start, len);

                    // Fail the same way as other allocations in this file
                    CRM_ASSERT(line != NULL);
                    parse_list_line(line, len, &targets);
                    free(line);
                }
                if (target_spec[i] == '\\') {
                    ++i; // backslash-n takes up two positions
                }
                line_start = i + 1;
            }
        }
    }
    return targets;
}
/*!
 * \internal
 * \brief Check whether a fencing failure was followed by an equivalent success
 *
 * Only entries that precede \p event in \p top_history are examined. An entry
 * counts if it is in state st_done, has the same target (case-insensitively)
 * and action as \p event, and completed strictly later than \p event.
 *
 * \param[in] event        Fencing failure
 * \param[in] top_history  Complete fencing history (must be sorted by
 *                         stonith__sort_history() beforehand)
 *
 * \return The name of the node that executed the fencing if a later successful
 *         event exists, or NULL if no such event exists
 */
const char *
stonith__later_succeeded(const stonith_history_t *event,
                         const stonith_history_t *top_history)
{
    const char *fallback = NULL;

    for (const stonith_history_t *entry = top_history;
         (entry != NULL) && (entry != event); entry = entry->next) {

        if (entry->state != st_done) {
            continue;
        }
        if (!pcmk__str_eq(event->target, entry->target, pcmk__str_casei)
            || !pcmk__str_eq(event->action, entry->action, pcmk__str_none)) {
            continue;
        }

        // Compare seconds first, then nanoseconds as a tie-breaker
        if (event->completed > entry->completed) {
            continue;
        }
        if ((event->completed == entry->completed)
            && (event->completed_nsec >= entry->completed_nsec)) {
            continue;
        }

        if ((event->delegate == NULL)
            || pcmk__str_eq(event->delegate, entry->delegate,
                            pcmk__str_casei)) {
            // Prefer equivalent fencing by same executioner
            return entry->delegate;
        }

        if (fallback == NULL) {
            // Otherwise remember first successful executioner
            fallback = (entry->delegate == NULL)? "some node" : entry->delegate;
        }
    }
    return fallback;
}
/*!
* \internal
* \brief Sort fencing history, pending first then by most recently completed
*
* \param[in,out] history List of stonith actions
*
* \return New head of sorted \p history
* \note The list is relinked in place by rewriting each entry's next pointer;
* nothing is allocated or freed here. Entries whose state is neither
* st_done nor st_failed are treated as pending and end up in the reverse
* of their input order, because each one is prepended.
*/
stonith_history_t *
stonith__sort_history(stonith_history_t *history)
{
stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp;
for (hp = history; hp; ) {
// Save the successor first, because hp->next is overwritten below
tmp = hp->next;
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* sort into new */
// "new" is kept in descending order of (completed, completed_nsec)
if ((!new) || (hp->completed > new->completed) ||
((hp->completed == new->completed) && (hp->completed_nsec > new->completed_nsec))) {
// hp is more recent than the current head, so it becomes the head
hp->next = new;
new = hp;
} else {
// Walk until the following entry is older than hp (or the list ends)
np = new;
do {
if ((!np->next) || (hp->completed > np->next->completed) ||
((hp->completed == np->next->completed) && (hp->completed_nsec > np->next->completed_nsec))) {
hp->next = np->next;
np->next = hp;
break;
}
np = np->next;
} while (1);
}
} else {
/* put into pending */
hp->next = pending;
pending = hp;
}
hp = tmp;
}
/* pending actions don't have a completed-stamp so make them go front */
if (pending) {
stonith_history_t *last_pending = pending;
// Find the tail of the pending list so the finished list can follow it
while (last_pending->next) {
last_pending = last_pending->next;
}
last_pending->next = new;
new = pending;
}
return new;
}
/*!
 * \brief Return string equivalent of an operation state value
 *
 * \param[in] state  Fencing operation state value
 *
 * \return Human-friendly string equivalent of \p state ("unknown" if the
 *         value is not one of the handled states)
 */
const char *
stonith_op_state_str(enum op_state state)
{
    const char *text = "unknown";

    switch (state) {
        case st_query:
            text = "querying";
            break;
        case st_exec:
            text = "executing";
            break;
        case st_done:
            text = "completed";
            break;
        case st_duplicate:
            text = "duplicate";
            break;
        case st_failed:
            text = "failed";
            break;
    }
    return text;
}
/*!
 * \internal
 * \brief Find the first history entry accepted by a predicate
 *
 * \param[in] history      Head of the fencing history list to search
 * \param[in] matching_fn  Predicate called with each entry and \p user_data
 * \param[in] user_data    Passed unchanged to \p matching_fn
 *
 * \return First entry for which \p matching_fn returns true, or NULL if none
 */
stonith_history_t *
stonith__first_matching_event(stonith_history_t *history,
                              bool (*matching_fn)(stonith_history_t *, void *),
                              void *user_data)
{
    stonith_history_t *entry = history;

    while ((entry != NULL) && !matching_fn(entry, user_data)) {
        entry = entry->next;
    }
    return entry;
}
/*!
 * \internal
 * \brief Check whether a history entry is neither failed nor done
 *
 * \param[in] history    History entry to check
 * \param[in] user_data  Ignored
 *
 * \return true if the entry's state is neither st_failed nor st_done
 */
bool
stonith__event_state_pending(stonith_history_t *history, void *user_data)
{
    switch (history->state) {
        case st_failed:
        case st_done:
            return false;
        default:
            return true;
    }
}
/*!
* \internal
* \brief Check whether a history entry has a given state
*
* \param[in] history History entry to check
* \param[in] user_data State to compare against, decoded with
* GPOINTER_TO_INT()
*
* \return true if the entry's state equals the decoded value
*/
bool
stonith__event_state_eq(stonith_history_t *history, void *user_data)
{
return history->state == GPOINTER_TO_INT(user_data);
}
/*!
* \internal
* \brief Check whether a history entry does not have a given state
*
* \param[in] history History entry to check
* \param[in] user_data State to compare against, decoded with
* GPOINTER_TO_INT()
*
* \return true if the entry's state differs from the decoded value
*/
bool
stonith__event_state_neq(stonith_history_t *history, void *user_data)
{
return history->state != GPOINTER_TO_INT(user_data);
}
/*!
 * \internal
 * \brief Set device flags for the "plug" and "port" parameters in metadata
 *
 * Every PCMK_XE_PARAMETER element found in \p metadata is examined; a name of
 * "plug" or "port" (case-insensitive) sets the corresponding
 * st_device_supports_parameter_* flag.
 *
 * \param[in,out] device_flags  Flag group to update
 * \param[in]     device_name   Device name passed to the flag-setting macro
 * \param[in]     metadata      Agent metadata XML to search
 */
void
stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name,
                                xmlNode *metadata)
{
    xmlXPathObjectPtr results = NULL;
    int n_results = 0;

    CRM_CHECK((device_flags != NULL) && (metadata != NULL), return);

    results = xpath_search(metadata, "//" PCMK_XE_PARAMETER);
    n_results = numXpathResults(results);

    // A non-positive count simply skips the loop
    for (int idx = 0; idx < n_results; idx++) {
        xmlNode *match = getXpathResult(results, idx);
        const char *name = NULL;

        CRM_LOG_ASSERT(match != NULL);
        if (match == NULL) {
            continue;
        }

        name = crm_element_value(match, PCMK_XA_NAME);

        if (pcmk__str_eq(name, "plug", pcmk__str_casei)) {
            stonith__set_device_flags(*device_flags, device_name,
                                      st_device_supports_parameter_plug);

        } else if (pcmk__str_eq(name, "port", pcmk__str_casei)) {
            stonith__set_device_flags(*device_flags, device_name,
                                      st_device_supports_parameter_port);
        }
    }

    freeXpathObject(results);
}
/*!
 * \internal
 * \brief Retrieve fence agent meta-data asynchronously
 *
 * \param[in]     agent        Agent to execute
 * \param[in]     timeout_sec  Error if not complete within this time
 * \param[in]     callback     Function to call with result (this will always
 *                             be called, whether by this function directly or
 *                             later via the main loop, and on success the
 *                             metadata will be in its result argument's
 *                             action_stdout)
 * \param[in,out] user_data    User data to pass to callback
 *
 * \return Standard Pacemaker return code
 * \note The caller must use a main loop. This function is not a
 *       stonith_api_operations_t method because it does not need a stonith_t
 *       object and does not go through the fencer, but executes the agent
 *       directly.
 */
int
stonith__metadata_async(const char *agent, int timeout_sec,
                        void (*callback)(int pid,
                                         const pcmk__action_result_t *result,
                                         void *user_data),
                        void *user_data)
{
    switch (stonith_get_namespace(agent, NULL)) {
        case st_namespace_rhcs:
            {
                stonith_action_t *metadata_action =
                    stonith__action_create(agent, PCMK_ACTION_METADATA, NULL,
                                           0, timeout_sec, NULL, NULL, NULL);
                const int legacy_rc = stonith__execute_async(metadata_action,
                                                             user_data,
                                                             callback, NULL);

                if (legacy_rc != pcmk_ok) {
                    // Not started, so report the result and clean up here
                    callback(0, stonith__action_result(metadata_action),
                             user_data);
                    stonith__destroy_action(metadata_action);
                }
                return pcmk_legacy2rc(legacy_rc);
            }

#if HAVE_STONITH_STONITH_H
        case st_namespace_lha:
            // LHA metadata is simply synthesized, so simulate async
            {
                // Members not named here are zero-initialized (NULL)
                pcmk__action_result_t lha_result = {
                    .exit_status = CRM_EX_OK,
                    .execution_status = PCMK_EXEC_DONE,
                };

                stonith__lha_metadata(agent, timeout_sec,
                                      &lha_result.action_stdout);
                callback(0, &lha_result, user_data);
                pcmk__reset_result(&lha_result);
                return pcmk_rc_ok;
            }
#endif

        default:
            {
                // Members not named here are zero-initialized (NULL)
                pcmk__action_result_t missing_result = {
                    .exit_status = CRM_EX_NOSUCH,
                    .execution_status = PCMK_EXEC_ERROR_HARD,
                    .exit_reason = crm_strdup_printf("No such agent '%s'",
                                                     agent),
                };

                callback(0, &missing_result, user_data);
                pcmk__reset_result(&missing_result);
                return ENOENT;
            }
    }
}
/*!
* \internal
* \brief Return the exit status from an async action callback
*
* \param[in] data Callback data
*
* \return Exit status from callback data
*/
int
stonith__exit_status(const stonith_callback_data_t *data)
{
if ((data == NULL) || (data->opaque == NULL)) {
return CRM_EX_ERROR;
}
return ((pcmk__action_result_t *) data->opaque)->exit_status;
}
/*!
* \internal
* \brief Return the execution status from an async action callback
*
* \param[in] data Callback data
*
* \return Execution status from callback data
*/
int
stonith__execution_status(const stonith_callback_data_t *data)
{
if ((data == NULL) || (data->opaque == NULL)) {
return PCMK_EXEC_UNKNOWN;
}
return ((pcmk__action_result_t *) data->opaque)->execution_status;
}
/*!
* \internal
* \brief Return the exit reason from an async action callback
*
* \param[in] data Callback data
*
* \return Exit reason from callback data
*/
const char *
stonith__exit_reason(const stonith_callback_data_t *data)
{
if ((data == NULL) || (data->opaque == NULL)) {
return NULL;
}
return ((pcmk__action_result_t *) data->opaque)->exit_reason;
}
/*!
* \internal
* \brief Return the exit status from an event notification
*
* \param[in] event Event
*
* \return Exit status from event
*/
int
stonith__event_exit_status(const stonith_event_t *event)
{
if ((event == NULL) || (event->opaque == NULL)) {
return CRM_EX_ERROR;
} else {
struct event_private *event_private = event->opaque;
return event_private->result.exit_status;
}
}
/*!
* \internal
* \brief Return the execution status from an event notification
*
* \param[in] event Event
*
* \return Execution status from event
*/
int
stonith__event_execution_status(const stonith_event_t *event)
{
if ((event == NULL) || (event->opaque == NULL)) {
return PCMK_EXEC_UNKNOWN;
} else {
struct event_private *event_private = event->opaque;
return event_private->result.execution_status;
}
}
/*!
* \internal
* \brief Return the exit reason from an event notification
*
* \param[in] event Event
*
* \return Exit reason from event
*/
const char *
stonith__event_exit_reason(const stonith_event_t *event)
{
if ((event == NULL) || (event->opaque == NULL)) {
return NULL;
} else {
struct event_private *event_private = event->opaque;
return event_private->result.exit_reason;
}
}
/*!
 * \internal
 * \brief Return a human-friendly description of a fencing event
 *
 * \param[in] event  Event to describe (dereferenced unconditionally)
 *
 * \return Newly allocated string with description of \p event
 * \note The caller is responsible for freeing the return value.
 *       This function asserts on memory errors and never returns NULL.
 */
char *
stonith__event_description(const stonith_event_t *event)
{
    // Use somewhat readable defaults
    const char *origin = pcmk__s(event->client_origin, "a client");
    const char *origin_node = pcmk__s(event->origin, "a node");
    const char *executioner = pcmk__s(event->executioner, "the cluster");
    const char *device = pcmk__s(event->device, "unknown");
    const char *action = pcmk__s(event->action, event->operation);
    const char *target = pcmk__s(event->target, "no node");
    const char *reason = stonith__event_exit_reason(event);
    const char *reason_open = "";
    const char *reason_close = "";
    const char *status = NULL;
    const int exec_status = stonith__event_execution_status(event);

    if (action == NULL) {
        action = "(unknown)";
    }

    // Any failure of a completed execution is reported as a generic error
    if (exec_status != PCMK_EXEC_DONE) {
        status = pcmk_exec_status_str(exec_status);
    } else if (stonith__event_exit_status(event) != CRM_EX_OK) {
        status = pcmk_exec_status_str(PCMK_EXEC_ERROR);
    } else {
        status = crm_exit_str(CRM_EX_OK);
    }

    // Notifications other than fencing results get a fixed description
    if (pcmk__str_eq(event->operation, T_STONITH_NOTIFY_HISTORY,
                     pcmk__str_none)) {
        return crm_strdup_printf("Fencing history may have changed");
    }
    if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_ADD,
                     pcmk__str_none)) {
        return crm_strdup_printf("A fencing device (%s) was added", device);
    }
    if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_DEL,
                     pcmk__str_none)) {
        return crm_strdup_printf("A fencing device (%s) was removed", device);
    }
    if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_ADD,
                     pcmk__str_none)) {
        return crm_strdup_printf("A fencing topology level (%s) was added",
                                 device);
    }
    if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_DEL,
                     pcmk__str_none)) {
        return crm_strdup_printf("A fencing topology level (%s) was removed",
                                 device);
    }

    // The exit reason, if any, is shown in parentheses after the status
    if (reason != NULL) {
        reason_open = " (";
        reason_close = ")";
    }

    // event->operation should be T_STONITH_NOTIFY_FENCE at this point
    return crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s%s%s%s (ref=%s)",
                             action, target, executioner, origin, origin_node,
                             status, reason_open, pcmk__s(reason, ""),
                             reason_close, pcmk__s(event->id, "(none)"));
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
const char *get_stonith_provider(const char *agent, const char *provider);
/*!
* \brief Get the text name of the namespace of a fence agent
*
* \param[in] agent Fence agent name, passed to stonith_get_namespace()
* \param[in] provider Namespace hint, passed to stonith_get_namespace()
*
* \return Result of stonith_namespace2text() for the detected namespace
*/
const char *
get_stonith_provider(const char *agent, const char *provider)
{
return stonith_namespace2text(stonith_get_namespace(agent, provider));
}
// LCOV_EXCL_STOP
// End deprecated API
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index c8146d9323..8d78ff8d13 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -1,2300 +1,2302 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <signal.h>
#include <sys/utsname.h>
#include <crm/services.h>
#include <crm/lrmd.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/internal.h> // pcmk__ends_with_ext()
#include <crm/common/ipc.h>
#include <crm/common/mainloop.h>
#include <crm/common/output.h>
#include <crm/common/output_internal.h>
#include <crm/common/results.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/cib/internal.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <pacemaker-internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h> // stonith__*
#include "crm_mon.h"
#define SUMMARY "Provides a summary of cluster's current state.\n\n" \
"Outputs varying levels of detail in a number of different formats."
/*
* Definitions indicating which items to print
*/
// Bitmask of pcmk_section_* bits, updated by apply_include()/apply_exclude()
static uint32_t show;
// Bitmask of pcmk_show_* display options, updated by the option callbacks
static uint32_t show_opts = pcmk_show_pending;
/*
* Definitions indicating how to output
*/
// Selected output format; set by the as_*_cb() and no_curses_cb() callbacks
static mon_output_format_t output_format = mon_output_unset;
/* other globals */
static GIOChannel *io_channel = NULL;
static GMainLoop *mainloop = NULL;
static guint reconnect_timer = 0;
static mainloop_timer_t *refresh_timer = NULL;
static enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid;
static cib_t *cib = NULL;
static stonith_t *st = NULL;
static xmlNode *current_cib = NULL;
static GError *error = NULL;
// Common command-line arguments; the as_*_cb() callbacks set its output type
static pcmk__common_args_t *args = NULL;
static pcmk__output_t *out = NULL;
static GOptionContext *context = NULL;
static gchar **processed_args = NULL;
static time_t last_refresh = 0;
volatile crm_trigger_t *refresh_trigger = NULL;
// Fence history mode chosen by fence_history_cb()
static enum pcmk__fence_history fence_history = pcmk__fence_history_none;
// Fence history level parsed by fence_history_cb(), which accepts 0-3
int interactive_fence_level = 0;
/* Table of supported output formats, ended by an all-NULL entry. The curses
* entry is compiled in only when CURSES_ENABLED is set.
*/
static pcmk__supported_format_t formats[] = {
#if CURSES_ENABLED
CRM_MON_SUPPORTED_FORMAT_CURSES,
#endif
PCMK__SUPPORTED_FORMAT_HTML,
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/*!
* \internal
* \brief Default "crm-mon-disconnected" message handler, which prints nothing
*
* \param[in] out Output object (unused)
* \param[in] args Message arguments (unused)
*
* \return pcmk_rc_no_output
*/
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_default(pcmk__output_t *out, va_list args)
{
return pcmk_rc_no_output;
}
/*!
 * \internal
 * \brief HTML handler for the "crm-mon-disconnected" message
 *
 * Emits "Not connected to CIB", optionally followed by a description and a
 * friendly pacemakerd state, as a series of span elements, then finishes the
 * output with CRM_EX_DISCONNECT.
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Description (const char *, may be NULL) followed by the
 *                      pacemakerd state (passed as int)
 *
 * \return pcmk_rc_ok
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
                  "enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_html(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);
    const bool have_desc = (desc != NULL);
    const bool have_state = (state != pcmk_pacemakerd_state_invalid);

    if (out->dest != stdout) {
        out->reset(out);
    }

    pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN,
                                      "Not connected to CIB");

    if (have_desc) {
        pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ": ");
        pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, desc);
    }

    if (have_state) {
        const char *friendly = pcmk__pcmkd_state_enum2friendly(state);

        pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, " (");
        pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, friendly);
        pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ")");
    }

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Text handler for the "crm-mon-disconnected" message
 *
 * Prints "Not connected to CIB", optionally followed by a description and a
 * friendly pacemakerd state, then finishes the output with CRM_EX_DISCONNECT.
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Description (const char *, may be NULL) followed by the
 *                      pacemakerd state (passed as int)
 *
 * \return Return value of the output object's info method
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
                  "enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_text(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);
    const char *separator = (desc != NULL)? ": " : "";
    const char *detail = pcmk__s(desc, "");
    int rc = pcmk_rc_ok;

    if (out->dest != stdout) {
        out->reset(out);
    }

    if (state == pcmk_pacemakerd_state_invalid) {
        rc = out->info(out, "Not connected to CIB%s%s", separator, detail);
    } else {
        rc = out->info(out, "Not connected to CIB%s%s (%s)",
                       separator, detail,
                       pcmk__pcmkd_state_enum2friendly(state));
    }

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return rc;
}
/*!
 * \internal
 * \brief XML handler for the "crm-mon-disconnected" message
 *
 * Creates a PCMK_XE_CRM_MON_DISCONNECTED element carrying the description and
 * the pacemakerd state text (NULL when the state is invalid), then finishes
 * the output with CRM_EX_DISCONNECT.
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Description (const char *, may be NULL) followed by the
 *                      pacemakerd state (passed as int)
 *
 * \return pcmk_rc_ok
 */
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
                  "enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_xml(pcmk__output_t *out, va_list args)
{
    const char *desc = va_arg(args, const char *);
    enum pcmk_pacemakerd_state state =
        (enum pcmk_pacemakerd_state) va_arg(args, int);
    const char *state_text = NULL;

    if (out->dest != stdout) {
        out->reset(out);
    }

    if (state != pcmk_pacemakerd_state_invalid) {
        state_text = pcmk_pacemakerd_api_daemon_state_enum2text(state);
    }

    pcmk__output_create_xml_node(out, PCMK_XE_CRM_MON_DISCONNECTED,
                                 PCMK_XA_DESCRIPTION, desc,
                                 PCMK_XA_PACEMAKERD_STATE, state_text,
                                 NULL);

    out->finish(out, CRM_EX_DISCONNECT, true, NULL);
    return pcmk_rc_ok;
}
/* Handlers for the "crm-mon-disconnected" message, one per format name, ended
* by an all-NULL entry
*/
static pcmk__message_entry_t fmt_functions[] = {
{ "crm-mon-disconnected", "default", crm_mon_disconnected_default },
{ "crm-mon-disconnected", "html", crm_mon_disconnected_html },
{ "crm-mon-disconnected", "text", crm_mon_disconnected_text },
{ "crm-mon-disconnected", "xml", crm_mon_disconnected_xml },
{ NULL, NULL, NULL },
};
/* Define exit codes for monitoring-compatible output
* For nagios plugins, the possibilities are
* OK=0, WARN=1, CRIT=2, and UNKNOWN=3
*/
#define MON_STATUS_WARN CRM_EX_ERROR
#define MON_STATUS_CRIT CRM_EX_INVALID_PARAM
#define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE
// Initial value of options.reconnect_ms, in milliseconds
#define RECONNECT_MSECS 5000
/* Settings collected from the command line. Members that are not explicitly
* initialized below start out zeroed.
*/
struct {
// Interval parsed from --interval by reconnect_cb()
guint reconnect_ms;
// Execution mode chosen by the one-shot, daemonize, interval, and format callbacks
enum mon_exec_mode exec_mode;
// Set by fence_history_cb(): FALSE for level 0, TRUE for levels 1-3
gboolean fence_connect;
// NOTE(review): print_pending and show_bans are not referenced in the visible code
gboolean print_pending;
gboolean show_bans;
// Set by --watch-fencing
gboolean watch_fencing;
// Set by --pid-file
char *pid_file;
// Set by --external-agent
char *external_agent;
// Set by --external-recipient
char *external_recipient;
// PREFIX from an "--include bans:PREFIX" section, stored by apply_include()
char *neg_location_prefix;
// Set by --node
char *only_node;
// Set by --resource
char *only_rsc;
// "OPTION=VALUE" strings appended by user_include_exclude_cb()
GSList *user_includes_excludes;
// "OPTION=VALUE" strings appended by include_exclude_cb()
GSList *includes_excludes;
} options = {
.reconnect_ms = RECONNECT_MSECS,
.exec_mode = mon_exec_unset,
.fence_connect = TRUE,
};
static crm_exit_t clean_up(crm_exit_t exit_code);
static void crm_diff_update(const char *event, xmlNode * msg);
static void clean_up_on_connection_failure(int rc);
static int mon_refresh_display(gpointer user_data);
static int setup_cib_connection(void);
static int setup_fencer_connection(void);
static int setup_api_connections(void);
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
static void refresh_after_event(gboolean data_updated, gboolean enforce);
/*!
 * \internal
 * \brief Get the section bitmask meaning "all sections" for an output format
 *
 * \param[in] fmt  Output format
 *
 * \return Everything except pcmk_section_options for the monitor, plain, and
 *         console formats, otherwise pcmk_section_all
 */
static uint32_t
all_includes(mon_output_format_t fmt) {
    switch (fmt) {
        case mon_output_monitor:
        case mon_output_plain:
        case mon_output_console:
            return ~pcmk_section_options;

        default:
            return pcmk_section_all;
    }
}
/*!
 * \internal
 * \brief Get the default section bitmask for an output format
 *
 * \param[in] fmt  Output format
 *
 * \return Summary, nodes, resources, and failures for the monitor, plain,
 *         console, HTML, and CGI formats; all_includes() for XML; otherwise 0
 */
static uint32_t
default_includes(mon_output_format_t fmt) {
    if (fmt == mon_output_xml) {
        return all_includes(fmt);
    }

    if ((fmt == mon_output_monitor) || (fmt == mon_output_plain)
        || (fmt == mon_output_console) || (fmt == mon_output_html)
        || (fmt == mon_output_cgi)) {

        return pcmk_section_summary
               |pcmk_section_nodes
               |pcmk_section_resources
               |pcmk_section_failures;
    }

    return 0;
}
/* Map of section names accepted by --include/--exclude to their
* pcmk_section_* bits, ended by an entry with a NULL name. It is searched by
* find_section_bit().
*/
struct {
const char *name;
uint32_t bit;
} sections[] = {
{ "attributes", pcmk_section_attributes },
{ "bans", pcmk_section_bans },
{ "counts", pcmk_section_counts },
{ "dc", pcmk_section_dc },
{ "failcounts", pcmk_section_failcounts },
{ "failures", pcmk_section_failures },
{ PCMK_VALUE_FENCING, pcmk_section_fencing_all },
{ "fencing-failed", pcmk_section_fence_failed },
{ "fencing-pending", pcmk_section_fence_pending },
{ "fencing-succeeded", pcmk_section_fence_worked },
{ "maint-mode", pcmk_section_maint_mode },
{ "nodes", pcmk_section_nodes },
{ "operations", pcmk_section_operations },
{ "options", pcmk_section_options },
{ "resources", pcmk_section_resources },
{ "stack", pcmk_section_stack },
{ "summary", pcmk_section_summary },
{ "tickets", pcmk_section_tickets },
{ "times", pcmk_section_times },
{ NULL }
};
/*!
 * \internal
 * \brief Look up the section bit for a section name
 *
 * \param[in] name  Section name (compared case-insensitively)
 *
 * \return Bit from the sections table, or 0 if \p name is not in the table
 */
static uint32_t
find_section_bit(const char *name) {
    int idx = 0;

    while (sections[idx].name != NULL) {
        if (pcmk__str_eq(sections[idx].name, name, pcmk__str_casei)) {
            return sections[idx].bit;
        }
        idx++;
    }
    return 0;
}
/*!
 * \internal
 * \brief Remove a comma-separated list of sections from the shown sections
 *
 * "all" clears every section and PCMK_VALUE_NONE restores all_includes() for
 * the current output format. Processing stops at the first unknown name.
 *
 * \param[in]  excludes  Comma-separated section names
 * \param[out] error     Where to store a usage error for an unknown name
 *
 * \return TRUE if every name was recognized, otherwise FALSE
 */
static gboolean
apply_exclude(const gchar *excludes, GError **error) {
    char **parts = g_strsplit(excludes, ",", 0);
    gboolean result = TRUE;

    for (int idx = 0; (parts[idx] != NULL) && result; idx++) {
        const char *name = parts[idx];
        const uint32_t bit = find_section_bit(name);

        if (pcmk__str_eq(name, "all", pcmk__str_none)) {
            show = 0;

        } else if (pcmk__str_eq(name, PCMK_VALUE_NONE, pcmk__str_none)) {
            show = all_includes(output_format);

        } else if (bit != 0) {
            show &= ~bit;

        } else {
            g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                        "--exclude options: all, attributes, bans, counts, dc, "
                        "failcounts, failures, fencing, fencing-failed, "
                        "fencing-pending, fencing-succeeded, maint-mode, nodes, "
                        PCMK_VALUE_NONE ", operations, options, resources, "
                        "stack, summary, tickets, times");
            result = FALSE;
        }
    }

    g_strfreev(parts);
    return result;
}
/*!
 * \internal
 * \brief Add a comma-separated list of sections to the shown sections
 *
 * "all" selects all_includes() for the current output format,
 * PCMK_VALUE_DEFAULT or "defaults" adds default_includes(), and
 * PCMK_VALUE_NONE clears every section. A name starting with "bans" enables
 * the bans section and replaces options.neg_location_prefix with the text
 * after "bans:" (or NULL if there is none). Processing stops at the first
 * unknown name.
 *
 * \param[in]  includes  Comma-separated section names
 * \param[out] error     Where to store a usage error for an unknown name
 *
 * \return TRUE if every name was recognized, otherwise FALSE
 */
static gboolean
apply_include(const gchar *includes, GError **error) {
    char **parts = g_strsplit(includes, ",", 0);
    gboolean result = TRUE;

    for (int idx = 0; (parts[idx] != NULL) && result; idx++) {
        const char *name = parts[idx];
        const uint32_t bit = find_section_bit(name);

        if (pcmk__str_eq(name, "all", pcmk__str_none)) {
            show = all_includes(output_format);

        } else if (pcmk__starts_with(name, "bans")) {
            show |= pcmk_section_bans;

            // Drop any earlier prefix (free(NULL) is a no-op)
            free(options.neg_location_prefix);
            options.neg_location_prefix = NULL;

            if ((strlen(name) > 4) && (name[4] == ':')) {
                options.neg_location_prefix = strdup(name + 5);
            }

        } else if (pcmk__str_any_of(name, PCMK_VALUE_DEFAULT, "defaults",
                                    NULL)) {
            show |= default_includes(output_format);

        } else if (pcmk__str_eq(name, PCMK_VALUE_NONE, pcmk__str_none)) {
            show = 0;

        } else if (bit != 0) {
            show |= bit;

        } else {
            g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
                        "--include options: all, attributes, bans[:PREFIX], counts, dc, "
                        PCMK_VALUE_DEFAULT ", failcounts, failures, fencing, "
                        "fencing-failed, fencing-pending, fencing-succeeded, "
                        "maint-mode, nodes, " PCMK_VALUE_NONE ", operations, "
                        "options, resources, stack, summary, tickets, times");
            result = FALSE;
        }
    }

    g_strfreev(parts);
    return result;
}
/*!
 * \internal
 * \brief Apply a list of stored include/exclude options in order
 *
 * Each list item is a string of the form "--include=...", "-I=...",
 * "--exclude=...", or "-U=..."; items with any other prefix are ignored.
 *
 * \param[in]  lst    List of option strings
 * \param[out] error  Where to store an error from apply_include() or
 *                    apply_exclude()
 *
 * \return TRUE if every item was applied, otherwise the first failing result
 */
static gboolean
apply_include_exclude(GSList *lst, GError **error) {
    for (GSList *iter = lst; iter != NULL; iter = iter->next) {
        char *spec = iter->data;
        gboolean rc = TRUE;

        // The offsets skip the option name and the '=' that follows it
        if (pcmk__starts_with(spec, "--include=")) {
            rc = apply_include(spec + 10, error);

        } else if (pcmk__starts_with(spec, "-I=")) {
            rc = apply_include(spec + 3, error);

        } else if (pcmk__starts_with(spec, "--exclude=")) {
            rc = apply_exclude(spec + 10, error);

        } else if (pcmk__starts_with(spec, "-U=")) {
            rc = apply_exclude(spec + 3, error);
        }

        if (rc != TRUE) {
            return rc;
        }
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Remember an include/exclude option given by the user
 *
 * \param[in]  option_name  Option name
 * \param[in]  optarg       Option value
 * \param[in]  data         Ignored
 * \param[out] err          Ignored
 *
 * \return TRUE (always)
 * \note Appends a newly allocated "OPTION=VALUE" string to
 *       options.user_includes_excludes.
 */
static gboolean
user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    options.user_includes_excludes =
        g_slist_append(options.user_includes_excludes,
                       crm_strdup_printf("%s=%s", option_name, optarg));
    return TRUE;
}
/*!
 * \internal
 * \brief Remember an include/exclude option implied by another option
 *
 * \param[in]  option_name  Option name
 * \param[in]  optarg       Option value
 * \param[in]  data         Ignored
 * \param[out] err          Ignored
 *
 * \return TRUE (always)
 * \note Appends a newly allocated "OPTION=VALUE" string to
 *       options.includes_excludes.
 */
static gboolean
include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    options.includes_excludes =
        g_slist_append(options.includes_excludes,
                       crm_strdup_printf("%s=%s", option_name, optarg));
    return TRUE;
}
/*!
 * \internal
 * \brief Select CGI output: HTML output type, CGI format, one-shot mode
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Ignored
 * \param[in]  data         Ignored
 * \param[out] err          Ignored
 *
 * \return TRUE (always)
 */
static gboolean
as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    options.exec_mode = mon_exec_one_shot;
    output_format = mon_output_cgi;
    pcmk__str_update(&args->output_ty, "html");
    return TRUE;
}
/*!
 * \internal
 * \brief Select HTML output written to a given destination
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Output destination
 * \param[in]  data         Ignored
 * \param[out] err          Ignored
 *
 * \return TRUE (always)
 * \note Also sets the process umask so that group and other write bits are
 *       cleared on files created afterward.
 */
static gboolean
as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    umask(S_IWGRP | S_IWOTH);   // World-readable HTML
    output_format = mon_output_html;
    pcmk__str_update(&args->output_ty, "html");
    pcmk__str_update(&args->output_dest, optarg);
    return TRUE;
}
/*!
 * \internal
 * \brief Select monitoring-style text output in one-shot mode
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Ignored
 * \param[in]  data         Ignored
 * \param[out] err          Ignored
 *
 * \return TRUE (always)
 */
static gboolean
as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    options.exec_mode = mon_exec_one_shot;
    output_format = mon_output_monitor;
    pcmk__str_update(&args->output_ty, "text");
    return TRUE;
}
/*!
 * \internal
 * \brief Select legacy XML output
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Ignored
 * \param[in]  data         Ignored
 * \param[out] err          Ignored
 *
 * \return TRUE (always)
 */
static gboolean
as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    output_format = mon_output_legacy_xml;
    pcmk__str_update(&args->output_ty, "xml");
    return TRUE;
}
/*!
 * \internal
 * \brief Configure fence history display from a level of 0-3
 *
 * Level 0 disables the fencer connection and excludes the fencing section.
 * Levels 1-3 request full fence history; level 1 includes only failed and
 * pending fencing, while levels 2 and 3 include the whole fencing section.
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Level as a string (NULL means level 2)
 * \param[in]  data         Passed to include_exclude_cb()
 * \param[out] err          Where to store an error for an out-of-range level
 *
 * \return Result of include_exclude_cb(), or FALSE for an out-of-range level
 */
static gboolean
fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    const char *included = NULL;

    if (optarg != NULL) {
        pcmk__scan_min_int(optarg, &interactive_fence_level, 0);
    } else {
        interactive_fence_level = 2;
    }

    switch (interactive_fence_level) {
        case 3:
        case 2:
            included = PCMK_VALUE_FENCING;
            break;

        case 1:
            included = "fencing-failed,fencing-pending";
            break;

        case 0:
            options.fence_connect = FALSE;
            fence_history = pcmk__fence_history_none;
            return include_exclude_cb("--exclude", PCMK_VALUE_FENCING, data,
                                      err);

        default:
            g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3");
            return FALSE;
    }

    // Levels 1-3 differ only in which sections they include
    options.fence_connect = TRUE;
    fence_history = pcmk__fence_history_full;
    return include_exclude_cb("--include", included, data, err);
}
/*!
* \internal
* \brief Set the pcmk_show_rscs_by_node display option
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Ignored
* \param[out] err Ignored
*
* \return TRUE (always)
*/
static gboolean
group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_rscs_by_node;
return TRUE;
}
/*!
* \internal
* \brief Record a user exclusion of the "summary" section
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Passed to user_include_exclude_cb()
* \param[out] err Passed to user_include_exclude_cb()
*
* \return Result of user_include_exclude_cb()
*/
static gboolean
hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--exclude", "summary", data, err);
}
/*!
* \internal
* \brief Set the pcmk_show_inactive_rscs display option
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Ignored
* \param[out] err Ignored
*
* \return TRUE (always)
*/
static gboolean
inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_inactive_rscs;
return TRUE;
}
/*!
 * \internal
 * \brief Select plain text output
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Ignored
 * \param[in]  data         Ignored
 * \param[out] err          Ignored
 *
 * \return TRUE (always)
 */
static gboolean
no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    output_format = mon_output_plain;
    pcmk__str_update(&args->output_ty, "text");
    return TRUE;
}
/*!
* \internal
* \brief Set the pcmk_show_brief display option
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Ignored
* \param[out] err Ignored
*
* \return TRUE (always)
*/
static gboolean
print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_brief;
return TRUE;
}
/*!
* \internal
* \brief Set the pcmk_show_details display option
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Ignored
* \param[out] err Ignored
*
* \return TRUE (always)
*/
static gboolean
print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_details;
return TRUE;
}
/*!
* \internal
* \brief Set the pcmk_show_description display option
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Ignored
* \param[out] err Ignored
*
* \return TRUE (always)
*/
static gboolean
print_description_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_description;
return TRUE;
}
/*!
 * \internal
 * \brief Set the pcmk_show_timing option and include the operations section
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Ignored
 * \param[in]  data         Passed to user_include_exclude_cb()
 * \param[out] err          Passed to user_include_exclude_cb()
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    gboolean included = FALSE;

    show_opts = show_opts | pcmk_show_timing;
    included = user_include_exclude_cb("--include", "operations", data, err);
    return included;
}
/*!
 * \internal
 * \brief Set the update interval from a time specification
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Interval specification
 * \param[in]  data         Ignored
 * \param[out] err          Where to store an error if \p optarg is invalid
 *
 * \return FALSE if crm_get_msec() rejects \p optarg, otherwise TRUE
 * \note On success, the execution mode becomes mon_exec_update unless it is
 *       already mon_exec_daemonized.
 */
static gboolean
reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    int msec = crm_get_msec(optarg);

    if (msec == -1) {
        g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg);
        return FALSE;
    }

    pcmk_parse_interval_spec(optarg, &options.reconnect_ms);

    // Reconnect interval applies to daemonized too, so don't override
    if (options.exec_mode != mon_exec_daemonized) {
        options.exec_mode = mon_exec_update;
    }
    return TRUE;
}
/*!
* \internal
* \brief Enable one-shot mode
*
* \param[in] option_name Name of option being parsed (ignored)
* \param[in] optarg Value to be parsed (ignored)
* \param[in] data User data (ignored)
* \param[out] err Where to store error (ignored)
*
* \return TRUE (always)
*/
static gboolean
one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **err)
{
options.exec_mode = mon_exec_one_shot;
return TRUE;
}
/*!
* \internal
* \brief Enable daemonized mode
*
* \param[in] option_name Name of option being parsed (ignored)
* \param[in] optarg Value to be parsed (ignored)
* \param[in] data User data (ignored)
* \param[out] err Where to store error (ignored)
*
* \return TRUE (always)
*/
static gboolean
daemonize_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **err)
{
options.exec_mode = mon_exec_daemonized;
return TRUE;
}
/*!
* \internal
* \brief Record a user inclusion of the "attributes" section
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Passed to user_include_exclude_cb()
* \param[out] err Passed to user_include_exclude_cb()
*
* \return Result of user_include_exclude_cb()
*/
static gboolean
show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "attributes", data, err);
}
/*!
 * \internal
 * \brief Record a user inclusion of the "bans" section
 *
 * \param[in]  option_name  Ignored
 * \param[in]  optarg       Optional prefix, recorded as "bans:PREFIX"
 * \param[in]  data         Passed to user_include_exclude_cb()
 * \param[out] err          Passed to user_include_exclude_cb()
 *
 * \return Result of user_include_exclude_cb()
 */
static gboolean
show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
    char *section = NULL;
    gboolean rc = FALSE;

    if (optarg == NULL) {
        return user_include_exclude_cb("--include", "bans", data, err);
    }

    // The callback stores its own copy, so the temporary can be freed
    section = crm_strdup_printf("bans:%s", optarg);
    rc = user_include_exclude_cb("--include", section, data, err);
    free(section);
    return rc;
}
/*!
* \internal
* \brief Record a user inclusion of the "failcounts" section
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Passed to user_include_exclude_cb()
* \param[out] err Passed to user_include_exclude_cb()
*
* \return Result of user_include_exclude_cb()
*/
static gboolean
show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "failcounts", data, err);
}
/*!
* \internal
* \brief Record a user inclusion of the "failcounts" and "operations" sections
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Passed to user_include_exclude_cb()
* \param[out] err Passed to user_include_exclude_cb()
*
* \return Result of user_include_exclude_cb()
*/
static gboolean
show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "failcounts,operations", data, err);
}
/*!
* \internal
* \brief Record a user inclusion of the "tickets" section
*
* \param[in] option_name Ignored
* \param[in] optarg Ignored
* \param[in] data Passed to user_include_exclude_cb()
* \param[out] err Passed to user_include_exclude_cb()
*
* \return Result of user_include_exclude_cb()
*/
static gboolean
show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "tickets", data, err);
}
/*!
* \internal
* \brief Use a CIB file as input and switch to one-shot mode
*
* \param[in] option_name Ignored
* \param[in] optarg Value to store in the CIB_file environment variable
* \param[in] data Ignored
* \param[out] err Ignored
*
* \return TRUE (always; the result of setenv() is not checked)
*/
static gboolean
use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
// Overwrite any existing CIB_file value
setenv("CIB_file", optarg, 1);
options.exec_mode = mon_exec_one_shot;
return TRUE;
}
// Prefix placed in front of continuation lines of multi-line option help text
#define INDENT " "
/* *INDENT-OFF* */
/* Command-line options registered as the "additional" option group by
 * build_arg_context(). Each entry either stores directly into a field of
 * options or invokes a callback; the array ends with an all-NULL entry.
 */
static GOptionEntry addl_entries[] = {
{ "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb,
"Update frequency (default is 5 seconds)",
"TIMESPEC" },
{ "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
one_shot_cb,
"Display the cluster status once and exit",
NULL },
{ "daemonize", 'd', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
daemonize_cb,
"Run in the background as a daemon.\n"
INDENT "Requires at least one of --output-to and --external-agent.",
NULL },
{ "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file,
"(Advanced) Daemon pid file location",
"FILE" },
{ "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent,
"A program to run when resource operations take place",
"FILE" },
{ "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient,
"A recipient for your program (assuming you want the program to send something to someone).",
"RCPT" },
{ "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing,
"Listen for fencing events. For use with --external-agent.",
NULL },
// Hidden from help output; the callback exports CIB_file and forces one-shot mode
{ "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb,
NULL,
NULL },
{ NULL }
};
/* Command-line options registered as the "display" option group by
 * build_arg_context(). The short names and descriptions in this table are also
 * looked up by get_option_desc() for the interactive help screen. The array
 * ends with an all-NULL entry.
 */
static GOptionEntry display_entries[] = {
{ "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
"A list of sections to include in the output.\n"
INDENT "See `Output Control` help for more information.",
"SECTION(s)" },
{ "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
"A list of sections to exclude from the output.\n"
INDENT "See `Output Control` help for more information.",
"SECTION(s)" },
// Long-only option (no short name)
{ "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node,
"When displaying information about nodes, show only what's related to the given\n"
INDENT "node, or to all nodes tagged with the given tag",
"NODE" },
// Long-only option (no short name)
{ "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc,
"When displaying information about resources, show only what's related to the given\n"
INDENT "resource, or to all resources tagged with the given tag",
"RSC" },
{ "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb,
"Group resources by node",
NULL },
{ "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb,
"Display inactive resources",
NULL },
{ "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb,
"Display resource fail counts",
NULL },
{ "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb,
"Display resource operation history",
NULL },
{ "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb,
"Display resource operation history with timing details",
NULL },
{ "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb,
"Display cluster tickets",
NULL },
// The level value is optional on the command line
{ "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb,
"Show fence history:\n"
INDENT "0=off, 1=failures and pending (default without option),\n"
INDENT "2=add successes (default without value for option),\n"
INDENT "3=show full history without reduction to most recent of each flavor",
"LEVEL" },
// The id-prefix value is optional on the command line
{ "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb,
"Display negative location constraints [optionally filtered by id prefix]",
NULL },
{ "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb,
"Display node attributes",
NULL },
{ "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb,
"Hide all headers",
NULL },
{ "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb,
"Show more details (node IDs, individual clone instances)",
NULL },
// Long-only option (no short name)
{ "show-description", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_description_cb,
"Show resource descriptions",
NULL },
{ "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb,
"Brief output",
NULL },
// Hidden from help output, but its description is still shown on the interactive help screen
{ "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending,
"Display pending state if '" PCMK_META_RECORD_PENDING "' is enabled",
NULL },
{ NULL }
};
/* Command-line options registered as the "deprecated" option group by
 * build_arg_context(). Every entry is handled by a callback, and most help
 * strings name the replacement options to use instead. The array ends with an
 * all-NULL entry.
 */
static GOptionEntry deprecated_entries[] = {
{ "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb,
"Write cluster status to the named HTML file.\n"
INDENT "Use --output-as=html --output-to=FILE instead.",
"FILE" },
{ "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb,
"Write cluster status as XML to stdout. This will enable one-shot mode.\n"
INDENT "Use --output-as=xml instead.",
NULL },
{ "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
as_simple_cb,
"Display the cluster status once as a simple one line output\n"
INDENT "(suitable for nagios)",
NULL },
{ "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb,
"Disable the use of ncurses.\n"
INDENT "Use --output-as=text instead.",
NULL },
{ "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb,
"Web mode with output suitable for CGI (preselected when run as *.cgi).\n"
INDENT "Use --output-as=html --html-cgi instead.",
NULL },
{ NULL }
};
/* *INDENT-ON* */
/* Timer callback that tries to re-establish the API connections. It is
 * scheduled with a delay of options.reconnect_ms, both by
 * mon_cib_connection_destroy() after a disconnect and by this function itself
 * when an attempt fails.
 *
 * The source is always removed on return; a failed attempt arms a brand new
 * timer instead of repeating this one.
 */
static gboolean
reconnect_after_timeout(gpointer data)
{
#if CURSES_ENABLED
    if (output_format == mon_output_console) {
        clear();
        refresh();
    }
#endif

    out->transient(out, "Reconnecting...");

    if (setup_api_connections() != pcmk_rc_ok) {
        // Still disconnected: report it and schedule the next attempt
        out->message(out, "crm-mon-disconnected",
                     "Latest connection attempt failed", pcmkd_state);
        reconnect_timer = g_timeout_add(options.reconnect_ms,
                                        reconnect_after_timeout, NULL);

    } else {
        // Trigger redrawing the screen (needs reconnect_timer == 0)
        reconnect_timer = 0;
        refresh_after_event(FALSE, TRUE);
    }

    return G_SOURCE_REMOVE;
}
/* Handle loss of the connection to the CIB or to the fencer.
 *
 * Reports the loss, stops the pending refresh and reconnect timers, tears down
 * the fencer connection and its notifications, and, if a CIB object exists,
 * signs off from the CIB and schedules a reconnection attempt.
 */
static void
mon_cib_connection_destroy(gpointer user_data)
{
    const char *lost_msg = "Connection to the cluster lost";

    pcmkd_state = pcmk_pacemakerd_state_invalid;

    /* No crm-mon-disconnected message for console; a working implementation
     * is not currently worth the effort
     */
    out->transient(out, "%s", lost_msg);
    out->message(out, "crm-mon-disconnected", lost_msg, pcmkd_state);

    // A refresh will be triggered after reconnecting
    if (refresh_timer != NULL) {
        mainloop_timer_stop(refresh_timer);
    }

    // A new reconnect timeout is armed at the end of this function
    if (reconnect_timer != 0) {
        g_source_remove(reconnect_timer);
        reconnect_timer = 0;
    }

    /* The client API won't properly reconnect notifications if they are still
     * in the table, so remove them
     */
    if (st != NULL) {
        if (st->state != stonith_disconnected) {
            st->cmds->disconnect(st);
        }
        st->cmds->remove_notification(st, NULL);
    }

    if (cib != NULL) {
        cib->cmds->signoff(cib);
        reconnect_timer = g_timeout_add(options.reconnect_ms,
                                        reconnect_after_timeout, NULL);
    }
}
/* Mainloop signal handler for a normal shutdown request. The signal number is
 * not inspected; clean_up() is always called with a success status.
 */
static void
mon_shutdown(int nsig)
{
    (void) nsig;
    clean_up(CRM_EX_OK);
}
#if CURSES_ENABLED
// SIGWINCH handler that was installed before ours, or NULL if none is usable
static volatile sighandler_t ncurses_winch_handler;

/* Signal handler installed the regular way (not into the main loop) for
 * terminal resizes, e.g. when the user changes the size of an xterm.
 *
 * A static counter keeps a nested invocation from redoing the work while an
 * earlier one is still in progress.
 */
static void
mon_winresize(int nsig)
{
    static int not_done;
    int rows = 0;
    int columns = 0;
    const int already_active = not_done++;

    if (already_active == 0) {
        if (ncurses_winch_handler) {
            /* The original ncurses WINCH signal handler does the magic of
             * retrieving the new window size; otherwise, we'd have to use
             * ioctl or tgetent
             */
            (*ncurses_winch_handler) (SIGWINCH);
        }
        getmaxyx(stdscr, rows, columns);
        resizeterm(rows, columns);

        /* Ask the mainloop to run the refresh trigger the next time it gets
         * around to checking
         */
        mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
    }
    not_done--;
}
#endif
static int
setup_fencer_connection(void)
{
int rc = pcmk_ok;
if (options.fence_connect && st == NULL) {
st = stonith_api_new();
}
if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) {
return rc;
}
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_ok) {
crm_trace("Setting up stonith callbacks");
if (options.watch_fencing) {
- st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
+ st->cmds->register_notification(st,
+ PCMK__VALUE_ST_NOTIFY_DISCONNECT,
mon_st_callback_event);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
} else {
- st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
+ st->cmds->register_notification(st,
+ PCMK__VALUE_ST_NOTIFY_DISCONNECT,
mon_st_callback_display);
st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
}
} else {
stonith_api_delete(st);
st = NULL;
}
return rc;
}
/*!
 * \internal
 * \brief Sign on to the CIB and register for connection-loss and diff
 *        notifications
 *
 * If the CIB object is already connected, nothing is done. Lack of support
 * for connection-loss notifications is reported but tolerated; failure to set
 * up diff notifications is reported, and both the CIB connection and the
 * fencer object are cleaned up.
 *
 * \return Standard Pacemaker return code (\c EINVAL if there is no CIB object)
 */
static int
setup_cib_connection(void)
{
    int rc = pcmk_rc_ok;

    CRM_CHECK(cib != NULL, return EINVAL);

    if (cib->state != cib_disconnected) {
        // Already connected with notifications registered for CIB updates
        return rc;
    }

    rc = cib__signon_query(out, &cib, &current_cib);
    if (rc != pcmk_rc_ok) {
        return rc;
    }

    rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
    rc = pcmk_legacy2rc(rc);
    if (rc == EPROTONOSUPPORT) {
        out->err(out,
                 "CIB client does not support connection loss "
                 "notifications; crm_mon will be unable to reconnect after "
                 "connection loss");
        rc = pcmk_rc_ok;
    }

    if (rc == pcmk_rc_ok) {
        // Remove any earlier registration before adding the callback again
        cib->cmds->del_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY,
                                       crm_diff_update);
        rc = pcmk_legacy2rc(cib->cmds->add_notify_callback(cib,
                                                           PCMK__VALUE_CIB_DIFF_NOTIFY,
                                                           crm_diff_update));
    }

    if (rc == pcmk_rc_ok) {
        return rc;
    }

    if (rc != EPROTONOSUPPORT) {
        out->err(out, "CIB diff notification setup failed");
    } else {
        out->err(out,
                 "CIB client does not support CIB diff "
                 "notifications");
    }
    out->err(out, "Cannot monitor CIB changes; exiting");

    cib__clean_up_connection(&cib);
    stonith_api_delete(st);
    st = NULL;
    return rc;
}
/* This is used to set up the fencing options after the interactive UI has been
 * started. fence_history_cb can't be used because it builds up a list of
 * includes/excludes that then have to be processed with apply_include_exclude,
 * and that could affect other things.
 *
 * Levels 1 through 3 enable the fencer connection; any other value turns
 * fencing display off and resets interactive_fence_level to 0.
 *
 * NOTE(review): levels 2 and 3 are configured identically here, although the
 * --fence-history help text describes them differently -- confirm that the
 * distinction is made elsewhere.
 */
static void
set_fencing_options(int level)
{
    if ((level < 1) || (level > 3)) {
        interactive_fence_level = 0;
        options.fence_connect = FALSE;
        fence_history = pcmk__fence_history_none;
        show &= ~pcmk_section_fencing_all;
        return;
    }

    options.fence_connect = TRUE;
    fence_history = pcmk__fence_history_full;

    if (level == 1) {
        show |= pcmk_section_fence_failed | pcmk_section_fence_pending;
    } else {
        show |= pcmk_section_fencing_all;
    }
}
/*!
 * \internal
 * \brief Connect to the cluster APIs needed for monitoring
 *
 * Does nothing if the CIB object is already connected. For a native CIB, the
 * pacemakerd state is queried first; a connection to the fencer and the CIB
 * is attempted only if the state is running, remote, or shutting down.
 *
 * \return Standard Pacemaker return code (\c EINVAL if there is no CIB
 *         object, \c ENOTCONN if pacemakerd is in any other state)
 */
static int
setup_api_connections(void)
{
    CRM_CHECK(cib != NULL, return EINVAL);

    if (cib->state != cib_disconnected) {
        return pcmk_rc_ok;
    }

    if (cib->variant == cib_native) {
        int status_rc = pcmk__pacemakerd_status(out, crm_system_name,
                                                options.reconnect_ms / 2, false,
                                                &pcmkd_state);

        if (status_rc != pcmk_rc_ok) {
            return status_rc;
        }

        /* Fencer and CIB may still be available while shutting down or
         * running on a Pacemaker Remote node; in any other state they are
         * definitely unavailable
         */
        if ((pcmkd_state != pcmk_pacemakerd_state_running)
            && (pcmkd_state != pcmk_pacemakerd_state_remote)
            && (pcmkd_state != pcmk_pacemakerd_state_shutting_down)) {
            return ENOTCONN;
        }

        setup_fencer_connection();
    }

    return setup_cib_connection();
}
#if CURSES_ENABLED
static const char *
get_option_desc(char c)
{
const char *desc = "No help available";
for (GOptionEntry *entry = display_entries; entry != NULL; entry++) {
if (entry->short_name == c) {
desc = entry->description;
break;
}
}
return desc;
}
/* Print one line of the interactive help screen: a '*' if the condition holds
 * (blank otherwise), the option's key, and the option's description.
 *
 * The expansion deliberately has no trailing semicolon, so that an invocation
 * followed by ';' is exactly one statement. Note that option is evaluated twice.
 */
#define print_option_help(out, option, condition) \
    curses_formatted_printf((out), "%c %c: \t%s\n", ((condition)? '*': ' '), (option), get_option_desc(option))
/* This function is called from the main loop when there is something to be
 * read on stdin, like an interactive user's keystroke. It reads keystrokes,
 * flips the matching display flags (or shows the page listing which
 * keystrokes are valid), and requests a redraw of the screen. It makes no CIB
 * or fencer API calls itself.
 *
 * Once '?' has been pressed, the help page is redrawn after every recognized
 * key, and reading continues until an unrecognized key arrives.
 */
static gboolean
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
{
    gboolean help_mode = FALSE;
    gboolean keep_reading = TRUE;

    while (keep_reading) {
        gboolean known_key = TRUE;
        int key = getchar();    /* Get user input */

        switch (key) {
            case 'm':
                // Cycle through fence history levels 0..3
                if (interactive_fence_level >= 3) {
                    interactive_fence_level = 0;
                } else {
                    interactive_fence_level++;
                }
                set_fencing_options(interactive_fence_level);
                break;

            case 'c':
                show ^= pcmk_section_tickets;
                break;

            case 'f':
                show ^= pcmk_section_failcounts;
                break;

            case 'n':
                show_opts ^= pcmk_show_rscs_by_node;
                break;

            case 'o':
                show ^= pcmk_section_operations;
                // Timing details are pointless without the operation history
                if (!pcmk_is_set(show, pcmk_section_operations)) {
                    show_opts &= ~pcmk_show_timing;
                }
                break;

            case 'r':
                show_opts ^= pcmk_show_inactive_rscs;
                break;

            case 'R':
                show_opts ^= pcmk_show_details;
#ifdef PCMK__COMPAT_2_0
                // Keep failed action output the same as 2.0.x
                show_opts |= pcmk_show_failed_detail;
#endif
                break;

            case 't':
                show_opts ^= pcmk_show_timing;
                // Timing details imply showing the operation history
                if (pcmk_is_set(show_opts, pcmk_show_timing)) {
                    show |= pcmk_section_operations;
                }
                break;

            case 'A':
                show ^= pcmk_section_attributes;
                break;

            case 'L':
                show ^= pcmk_section_bans;
                break;

            case 'D':
                /* If any header is shown, clear them all, otherwise set them all */
                if (pcmk_any_flags_set(show, pcmk_section_summary)) {
                    show &= ~pcmk_section_summary;
                } else {
                    show |= pcmk_section_summary;
                }
                /* Regardless, we don't show options in console mode. */
                show &= ~pcmk_section_options;
                break;

            case 'b':
                show_opts ^= pcmk_show_brief;
                break;

            case 'j':
                show_opts ^= pcmk_show_pending;
                break;

            case '?':
                help_mode = TRUE;
                break;

            default:
                /* All other keys just redraw the screen. */
                known_key = FALSE;
                break;
        }

        if (!known_key || !help_mode) {
            keep_reading = FALSE;
            continue;
        }

        clear();
        refresh();

        curses_formatted_printf(out, "%s", "Display option change mode\n");
        print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets));
        print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts));
        print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node));
        print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations));
        print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs));
        print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing));
        print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes));
        print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans));
        print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary));
#ifdef PCMK__COMPAT_2_0
        print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details & ~pcmk_show_failed_detail));
#else
        print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details));
#endif
        print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief));
        print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending));
        curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m'));
        curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n");
    }

    refresh_after_event(FALSE, TRUE);
    return TRUE;
}
#endif // CURSES_ENABLED
// Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag
static void
avoid_zombies(void)
{
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
if (sigemptyset(&sa.sa_mask) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
return;
}
sa.sa_handler = SIG_IGN;
sa.sa_flags = SA_RESTART|SA_NOCLDWAIT;
if (sigaction(SIGCHLD, &sa, NULL) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
}
}
/*!
 * \internal
 * \brief Build the command-line option context for this program
 *
 * Creates the common argument context, adds the main \c --quiet option, sets
 * the long description text, and registers the display, additional, and
 * deprecated option groups.
 *
 * \param[in,out] args   Common argument object (its \c quiet member receives
 *                       the \c --quiet flag)
 * \param[out]    group  Passed through to \c pcmk__build_arg_context()
 *
 * \return Newly built option context
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionEntry extra_prog_entries[] = {
        { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet),
          "Be less descriptive in output.",
          NULL },

        { NULL }
    };

    // Supported output formats; the default depends on curses availability
#if CURSES_ENABLED
    const char *fmts = "console (default), html, text, xml, none";
#else
    const char *fmts = "text (default), html, xml, none";
#endif // CURSES_ENABLED

    const char *desc =
        "Notes:\n\n"
        "If this program is called as crm_mon.cgi, --output-as=html and\n"
        "--html-cgi are automatically added to the command line\n"
        "arguments.\n\n"

        "Time Specification:\n\n"
        "The TIMESPEC in any command line option can be specified in many\n"
        "different formats. It can be an integer number of seconds, a\n"
        "number plus units (us/usec/ms/msec/s/sec/m/min/h/hr), or an ISO\n"
        "8601 period specification.\n\n"

        "Output Control:\n\n"
        "By default, a particular set of sections are written to the\n"
        "output destination. The default varies based on the output\n"
        "format: XML includes all sections by default, while other output\n"
        "formats include less. This set can be modified with the --include\n"
        "and --exclude command line options. Each option may be passed\n"
        "multiple times, and each can specify a comma-separated list of\n"
        "sections. The options are applied to the default set, in order\n"
        "from left to right as they are passed on the command line. For a\n"
        "list of valid sections, pass --include=list or --exclude=list.\n\n"

        "Interactive Use:\n\n"
#if CURSES_ENABLED
        "When run interactively, crm_mon can be told to hide and show\n"
        "various sections of output. To see a help screen explaining the\n"
        "options, press '?'. Any key stroke aside from those listed will\n"
        "cause the screen to refresh.\n\n"
#else
        "The local installation of Pacemaker was built without support for\n"
        "interactive (console) mode. A curses library must be available at\n"
        "build time to support interactive mode.\n\n"
#endif // CURSES_ENABLED

        "Examples:\n\n"
#if CURSES_ENABLED
        "Display the cluster status on the console with updates as they\n"
        "occur:\n\n"
        "\tcrm_mon\n\n"
#endif // CURSES_ENABLED
        "Display the cluster status once and exit:\n\n"
        "\tcrm_mon -1\n\n"
        "Display the cluster status, group resources by node, and include\n"
        "inactive resources in the list:\n\n"
        "\tcrm_mon --group-by-node --inactive\n\n"
        "Start crm_mon as a background daemon and have it write the\n"
        "cluster status to an HTML file:\n\n"
        "\tcrm_mon --daemonize --output-as html "
        "--output-to /path/to/docroot/filename.html\n\n"
        "Display the cluster status as XML:\n\n"
        "\tcrm_mon --output-as xml\n\n";

    GOptionContext *new_context = pcmk__build_arg_context(args, fmts, group,
                                                          NULL);

    pcmk__add_main_args(new_context, extra_prog_entries);
    g_option_context_set_description(new_context, desc);

    pcmk__add_arg_group(new_context, "display", "Display Options:",
                        "Show display options", display_entries);
    pcmk__add_arg_group(new_context, "additional", "Additional Options:",
                        "Show additional options", addl_entries);
    pcmk__add_arg_group(new_context, "deprecated", "Deprecated Options:",
                        "Show deprecated options", deprecated_entries);

    return new_context;
}
/* If certain format options were specified, we want to set some extra
* options. We can just process these like they were given on the
* command line.
*/
static void
add_output_args(void) {
GError *err = NULL;
if (output_format == mon_output_plain) {
if (!pcmk__force_args(context, &err, "%s --text-fancy", g_get_prgname())) {
g_propagate_error(&error, err);
clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_cgi) {
if (!pcmk__force_args(context, &err, "%s --html-cgi", g_get_prgname())) {
g_propagate_error(&error, err);
clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_xml) {
if (!pcmk__force_args(context, &err, "%s --xml-simple-list",
g_get_prgname())) {
g_propagate_error(&error, err);
clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_legacy_xml) {
output_format = mon_output_xml;
if (!pcmk__force_args(context, &err, "%s --xml-legacy",
g_get_prgname())) {
g_propagate_error(&error, err);
clean_up(CRM_EX_USAGE);
}
}
}
/*!
 * \internal
 * \brief Set output format based on \p --output-as arguments and mode arguments
 *
 * When the deprecated output format arguments (\p --web-cgi, \p --as-html,
 * \p --simple-status, \p --as-xml) are parsed, their callback functions are
 * expected to have set \p output_format already, and that choice takes
 * precedence. Otherwise, this function derives \p output_format from the
 * current \p --output-as argument together with the \p --one-shot and
 * \p --daemonize arguments, setting the umask for HTML output.
 *
 * \param[in,out] args  Command line arguments (\c output_ty may be updated)
 */
static void
reconcile_output_format(pcmk__common_args_t *args)
{
    if (output_format != mon_output_unset) {
        // A deprecated argument already chose the format; nothing more to do
        return;
    }

    if (pcmk__str_eq(args->output_ty, "none", pcmk__str_none)) {
        output_format = mon_output_none;
        return;
    }

    if (pcmk__str_eq(args->output_ty, "html", pcmk__str_none)) {
        output_format = mon_output_html;
        umask(S_IWGRP | S_IWOTH);  // World-readable HTML
        return;
    }

    if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) {
        output_format = mon_output_xml;
        return;
    }

#if CURSES_ENABLED
    if (pcmk__str_eq(args->output_ty, "console", pcmk__str_null_matches)) {
        /* Console is the default format, but it is usable only if all of the
         * following hold:
         * * Neither daemonized nor one-shot mode was requested (console
         *   output is incompatible with modes other than mon_exec_update)
         * * The version was not requested (that is effectively one-shot)
         * * The output destination is stdout (the only destination console
         *   mode is compatible with)
         *
         * Otherwise, fall back to text output.
         */
        const bool console_usable =
            (options.exec_mode != mon_exec_daemonized)
            && (options.exec_mode != mon_exec_one_shot)
            && !args->version
            && pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches);

        if (console_usable) {
            pcmk__str_update(&args->output_ty, "console");
            output_format = mon_output_console;
            crm_enable_stderr(FALSE);
        } else {
            pcmk__str_update(&args->output_ty, "text");
            output_format = mon_output_plain;
        }
        return;
    }
#endif // CURSES_ENABLED

    if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) {
        /* Text output was explicitly requested, or it's the default because
         * curses is not enabled
         */
        pcmk__str_update(&args->output_ty, "text");
        output_format = mon_output_plain;
    }

    // Otherwise, invalid format. Let pcmk__output_new() throw an error.
}
/*!
 * \internal
 * \brief Set execution mode to the output format's default if appropriate
 *
 * Console output always runs in update mode. For every other format, one-shot
 * mode is used when no mode was chosen, and also when update mode was chosen
 * but the output destination is stdout.
 *
 * \param[in] args  Command line arguments
 */
static void
set_default_exec_mode(const pcmk__common_args_t *args)
{
    if (output_format == mon_output_console) {
        /* Update is the only valid mode for console, but set here instead of
         * reconcile_output_format() for isolation and consistency
         */
        options.exec_mode = mon_exec_update;
        return;
    }

    /* One-shot is the default for all other formats, and it also replaces
     * update mode when writing to stdout, since update mode cannot be used
     * with stdout unless the console format is in use
     */
    if ((options.exec_mode == mon_exec_unset)
        || ((options.exec_mode == mon_exec_update)
            && pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches))) {

        options.exec_mode = mon_exec_one_shot;
    }
}
/* Record an error message describing why the cluster connection failed, then
 * call clean_up() with an exit code derived from rc. In monitor output format,
 * clean_up() is first called with the critical monitoring status.
 */
static void
clean_up_on_connection_failure(int rc)
{
    if (output_format == mon_output_monitor) {
        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
                    pcmk_rc_str(rc));
        clean_up(MON_STATUS_CRIT);

    } else if (rc != ENOTCONN) {
        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc));

    } else if (pcmkd_state != pcmk_pacemakerd_state_remote) {
        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");

    } else {
        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster");
    }

    clean_up(pcmk_rc2exitc(rc));
}
/* Produce the cluster status once via pcmk__status(), then finish through
 * clean_up() on success or clean_up_on_connection_failure() otherwise.
 */
static void
one_shot(void)
{
    const bool simple_status = (output_format == mon_output_monitor);
    int rc = pcmk__status(out, cib, fence_history, show, show_opts,
                          options.only_node, options.only_rsc,
                          options.neg_location_prefix, simple_status, 0);

    if (rc != pcmk_rc_ok) {
        clean_up_on_connection_failure(rc);
    } else {
        clean_up(pcmk_rc2exitc(rc));
    }
}
static void
exit_on_invalid_cib(void)
{
if (cib != NULL) {
return;
}
// Shouldn't really be possible
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source");
clean_up(CRM_EX_ERROR);
}
/* Program entry point: parse the command line, settle on an output format and
 * execution mode, create the output object, then either print the status once
 * or connect to the cluster and run the main loop until shutdown.
 */
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
GOptionGroup *output_group = NULL;
args = pcmk__new_common_args(SUMMARY);
context = build_arg_context(args, &output_group);
pcmk__register_formats(output_group, formats);
// Default pid file; -p/--pid-file stores into the same field
options.pid_file = strdup("/tmp/ClusterMon.pid");
pcmk__cli_init_logging("crm_mon", 0);
// Avoid needing to wait for subprocesses forked for -E/--external-agent
avoid_zombies();
// Being invoked under a name ending in .cgi preselects CGI output and one-shot mode
if (pcmk__ends_with_ext(argv[0], ".cgi")) {
output_format = mon_output_cgi;
options.exec_mode = mon_exec_one_shot;
}
processed_args = pcmk__cmdline_preproc(argv, "ehimpxEILU");
// Fence history level 1 is the default when the option is not given
fence_history_cb("--fence-history", "1", NULL, NULL);
/* Set an HTML title regardless of what format we will eventually use. This can't
* be done in add_output_args. That function is called after command line
* arguments are processed in the next block, which means it'll override whatever
* title the user provides. Doing this here means the user can give their own
* title on the command line.
*/
if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"",
g_get_prgname())) {
return clean_up(CRM_EX_USAGE);
}
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
return clean_up(CRM_EX_USAGE);
}
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
// Everything in this block needs a CIB object, which --version does not
if (!args->version) {
if (args->quiet) {
include_exclude_cb("--exclude", "times", NULL, NULL);
}
// Watching fencing events turns off history display but still needs the fencer connection
if (options.watch_fencing) {
fence_history_cb("--fence-history", "0", NULL, NULL);
options.fence_connect = TRUE;
}
/* create the cib-object early to be able to do further
* decisions based on the cib-source
*/
cib = cib_new();
exit_on_invalid_cib();
switch (cib->variant) {
case cib_native:
// Everything (fencer, CIB, pcmkd status) should be available
break;
case cib_file:
// Live fence history is not meaningful
fence_history_cb("--fence-history", "0", NULL, NULL);
/* Notifications are unsupported; nothing to monitor
* @COMPAT: Let setup_cib_connection() handle this by exiting?
*/
options.exec_mode = mon_exec_one_shot;
break;
case cib_remote:
// We won't receive any fencing updates
fence_history_cb("--fence-history", "0", NULL, NULL);
break;
default:
/* something is odd */
exit_on_invalid_cib();
break;
}
if ((options.exec_mode == mon_exec_daemonized)
&& !options.external_agent
&& pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--daemonize requires at least one of --output-to "
"(with value not set to '-') and --external-agent");
return clean_up(CRM_EX_USAGE);
}
}
// Order matters: the format is settled first, then the mode, then format-specific arguments
reconcile_output_format(args);
set_default_exec_mode(args);
add_output_args();
/* output_format MUST NOT BE CHANGED AFTER THIS POINT. */
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
return clean_up(CRM_EX_ERROR);
}
/* If we had a valid format for pcmk__output_new(), output_format should be
* set by now.
*/
CRM_ASSERT(output_format != mon_output_unset);
if (options.exec_mode == mon_exec_daemonized) {
if (!options.external_agent && (output_format == mon_output_none)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--daemonize requires --external-agent if used with "
"--output-as=none");
return clean_up(CRM_EX_USAGE);
}
crm_enable_stderr(FALSE);
// The CIB object is discarded before daemonizing and created afresh afterwards
cib_delete(cib);
cib = NULL;
pcmk__daemonize(crm_system_name, options.pid_file);
cib = cib_new();
exit_on_invalid_cib();
}
show = default_includes(output_format);
/* Apply --include/--exclude flags we used internally. There's no error reporting
* here because this would be a programming error.
*/
apply_include_exclude(options.includes_excludes, &error);
/* And now apply any --include/--exclude flags the user gave on the command line.
* These are done in a separate pass from the internal ones because we want to
* make sure whatever the user specifies overrides whatever we do.
*/
if (!apply_include_exclude(options.user_includes_excludes, &error)) {
return clean_up(CRM_EX_USAGE);
}
/* Sync up the initial value of interactive_fence_level with whatever was set with
* --include/--exclude= options.
*/
if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) {
interactive_fence_level = 3;
} else if (pcmk_is_set(show, pcmk_section_fence_worked)) {
interactive_fence_level = 2;
} else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) {
interactive_fence_level = 1;
} else {
interactive_fence_level = 0;
}
pcmk__register_lib_messages(out);
crm_mon_register_messages(out);
pe__register_messages(out);
stonith__register_messages(out);
// Messages internal to this file, nothing curses-specific
pcmk__register_messages(out, fmt_functions);
if (args->version) {
out->version(out, false);
return clean_up(CRM_EX_OK);
}
/* Extra sanity checks when in CGI mode */
if (output_format == mon_output_cgi) {
if (cib->variant == cib_file) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode used with CIB file");
return clean_up(CRM_EX_USAGE);
} else if (options.external_agent != NULL) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with --external-agent");
return clean_up(CRM_EX_USAGE);
} else if (options.exec_mode == mon_exec_daemonized) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with -d");
return clean_up(CRM_EX_USAGE);
}
}
if (output_format == mon_output_xml) {
show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing;
}
// For HTML not written to stdout, add a meta refresh header using the reconnect interval in seconds
if ((output_format == mon_output_html || output_format == mon_output_cgi)
&& (out->dest != stdout)) {
char *content = pcmk__itoa(options.reconnect_ms / 1000);
pcmk__html_add_header(PCMK__XE_META,
PCMK__XA_HTTP_EQUIV, PCMK__VALUE_REFRESH,
PCMK__XA_CONTENT, content,
NULL);
free(content);
}
#ifdef PCMK__COMPAT_2_0
// Keep failed action output the same as 2.0.x
show_opts |= pcmk_show_failed_detail;
#endif
crm_info("Starting %s", crm_system_name);
cib__set_output(cib, out);
// NOTE(review): presumably one_shot() never returns, since each of its paths ends in clean_up() -- confirm
if (options.exec_mode == mon_exec_one_shot) {
one_shot();
}
out->message(out, "crm-mon-disconnected",
"Waiting for initial connection", pcmkd_state);
// Keep retrying the initial connection for as long as the failure is ENOTCONN or ECONNREFUSED
do {
out->transient(out, "Connecting to cluster...");
rc = setup_api_connections();
if (rc != pcmk_rc_ok) {
if ((rc == ENOTCONN) || (rc == ECONNREFUSED)) {
out->transient(out, "Connection failed. Retrying in %ums...",
options.reconnect_ms);
}
// Give some time to view all output even if we won't retry
pcmk__sleep_ms(options.reconnect_ms);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
}
} while ((rc == ENOTCONN) || (rc == ECONNREFUSED));
if (rc != pcmk_rc_ok) {
clean_up_on_connection_failure(rc);
}
set_fencing_options(interactive_fence_level);
// Draw the initial display before entering the main loop
mon_refresh_display(NULL);
mainloop = g_main_loop_new(NULL, FALSE);
mainloop_add_signal(SIGTERM, mon_shutdown);
mainloop_add_signal(SIGINT, mon_shutdown);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
// Remember the previously installed SIGWINCH handler only if it is a real function
ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize);
if (ncurses_winch_handler == SIG_DFL ||
ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR)
ncurses_winch_handler = NULL;
// Watch stdin so that keystrokes reach detect_user_input()
io_channel = g_io_channel_unix_new(STDIN_FILENO);
g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL);
}
#endif
/* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In
* this file, that is anywhere mainloop_set_trigger is called.
*/
refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
g_main_loop_run(mainloop);
g_main_loop_unref(mainloop);
if (io_channel != NULL) {
g_io_channel_shutdown(io_channel, TRUE, NULL);
}
crm_info("Exiting %s", crm_system_name);
return clean_up(CRM_EX_OK);
}
/*!
 * \internal
 * \brief Export one notification variable to the environment, or clear it
 *
 * The environment of this process is inherited by every external agent that
 * is launched later, so a variable that has no value for the current event
 * must be removed rather than left holding the value of a previous event.
 *
 * \param[in] name   Environment variable name
 * \param[in] value  Value to export, or NULL to remove the variable
 */
static void
export_notify_var(const char *name, const char *value)
{
    if (value != NULL) {
        setenv(name, value, 1);
    } else {
        unsetenv(name);
    }
}

/*!
 * \internal
 * \brief Run the configured external agent for a resource or fencing event
 *
 * The event details are passed to the agent via CRM_notify_* environment
 * variables, then the agent (options.external_agent) is executed in a forked
 * child process. The parent does not wait for the child.
 *
 * \param[in] node       Name of the node the event relates to
 * \param[in] rsc        Resource name (NULL if the event is not tied to one)
 * \param[in] task       Name of the operation that was performed
 * \param[in] target_rc  Expected result code of the operation
 * \param[in] rc         Actual result code of the operation
 * \param[in] status     Execution status of the operation
 * \param[in] desc       Human-readable description of the result
 *
 * \return Always 0 (a fork() failure is reported via the output object only)
 */
static int
send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc,
                 int status, const char *desc)
{
    pid_t pid;

    /* setenv() needs strings, these are ints */
    char *rc_s = pcmk__itoa(rc);
    char *status_s = pcmk__itoa(status);
    char *target_rc_s = pcmk__itoa(target_rc);

    crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent);

    /* rsc is NULL for some events; clear any value left over from an earlier
     * notification so the agent does not see a stale resource name
     */
    export_notify_var("CRM_notify_rsc", rsc);

    if (options.external_recipient) {
        setenv("CRM_notify_recipient", options.external_recipient, 1);
    }

    // Passing NULL to setenv() is not allowed, so go through the helper
    export_notify_var("CRM_notify_node", node);
    export_notify_var("CRM_notify_task", task);
    export_notify_var("CRM_notify_desc", desc);
    setenv("CRM_notify_rc", rc_s, 1);
    setenv("CRM_notify_target_rc", target_rc_s, 1);
    setenv("CRM_notify_status", status_s, 1);

    pid = fork();
    if (pid == -1) {
        out->err(out, "notification fork() failed: %s", strerror(errno));

    } else if (pid == 0) {
        /* Child: replace ourselves with the agent. The argument list of a
         * variadic exec call must be terminated by a null *pointer*.
         */
        execl(options.external_agent, options.external_agent, (char *) NULL);

        /* Only reached if exec failed. Use _exit() so the child neither runs
         * the parent's exit handlers nor flushes its duplicated stdio buffers.
         */
        _exit(CRM_EX_ERROR);
    }

    crm_trace("Finished running custom notification program '%s'.", options.external_agent);
    free(target_rc_s);
    free(status_s);
    free(rc_s);
    return 0;
}
/*!
 * \internal
 * \brief Log the result of a resource operation and notify the external agent
 *
 * If \p xml is not an operation history entry, the function recurses into its
 * children instead. Otherwise the transition magic of the entry is decoded,
 * the affected node is determined, the outcome is logged, and (when an
 * external agent is configured) a notification is sent.
 *
 * \param[in] xml       Operation history entry, or an ancestor of such entries
 * \param[in] userdata  Node name (const char *) to fall back on when the XML
 *                      itself does not identify the node; may be NULL
 *
 * \return Always pcmk_rc_ok, so that iteration over siblings continues
 */
static int
handle_rsc_op(xmlNode *xml, void *userdata)
{
    const char *fallback_node = (const char *) userdata;
    const char *history_key = NULL;
    const char *magic = NULL;
    const char *node = NULL;
    const char *desc = NULL;
    char *rsc = NULL;
    char *task = NULL;
    int rc = -1;
    int status = -1;
    int target_rc = -1;
    gboolean notify = TRUE;
    xmlNode *ancestor = NULL;

    // Anything other than an operation entry is just a container to descend
    if(strcmp((const char*)xml->name, PCMK__XE_LRM_RSC_OP) != 0) {
        pcmk__xe_foreach_child(xml, NULL, handle_rsc_op, userdata);
        return pcmk_rc_ok;
    }

    history_key = pcmk__xe_history_key(xml);
    magic = crm_element_value(xml, PCMK__XA_TRANSITION_MAGIC);

    if (magic == NULL) {
        /* non-change */
        return pcmk_rc_ok;
    }

    if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc,
                                 &target_rc)) {
        crm_err("Invalid event %s detected for %s", magic, history_key);
        return pcmk_rc_ok;
    }

    if (parse_op_key(history_key, &rsc, &task, NULL) == FALSE) {
        crm_err("Invalid event detected for %s", history_key);
        goto bail;
    }

    /* Work out the node name, in order of preference: the operation's own
     * on-node value, the name or ID of the enclosing node state element, and
     * finally whatever the caller handed us
     */
    node = crm_element_value(xml, PCMK__META_ON_NODE);

    for (ancestor = xml; ancestor != NULL; ancestor = ancestor->parent) {
        if (pcmk__xe_is(ancestor, PCMK__XE_NODE_STATE)) {
            break;
        }
    }

    if ((node == NULL) && (ancestor != NULL)) {
        node = crm_element_value(ancestor, PCMK_XA_UNAME);
        if (node == NULL) {
            node = pcmk__xe_id(ancestor);
        }
    }

    if (node == NULL) {
        node = fallback_node;
    }

    if (node == NULL) {
        crm_err("No node detected for event %s (%s)", magic, history_key);
        goto bail;
    }

    /* look up where we expected it to be? */
    if ((status == PCMK_EXEC_DONE) && (target_rc == rc)) {
        desc = pcmk_rc_str(pcmk_rc_ok);
        crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc);

        // An expected "not running" result is not worth a notification
        notify = (rc == PCMK_OCF_NOT_RUNNING)? FALSE : TRUE;

    } else {
        if (status == PCMK_EXEC_DONE) {
            // The agent ran but returned something other than expected
            desc = services_ocf_exitcode_str(rc);
        } else {
            // The operation did not complete
            desc = pcmk_exec_status_str(status);
        }
        crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
    }

    if (notify && options.external_agent) {
        send_custom_trap(node, rsc, task, target_rc, rc, status, desc);
    }

bail:
    free(rsc);
    free(task);
    return pcmk_rc_ok;
}
/* Timer callback that requests a redraw by setting the refresh trigger.
 *
 * It exists only so that mainloop_set_trigger() can be invoked from a mainloop
 * source, giving one more way to get the screen redrawn. The source is not
 * rescheduled.
 */
static gboolean
mon_trigger_refresh(gpointer user_data)
{
    crm_trigger_t *trigger = (crm_trigger_t *) refresh_trigger;

    mainloop_set_trigger(trigger);
    return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Process all operation history found beneath one node's XML element
 *
 * The node is identified by its name attribute if present, otherwise by its
 * XML ID; that identifier is handed to handle_rsc_op() as the fallback node.
 *
 * \param[in] xml       XML element describing a node
 * \param[in] userdata  Ignored
 *
 * \return Always pcmk_rc_ok
 */
static int
handle_op_for_node(xmlNode *xml, void *userdata)
{
    const char *node_name = crm_element_value(xml, PCMK_XA_UNAME);

    handle_rsc_op(xml,
                  (void *) ((node_name != NULL)? node_name : pcmk__xe_id(xml)));
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Find resource operation results in a format 2 CIB patchset
 *
 * Each change in the patchset is examined. For "create" and "modify" changes,
 * the resulting XML is located and, depending on which element it is, passed
 * to handle_rsc_op() along with the best available node identifier. "move" and
 * "delete" changes, and changes without an operation, are skipped.
 *
 * \param[in] event  Name of the event that delivered the patchset (unused)
 * \param[in] msg    Notification message containing the patchset
 */
static void
crm_diff_update_v2(const char *event, xmlNode * msg)
{
    xmlNode *change = NULL;
    xmlNode *diff = get_message_xml(msg, PCMK__XA_CIB_UPDATE_RESULT);

    for (change = pcmk__xml_first_child(diff); change != NULL;
         change = pcmk__xml_next(change)) {

        const char *name = NULL;
        const char *op = crm_element_value(change, PCMK_XA_OPERATION);
        const char *xpath = crm_element_value(change, PCMK_XA_PATH);
        xmlNode *match = NULL;
        const char *node = NULL;

        if (op == NULL) {
            continue;

        } else if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
            match = change->children;

        } else if (pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE,
                                    NULL)) {
            continue;

        } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
            match = first_named_child(change, PCMK_XE_CHANGE_RESULT);
            if (match != NULL) {
                match = match->children;
            }
        }

        if (match != NULL) {
            name = (const char *) match->name;
        }

        crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name);

        if (xpath == NULL) {
            /* Version field, ignore */

        } else if (name == NULL) {
            /* Move and delete changes never get this far, so this is a
             * create/modify (or unrecognized) change without any result XML.
             * There is nothing to report for it; skip it rather than treat an
             * unexpected patchset as fatal.
             */
            crm_debug("No result for %s operation to %s", op, xpath);

        } else if (strcmp(name, PCMK_XE_CIB) == 0) {
            pcmk__xe_foreach_child(first_named_child(match, PCMK_XE_STATUS),
                                   NULL, handle_op_for_node, NULL);

        } else if (strcmp(name, PCMK_XE_STATUS) == 0) {
            pcmk__xe_foreach_child(match, NULL, handle_op_for_node, NULL);

        } else if (strcmp(name, PCMK__XE_NODE_STATE) == 0) {
            node = crm_element_value(match, PCMK_XA_UNAME);
            if (node == NULL) {
                node = pcmk__xe_id(match);
            }
            handle_rsc_op(match, (void *) node);

        } else if (strcmp(name, PCMK__XE_LRM) == 0) {
            node = pcmk__xe_id(match);
            handle_rsc_op(match, (void *) node);

        } else if (pcmk__str_any_of(name, PCMK__XE_LRM_RESOURCES,
                                    PCMK__XE_LRM_RESOURCE, PCMK__XE_LRM_RSC_OP,
                                    NULL)) {
            /* The result XML sits below the node's LRM element, so the node ID
             * has to be recovered from the change's XPath
             */
            char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM);

            handle_rsc_op(match, local_node);
            free(local_node);

        } else {
            crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
        }
    }
}
/*!
 * \internal
 * \brief Find resource operation results in a format 1 CIB patchset
 *
 * Every operation history entry in the "added" part of the patchset is passed
 * to handle_rsc_op(), without a fallback node name.
 *
 * \param[in] event  Name of the event that delivered the patchset (unused)
 * \param[in] msg    Notification message containing the patchset
 */
static void
crm_diff_update_v1(const char *event, xmlNode * msg)
{
    /* Process operation updates */
    xmlXPathObject *results = xpath_search(msg,
                                           "//" PCMK__XA_CIB_UPDATE_RESULT
                                           "//" PCMK__XE_DIFF_ADDED
                                           "//" PCMK__XE_LRM_RSC_OP);
    const int n_results = numXpathResults(results);

    for (int i = 0; i < n_results; i++) {
        handle_rsc_op(getXpathResult(results, i), NULL);
    }

    freeXpathObject(results);
}
/*!
 * \internal
 * \brief Handle a CIB diff notification
 *
 * The patchset is applied to the local CIB copy. If that fails, or there is no
 * local copy, the full CIB is queried again. When an external agent is
 * configured, the patchset is also scanned for resource operation results.
 * Finally a display refresh is scheduled, unless no CIB could be obtained, in
 * which case a one-time "stale data" message is shown instead.
 *
 * \param[in] event  Name of the event that delivered the patchset
 * \param[in] msg    Notification message containing the patchset
 */
static void
crm_diff_update(const char *event, xmlNode * msg)
{
    // Whether the "stale data" message has already been shown
    static bool stale = FALSE;

    gboolean cib_updated = FALSE;
    xmlNode *diff = get_message_xml(msg, PCMK__XA_CIB_UPDATE_RESULT);

    out->progress(out, false);

    if (current_cib != NULL) {
        int rc = xml_apply_patchset(current_cib, diff, TRUE);

        if (rc == pcmk_ok) {
            cib_updated = TRUE;

        } else {
            if ((rc == -pcmk_err_diff_resync)
                || (rc == -pcmk_err_diff_failed)) {
                crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
            } else {
                crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
            }

            // The local copy can no longer be trusted; force a full query
            free_xml(current_cib);
            current_cib = NULL;
        }
    }

    if (current_cib == NULL) {
        crm_trace("Re-requesting the full cib");
        cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
    }

    if (options.external_agent) {
        int format = 0;

        crm_element_value_int(diff, PCMK_XA_FORMAT, &format);

        if (format == 1) {
            crm_diff_update_v1(event, msg);
        } else if (format == 2) {
            crm_diff_update_v2(event, msg);
        } else {
            crm_err("Unknown patch format: %d", format);
        }
    }

    if (current_cib == NULL) {
        // Nothing to display; say so once, until data is available again
        if (!stale) {
            out->info(out, "--- Stale data ---");
        }
        stale = TRUE;
        return;
    }

    stale = FALSE;
    refresh_after_event(cib_updated, FALSE);
}
/*!
 * \internal
 * \brief Redraw the cluster status using the current CIB copy
 *
 * \param[in] user_data  Ignored
 *
 * \return G_SOURCE_CONTINUE if the display was refreshed, or G_SOURCE_REMOVE
 *         if there is no output format
 *
 * \note In one-line monitor mode any failure, and in every mode a schema
 *       validation failure, is handed to clean_up().
 */
static int
mon_refresh_display(gpointer user_data)
{
    int rc = pcmk_rc_ok;
    bool monitor_mode = false;

    last_refresh = time(NULL);

    if (output_format == mon_output_none) {
        return G_SOURCE_REMOVE;
    }

    /* Full fencing history is reduced unless all fencing sections are shown
     * or the output is XML
     */
    if (fence_history == pcmk__fence_history_full) {
        if (!pcmk_all_flags_set(show, pcmk_section_fencing_all)
            && (output_format != mon_output_xml)) {
            fence_history = pcmk__fence_history_reduced;
        }
    }

    // Get an up-to-date pacemakerd status for the cluster summary
    if (cib->variant == cib_native) {
        pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2,
                                false, &pcmkd_state);
    }

    if (out->dest != stdout) {
        out->reset(out);
    }

    monitor_mode = (output_format == mon_output_monitor);

    rc = pcmk__output_cluster_status(out, st, cib, current_cib, pcmkd_state,
                                     fence_history, show, show_opts,
                                     options.only_node,options.only_rsc,
                                     options.neg_location_prefix,
                                     monitor_mode);

    if (rc != pcmk_rc_ok) {
        if (monitor_mode) {
            clean_up(MON_STATUS_WARN);
            return G_SOURCE_REMOVE;
        }
        if (rc == pcmk_rc_schema_validation) {
            clean_up(CRM_EX_CONFIG);
            return G_SOURCE_REMOVE;
        }
    }

    if (out->dest != stdout) {
        out->finish(out, CRM_EX_OK, true, NULL);
    }

    return G_SOURCE_CONTINUE;
}
/* Fencing event callback used when --watch-fencing is given on the command
 * line (see setup_fencer_connection() for the events it is registered for).
 *
 * A lost fencer connection tears down the CIB connection too, so that
 * everything reconnects. Otherwise, if an external agent is configured, it is
 * notified of the fencing event.
 */
static void
mon_st_callback_event(stonith_t * st, stonith_event_t * e)
{
    char *desc = NULL;

    if (st->state == stonith_disconnected) {
        /* disconnect cib as well and have everything reconnect */
        mon_cib_connection_destroy(NULL);
        return;
    }

    if (!options.external_agent) {
        return;
    }

    desc = stonith__event_description(e);
    send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
    free(desc);
}
/* Schedule a redraw of the screen (via mainloop_set_trigger) after an event.
 *
 * The redraw is triggered immediately when any of these holds:
 *
 * - The caller enforces it, or
 * - The last refresh was more than reconnect_ms ago (defaults to 5s, but can
 *   be changed via the -i command line option), or
 * - 10 data updates have accumulated
 *
 * Otherwise a 2s timer is started that triggers the redraw when it expires.
 *
 * Despite its general-sounding name, this is only called when a fencing event
 * is received or a CIB diff occurs.
 */
static void
refresh_after_event(gboolean data_updated, gboolean enforce)
{
    // Number of data updates since the last triggered refresh
    static int updates = 0;

    time_t now = time(NULL);
    bool refresh_now = false;

    if (data_updated) {
        updates++;
    }

    if (refresh_timer == NULL) {
        refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
    }

    if (reconnect_timer > 0) {
        /* we will receive a refresh request after successful reconnect */
        mainloop_timer_stop(refresh_timer);
        return;
    }

    /* as we're not handling initial failure of fencer-connection as
     * fatal give it a retry here
     * not getting here if cib-reconnection is already on the way
     */
    setup_fencer_connection();

    refresh_now = enforce ||
                  ((now - last_refresh) > (options.reconnect_ms / 1000)) ||
                  updates >= 10;

    if (!refresh_now) {
        // Too soon for a redraw; let the timer take care of it
        mainloop_timer_start(refresh_timer);
        return;
    }

    mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
    mainloop_timer_stop(refresh_timer);
    updates = 0;
}
/* Fencing event callback used when --watch-fencing is NOT given on the command
 * line (see setup_fencer_connection() for the events it is registered for).
 *
 * A lost fencer connection tears down the CIB connection too, so that
 * everything reconnects. Any other event schedules a display refresh.
 */
static void
mon_st_callback_display(stonith_t * st, stonith_event_t * e)
{
    if (st->state == stonith_disconnected) {
        /* disconnect cib as well and have everything reconnect */
        mon_cib_connection_destroy(NULL);
        return;
    }

    out->progress(out, false);
    refresh_after_event(TRUE, FALSE);
}
/*
 * Shut everything down and exit: de-initialize ncurses, disconnect from the
 * CIB manager and the fencer, release memory, and print the usage message if
 * that is what the exit code calls for.
 *
 * This never actually returns. The nominal crm_exit_t return type merely
 * permits "return clean_up(exit_code);" in callers, which helps static
 * analysis understand the code flow.
 */
static crm_exit_t
clean_up(crm_exit_t exit_code)
{
    const bool usage_error = (exit_code == CRM_EX_USAGE);

    /* Quitting crm_mon is much more complicated than it ought to be. */

    /* (1) Close connections, free things, etc. */
    cib__clean_up_connection(&cib);
    stonith_api_delete(st);
    free(options.neg_location_prefix);
    free(options.only_node);
    free(options.only_rsc);
    free(options.pid_file);
    g_slist_free_full(options.includes_excludes, free);

    g_strfreev(processed_args);

    /* (2) On abnormal termination in curses mode, curses has to be shut down
     * before anything else is printed. Shutting it down restores the screen
     * to its state from before crm_mon started, so anything displayed earlier
     * than that would be lost.
     */
    if ((out != NULL)
        && (output_format == mon_output_console)
        && (usage_error || (error != NULL))) {

        out->finish(out, exit_code, false, NULL);
        pcmk__output_free(out);
        out = NULL;
    }

    /* (3) For a command line usage failure in one of the text-based formats,
     * print the usage message.
     */
    if (usage_error) {
        switch (output_format) {
            case mon_output_console:
            case mon_output_plain:
                {
                    char *help = g_option_context_get_help(context, TRUE, NULL);

                    fprintf(stderr, "%s", help);
                    g_free(help);
                }
                break;

            default:
                break;
        }
    }

    pcmk__free_arg_context(context);

    /* (4) For any kind of error, print it and exit. This must work both
     * before and after formatted output has been set up; the error should be
     * shown formatted whenever possible.
     */
    if (error != NULL) {
        if (out == NULL) {
            fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);

        } else {
            out->err(out, "%s: %s", g_get_prgname(), error->message);
            out->finish(out, exit_code, true, NULL);
            pcmk__output_free(out);
        }

        g_clear_error(&error);
        crm_exit(exit_code);
    }

    /* (5) Otherwise, emit the formatted output if crm_mon got far enough to
     * have any. Daemonized instances have nothing left to print.
     */
    if (out != NULL) {
        if (options.exec_mode != mon_exec_daemonized) {
            out->finish(out, exit_code, true, NULL);
        }

        pcmk__output_free(out);
        pcmk__unregister_formats();
    }

    crm_exit(exit_code);
}

File Metadata

Mime Type
text/x-diff
Expires
Thu, Jul 10, 3:06 AM (1 d, 5 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2009859
Default Alt Text
(252 KB)

Event Timeline