diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 0a03d14254..9965b5c841 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -1,288 +1,294 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* From join_dc... */
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
void
crmd_ha_msg_filter(xmlNode * msg)
{
if (AM_I_DC) {
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
if (safe_str_eq(sys_from, CRM_SYSTEM_DC)) {
const char *from = crm_element_value(msg, F_ORIG);
if (safe_str_neq(from, fsa_our_uname)) {
int level = LOG_INFO;
const char *op = crm_element_value(msg, F_CRM_TASK);
/* make sure the election happens NOW */
if (fsa_state != S_ELECTION) {
ha_msg_input_t new_input;
level = LOG_WARNING;
new_input.msg = msg;
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
__FUNCTION__);
}
do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
goto done;
}
}
} else {
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
if (safe_str_eq(sys_to, CRM_SYSTEM_DC)) {
return;
}
}
/* crm_log_xml_trace("HA[inbound]", msg); */
route_message(C_HA_MESSAGE, msg);
done:
trigger_fsa(fsa_source);
}
#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
uint32_t old = 0;
uint32_t changed = 0;
bool appeared = FALSE;
bool is_remote = is_set(node->flags, crm_remote_node);
const char *status = NULL;
/* The controller waits to receive some information from the membership
* layer before declaring itself operational. If this is being called for a
* cluster node, indicate that we have it.
*/
if (!is_remote) {
set_bit(fsa_input_register, R_PEER_DATA);
}
if (node->uname == NULL) {
return;
}
switch (type) {
case crm_status_uname:
/* If we've never seen the node, then it also won't be in the status section */
crm_info("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
node->uname, state_text(node->state));
return;
case crm_status_nstate:
/* This callback should not be called unless the state actually
* changed, but here's a failsafe just in case.
*/
CRM_CHECK(safe_str_neq(data, node->state), return);
crm_info("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
node->uname, state_text(node->state), state_text(data));
if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
appeared = TRUE;
if (!is_remote) {
remove_stonith_cleanup(node->uname);
}
}
crmd_alert_node_event(node);
break;
case crm_status_processes:
if (data) {
old = *(const uint32_t *)data;
changed = node->processes ^ old;
}
status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS;
crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
node->uname, peer2text(proc_flags), status,
AM_I_DC ? "true" : crm_str(fsa_our_dc), changed);
if ((changed & proc_flags) == 0) {
/* Peer process did not change */
crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags);
return;
} else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
crm_trace("Not connected");
return;
} else if (fsa_state == S_STOPPING) {
crm_trace("Stopping");
return;
}
appeared = (node->processes & proc_flags) != 0;
if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) {
/* Did we get evicted? */
crm_notice("Our peer connection failed");
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
} else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
/* Did the DC leave us? */
crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
/* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
* want to fence it. Newer DCs will send their shutdown request
* to all peers, who will update the DC's expected state to
* down, thus avoiding fencing. We can safely erase the DC's
* transient attributes when it leaves in that case. However,
* the only way to avoid fencing older DCs is to leave the
* transient attributes intact until it rejoins.
*/
if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
}
- } else if(AM_I_DC && appeared == FALSE) {
- crm_info("Peer %s left us", node->uname);
- erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
+ } else if(AM_I_DC) {
+ if (appeared == FALSE) {
+ crm_info("Peer %s left us", node->uname);
+ erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
+ } else {
+ crm_info("New peer %s we want to sync fence history with",
+ node->uname);
+ te_trigger_stonith_history_sync();
+ }
}
break;
}
if (AM_I_DC) {
xmlNode *update = NULL;
int flags = node_update_peer;
gboolean alive = is_remote? appeared : crm_is_peer_active(node);
crm_action_t *down = match_down_event(node->uuid);
crm_trace("Alive=%d, appeared=%d, down=%d",
alive, appeared, (down? down->id : -1));
if (alive && type == crm_status_processes) {
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
if (down) {
const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
if (safe_str_eq(task, CRM_OP_FENCE)) {
/* tengine_stonith_callback() confirms fence actions */
crm_trace("Updating CIB %s fencer reported fencing of %s complete",
(down->confirmed? "after" : "before"), node->uname);
} else if ((alive == FALSE) && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
crm_notice("%s of peer %s is complete "CRM_XS" op=%d",
task, node->uname, down->id);
/* down->confirmed = TRUE; */
stop_te_timer(down->timer);
if (!is_remote) {
flags |= node_update_join | node_update_expected;
crmd_peer_down(node, FALSE);
check_join_state(fsa_state, __FUNCTION__);
}
update_graph(transition_graph, down);
trigger_graph();
} else {
crm_trace("Node %s is %salive, was expected to %s (op %d)",
node->uname, (alive? "" : "not "), task, down->id);
}
} else if (appeared == FALSE) {
crm_warn("Stonith/shutdown of node %s was not expected",
node->uname);
if (!is_remote) {
crm_update_peer_join(__FUNCTION__, node, crm_join_none);
check_join_state(fsa_state, __FUNCTION__);
}
abort_transition(INFINITY, tg_restart, "Node failure", NULL);
fail_incompletable_actions(transition_graph, node->uuid);
} else {
crm_trace("Node %s came up, was not expected to be down",
node->uname);
}
if (is_remote) {
/* A pacemaker_remote node won't have its cluster status updated
* in the CIB by membership-layer callbacks, so do it here.
*/
flags |= node_update_cluster;
/* Trigger resource placement on newly integrated nodes */
if (appeared) {
abort_transition(INFINITY, tg_restart,
"pacemaker_remote node integrated", NULL);
}
}
/* Update the CIB node state */
update = create_node_state_update(node, flags, NULL, __FUNCTION__);
fsa_cib_anon_update(XML_CIB_TAG_STATUS, update,
cib_scope_local | cib_quorum_override | cib_can_create);
free_xml(update);
}
trigger_fsa(fsa_source);
}
void
crmd_cib_connection_destroy(gpointer user_data)
{
CRM_CHECK(user_data == fsa_cib_conn,;);
crm_trace("Invoked");
trigger_fsa(fsa_source);
fsa_cib_conn->state = cib_disconnected;
if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Connection to the CIB manager terminated");
return;
}
// @TODO This should trigger a reconnect, not a shutdown
crm_crit("Lost connection to the CIB manager, shutting down");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit(fsa_input_register, R_CIB_CONNECTED);
return;
}
gboolean
crm_fsa_trigger(gpointer user_data)
{
crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue));
s_crmd_fsa(C_FSA_INTERNAL);
crm_trace("Exited (queue len: %d)", g_list_length(fsa_message_queue));
return TRUE;
}
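
The hunk above makes the DC react to a newly appeared cluster peer by requesting a fence-history sync, instead of only cleaning up after peers that leave. The request is deliberately debounced: the next file implements it with a 5-second one-shot timer that arms a low-priority mainloop trigger. Below is a minimal sketch of that timer-plus-trigger pattern, using the libcrmcommon mainloop API exactly as it appears in this patch; the names here are illustrative and not part of the patch:

#include <crm/common/mainloop.h>

static crm_trigger_t *sync_trigger = NULL;
static mainloop_timer_t *sync_timer = NULL;

/* Low-priority trigger callback: performs the (potentially expensive)
 * work once the debounce window has expired. Returning TRUE keeps the
 * trigger registered for future use. */
static gboolean
do_sync(gpointer user_data)
{
    return TRUE;
}

/* One-shot timer callback: arm the trigger; returning FALSE means the
 * timer does not restart itself. */
static gboolean
arm_trigger(gpointer user_data)
{
    mainloop_set_trigger(sync_trigger);
    return FALSE;
}

static void
request_sync(void)
{
    if (sync_trigger == NULL) {
        sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_sync, NULL);
    }
    if (sync_timer == NULL) {
        sync_timer = mainloop_timer_add("sync", 5000, FALSE,
                                        arm_trigger, NULL);
    }
    /* Restarting the timer on every call coalesces a burst of events
     * into a single do_sync() run roughly 5s after the last event. */
    mainloop_timer_start(sync_timer);
}
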
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
index 3fb4e85728..21e6398190 100644
--- a/daemons/controld/controld_te_utils.c
+++ b/daemons/controld/controld_te_utils.c
@@ -1,663 +1,717 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
crm_trigger_t *stonith_reconnect = NULL;
+static crm_trigger_t *stonith_history_sync_trigger = NULL;
+static mainloop_timer_t *stonith_history_sync_timer = NULL;
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GListPtr stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] target Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GListPtr iter = stonith_cleanup_list;
while (iter != NULL) {
GListPtr tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (safe_str_eq(target, iter_name)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup()
{
if (stonith_cleanup_list) {
GListPtr iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup()
{
GListPtr iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_node_t *target_node = crm_get_peer(0, target);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
static gboolean
fail_incompletable_stonith(crm_graph_t * graph)
{
GListPtr lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GListPtr lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (synapse->confirmed) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
if (action->type != action_type_crm || action->confirmed) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && safe_str_eq(task, CRM_OP_FENCE)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
{
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
/* cbchan will be garbage at this point, arrange for it to be reset */
if(stonith_api) {
stonith_api->state = stonith_disconnected;
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
char *te_client_id = NULL;
#ifdef HAVE_SYS_REBOOT_H
# include
# include
#endif
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
if(te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (st_event == NULL) {
crm_err("Notify data not found");
return;
}
crmd_alert_fencing_op(st_event);
if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
crm_notice("%s was successfully unfenced by %s (at the request of %s)",
st_event->target, st_event->executioner ? st_event->executioner : "", st_event->origin);
/* TODO: Hook up st_event->device */
return;
} else if (safe_str_eq("on", st_event->action)) {
crm_err("Unfencing of %s by %s failed: %s (%d)",
st_event->target, st_event->executioner ? st_event->executioner : "",
pcmk_strerror(st_event->result), st_event->result);
return;
} else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
crm_crit("We were allegedly just fenced by %s for %s!",
st_event->executioner ? st_event->executioner : "", st_event->origin); /* Dumps blackbox if enabled */
qb_log_fini(); /* Try to get the above log message to disk - somehow */
/* Get out ASAP and do not come back up.
*
* Triggering a reboot is also not the worst idea either since
* the rest of the cluster thinks we're safely down
*/
#ifdef RB_HALT_SYSTEM
reboot(RB_HALT_SYSTEM);
#endif
/*
* If reboot() fails or is not supported, coming back up will
* probably lead to a situation where the other nodes set our
* status to 'lost' because of the fencing callback and will
* discard subsequent election votes with:
*
* Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
*
* So just stay dead, something is seriously messed up anyway.
*
*/
exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
return;
}
/* Update the count of stonith failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
if (st_event->result == pcmk_ok) {
st_fail_count_reset(st_event->target);
} else {
st_fail_count_increment(st_event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
CRM_XS " initiator=%s ref=%s",
st_event->target, st_event->result == pcmk_ok ? "" : " not",
st_event->action,
st_event->executioner ? st_event->executioner : "",
(st_event->client_origin? st_event->client_origin : ""),
pcmk_strerror(st_event->result),
st_event->origin, st_event->id);
if (st_event->result == pcmk_ok) {
crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
const char *uuid = NULL;
gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
if(AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, st_event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the PE creating fence pseudo-events for the guests.
*/
if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
/* Abort the current transition graph if it wasn't us
* that invoked stonith to fence someone
*/
crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
}
/* Assume it was our leader if we don't currently have one */
} else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
&& !is_set(peer->flags, crm_remote_node)) {
crm_notice("Target %s our leader %s (recorded: %s)",
fsa_our_dc ? "was" : "may have been", st_event->target,
fsa_our_dc ? fsa_our_dc : "");
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (we_are_executioner) {
send_stonith_update(NULL, st_event->target, uuid);
}
add_stonith_cleanup(st_event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(st_event->target);
}
crmd_peer_down(peer, TRUE);
}
}
+static gboolean
+do_stonith_history_sync(gpointer user_data)
+{
+ if (stonith_api && (stonith_api->state != stonith_disconnected)) {
+ stonith_history_t *history = NULL;
+
+ stonith_api->cmds->history(stonith_api,
+ st_opt_sync_call | st_opt_broadcast,
+ NULL, &history, 5);
+ stonith_history_free(history);
+ return TRUE;
+ } else {
+ crm_info("Skip triggering stonith history-sync as stonith is disconnected");
+ return FALSE;
+ }
+}
+
+static gboolean
+stonith_history_sync_set_trigger(gpointer user_data)
+{
+ mainloop_set_trigger(stonith_history_sync_trigger);
+ return FALSE;
+}
+
+void
+te_trigger_stonith_history_sync(void)
+{
+ /* Trigger a sync in 5s, to give more nodes a chance to show up,
+ * so that we don't create unnecessary stonith-history-sync traffic.
+ */
+
+ /* Because do_stonith_history_sync() checks the fencer connection
+ * itself, it is safe to leave stonith_history_sync_timer and
+ * stonith_history_sync_trigger around.
+ */
+ if (stonith_history_sync_trigger == NULL) {
+ stonith_history_sync_trigger =
+ mainloop_add_trigger(G_PRIORITY_LOW,
+ do_stonith_history_sync, NULL);
+ }
+
+ if(stonith_history_sync_timer == NULL) {
+ stonith_history_sync_timer =
+ mainloop_timer_add("history_sync", 5000,
+ FALSE, stonith_history_sync_set_trigger,
+ NULL);
+ }
+ mainloop_timer_start(stonith_history_sync_timer);
+}
+
gboolean
te_connect_stonith(gpointer user_data)
{
int lpc = 0;
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Still connected");
return TRUE;
}
for (lpc = 0; lpc < 30; lpc++) {
crm_debug("Attempting connection to fencing daemon...");
sleep(1);
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (rc == pcmk_ok) {
break;
}
if (user_data != NULL) {
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_err("Sign-in failed: triggered a retry");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Sign-in failed, but no longer required");
}
return TRUE;
}
crm_err("Sign-in failed: pausing and trying again in 2s...");
sleep(1);
}
CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */
stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
tengine_stonith_notify);
crm_trace("Connected");
return TRUE;
}
gboolean
stop_te_timer(crm_action_timer_t * timer)
{
if (timer == NULL) {
return FALSE;
}
if (timer->source_id != 0) {
crm_trace("Stopping action timer");
g_source_remove(timer->source_id);
timer->source_id = 0;
} else {
crm_trace("Action timer was already stopped");
return FALSE;
}
return TRUE;
}
gboolean
te_graph_trigger(gpointer user_data)
{
enum transition_status graph_rc = -1;
if (transition_graph == NULL) {
crm_debug("Nothing to do");
return TRUE;
}
crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
return TRUE;
break;
default:
break;
}
if (transition_graph->complete == FALSE) {
int limit = transition_graph->batch_limit;
transition_graph->batch_limit = throttle_get_total_job_limit(limit);
graph_rc = run_graph(transition_graph);
transition_graph->batch_limit = limit; /* Restore the configured value */
/* significant overhead... */
/* print_graph(LOG_TRACE, transition_graph); */
if (graph_rc == transition_active) {
crm_trace("Transition not yet complete");
return TRUE;
} else if (graph_rc == transition_pending) {
crm_trace("Transition not yet complete - no actions fired");
return TRUE;
}
if (graph_rc != transition_complete) {
crm_warn("Transition failed: %s", transition_status(graph_rc));
print_graph(LOG_NOTICE, transition_graph);
}
}
crm_debug("Transition %d is now complete", transition_graph->id);
transition_graph->complete = TRUE;
notify_crmd(transition_graph);
return TRUE;
}
void
trigger_graph_processing(const char *fn, int line)
{
crm_trace("%s:%d - Triggered graph processing", fn, line);
mainloop_set_trigger(transition_trigger);
}
static struct abort_timer_s {
bool aborted;
guint id;
int priority;
enum transition_action action;
const char *text;
} abort_timer = { 0, };
static gboolean
abort_timer_popped(gpointer data)
{
if (AM_I_DC && (abort_timer.aborted == FALSE)) {
abort_transition(abort_timer.priority, abort_timer.action,
abort_timer.text, NULL);
}
abort_timer.id = 0;
return FALSE; // do not immediately reschedule timer
}
/*!
* \internal
* \brief Abort transition after delay, if not already aborted in that time
*
* \param[in] abort_text Must be literal string
*/
void
abort_after_delay(int abort_priority, enum transition_action abort_action,
const char *abort_text, guint delay_ms)
{
if (abort_timer.id) {
// Timer already in progress, stop and reschedule
g_source_remove(abort_timer.id);
}
abort_timer.aborted = FALSE;
abort_timer.priority = abort_priority;
abort_timer.action = abort_action;
abort_timer.text = abort_text;
abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL);
}
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
const char *abort_text, xmlNode * reason, const char *fn, int line)
{
int add[] = { 0, 0, 0 };
int del[] = { 0, 0, 0 };
int level = LOG_INFO;
xmlNode *diff = NULL;
xmlNode *change = NULL;
CRM_CHECK(transition_graph != NULL, return);
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
crm_info("Abort %s suppressed: state=%s (complete=%d)",
abort_text, fsa_state2string(fsa_state), transition_graph->complete);
return;
default:
break;
}
abort_timer.aborted = TRUE;
/* Make sure any queued calculations are discarded ASAP */
free(fsa_pe_ref);
fsa_pe_ref = NULL;
if (transition_graph->complete == FALSE) {
if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
level = LOG_NOTICE;
}
}
if(reason) {
xmlNode *search = NULL;
for(search = reason; search; search = search->parent) {
if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
diff = search;
break;
}
}
if(diff) {
xml_patch_versions(diff, add, del);
for(search = reason; search; search = search->parent) {
if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
change = search;
break;
}
}
}
}
if(reason == NULL) {
do_crm_log(level, "Transition %d aborted: %s "CRM_XS" source=%s:%d complete=%s",
transition_graph->id, abort_text, fn, line,
(transition_graph->complete? "true" : "false"));
} else if(change == NULL) {
char *local_path = xml_get_path(reason);
do_crm_log(level, "Transition %d aborted by %s.%s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
transition_graph->id, TYPE(reason), ID(reason), abort_text,
add[0], add[1], add[2], fn, line, local_path,
(transition_graph->complete? "true" : "false"));
free(local_path);
} else {
const char *kind = NULL;
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *path = crm_element_value(change, XML_DIFF_PATH);
if(change == reason) {
if(strcmp(op, "create") == 0) {
reason = reason->children;
} else if(strcmp(op, "modify") == 0) {
reason = first_named_child(reason, XML_DIFF_RESULT);
if(reason) {
reason = reason->children;
}
}
}
kind = TYPE(reason);
if(strcmp(op, "delete") == 0) {
const char *shortpath = strrchr(path, '/');
do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
transition_graph->id,
(shortpath? (shortpath + 1) : path), abort_text,
add[0], add[1], add[2], fn, line, path,
(transition_graph->complete? "true" : "false"));
} else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) {
do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
transition_graph->id,
crm_element_value(reason, XML_ATTR_ID), op,
crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
abort_text, add[0], add[1], add[2], fn, line, path,
(transition_graph->complete? "true" : "false"));
} else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
transition_graph->id,
crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
magic, add[0], add[1], add[2], fn, line,
(transition_graph->complete? "true" : "false"));
} else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
|| safe_str_eq(XML_CIB_TAG_NODE, kind)) {
const char *uname = crm_peer_uname(ID(reason));
do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
transition_graph->id,
kind, op, (uname? uname : ID(reason)), abort_text,
add[0], add[1], add[2], fn, line,
(transition_graph->complete? "true" : "false"));
} else {
const char *id = ID(reason);
do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
transition_graph->id,
TYPE(reason), (id? id : ""), (op? op : "change"),
abort_text, add[0], add[1], add[2], fn, line, path,
(transition_graph->complete? "true" : "false"));
}
}
if (transition_graph->complete) {
if (transition_timer->period_ms > 0) {
crm_timer_stop(transition_timer);
crm_timer_start(transition_timer);
} else {
register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
}
return;
}
mainloop_set_trigger(transition_trigger);
}
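
do_stonith_history_sync() above is the consumer side of the history API: a single synchronous, broadcast history() call prompts the fencer instances to exchange and reconcile their records. For reference, a plain client can read the accumulated history through the same entry point. The following is a hedged sketch, assuming the stonith-ng.h API of this era: the connect()/history() signatures match the calls in this patch, while the target/action/next fields of stonith_history_t are assumed from the public fencing header:

#include <stdio.h>
#include <crm/stonith-ng.h>

int
print_fence_history(void)
{
    stonith_t *st = stonith_api_new();
    stonith_history_t *history = NULL;
    int rc;

    rc = st->cmds->connect(st, "history-example", NULL);
    if (rc == pcmk_ok) {
        /* NULL target means all nodes; without the broadcast flag this
         * reads only the local fencer's view; 120s timeout */
        rc = st->cmds->history(st, st_opt_sync_call, NULL, &history, 120);
        for (stonith_history_t *hp = history; hp != NULL; hp = hp->next) {
            printf("action '%s' targeting %s\n", hp->action, hp->target);
        }
        stonith_history_free(history);
        st->cmds->disconnect(st);
    }
    stonith_api_delete(st);
    return rc;
}
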
diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h
index 563030642b..e2361f8c37 100644
--- a/daemons/controld/controld_transition.h
+++ b/daemons/controld/controld_transition.h
@@ -1,83 +1,85 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef TENGINE__H
# define TENGINE__H
# include
# include
# include
# include
extern stonith_t *stonith_api;
extern void send_stonith_update(crm_action_t * stonith_action, const char *target,
const char *uuid);
/* stonith cleanup list */
void add_stonith_cleanup(const char *target);
void remove_stonith_cleanup(const char *target);
void purge_stonith_cleanup(void);
void execute_stonith_cleanup(void);
/* tengine */
extern crm_action_t *match_down_event(const char *target);
extern crm_action_t *get_cancel_action(const char *id, const char *node);
void controld_record_action_timeout(crm_action_t *action);
extern gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node);
void process_graph_event(xmlNode *event, const char *event_node);
/* utils */
extern crm_action_t *get_action(int id, gboolean confirmed);
extern gboolean stop_te_timer(crm_action_timer_t * timer);
extern const char *get_rsc_state(const char *task, enum op_status status);
/* unpack */
extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data);
extern crm_graph_t *transition_graph;
extern crm_trigger_t *transition_trigger;
extern char *te_uuid;
extern void notify_crmd(crm_graph_t * graph);
void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data);
void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data);
gboolean action_timer_callback(gpointer data);
gboolean te_graph_trigger(gpointer user_data);
void te_update_diff(const char *event, xmlNode *msg);
void tengine_stonith_callback(stonith_t *stonith,
stonith_callback_data_t *data);
void update_stonith_max_attempts(const char* value);
extern void trigger_graph_processing(const char *fn, int line);
void abort_after_delay(int abort_priority, enum transition_action abort_action,
const char *abort_text, guint delay_ms);
extern void abort_transition_graph(int abort_priority, enum transition_action abort_action,
const char *abort_text, xmlNode * reason, const char *fn,
int line);
# define trigger_graph() trigger_graph_processing(__FUNCTION__, __LINE__)
# define abort_transition(pri, action, text, reason) \
abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__);
extern gboolean te_connect_stonith(gpointer user_data);
+extern void te_trigger_stonith_history_sync(void);
+
extern crm_trigger_t *transition_trigger;
extern crm_trigger_t *stonith_reconnect;
extern char *failed_stop_offset;
extern char *failed_start_offset;
extern int active_timeout;
extern int stonith_op_active;
void te_action_confirmed(crm_action_t * action);
void te_reset_job_counts(void);
#endif
diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am
index e9dbf1a9de..59d2674d20 100644
--- a/daemons/fenced/Makefile.am
+++ b/daemons/fenced/Makefile.am
@@ -1,49 +1,50 @@
#
# Copyright 2004-2018 International Business Machines
# Author: Sun Jiang Dong
#
# This source code is licensed under the GNU General Public License version 2
# or later (GPLv2+) WITHOUT ANY WARRANTY.
#
include $(top_srcdir)/Makefile.common
halibdir = $(CRM_DAEMON_DIR)
halib_PROGRAMS = pacemaker-fenced cts-fence-helper
sbin_SCRIPTS = fence_legacy
noinst_HEADERS = pacemaker-fenced.h
if BUILD_XML_HELP
man7_MANS = pacemaker-fenced.7
endif
cts_fence_helper_SOURCES = cts-fence-helper.c
cts_fence_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
$(top_builddir)/lib/fencing/libstonithd.la
pacemaker_fenced_CPPFLAGS = -I$(top_srcdir)/daemons/schedulerd $(AM_CPPFLAGS)
pacemaker_fenced_YFLAGS = -d
pacemaker_fenced_CFLAGS = $(CFLAGS_HARDENED_EXE)
pacemaker_fenced_LDFLAGS = $(LDFLAGS_HARDENED_EXE)
pacemaker_fenced_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
$(top_builddir)/lib/cluster/libcrmcluster.la \
$(top_builddir)/lib/fencing/libstonithd.la \
$(top_builddir)/lib/pengine/libpe_status.la \
$(top_builddir)/daemons/schedulerd/libpengine.la \
$(CLUSTERLIBS)
pacemaker_fenced_SOURCES = pacemaker-fenced.c \
fenced_commands.c \
- fenced_remote.c
+ fenced_remote.c \
+ fenced_history.c
CLEANFILES = $(man7_MANS) $(man8_MANS)
if BUILD_LEGACY_LINKS
install-exec-hook:
cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd && $(LN_S) pacemaker-fenced stonithd
uninstall-hook:
cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd
endif
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 4eb8e02f2b..1fdcee7f4f 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1,2734 +1,2742 @@
/*
* Copyright 2009-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if SUPPORT_CIBSECRETS
# include
#endif
#include
GHashTable *device_list = NULL;
GHashTable *topology = NULL;
GList *cmd_list = NULL;
struct device_search_s {
/* target of fence action */
char *host;
/* requested fence action */
char *action;
/* timeout to use if a device is queried dynamically for possible targets */
int per_device_timeout;
/* number of registered fencing devices at time of request */
int replies_needed;
/* number of device replies received so far */
int replies_received;
/* whether the target is eligible to perform requested action (or off) */
bool allow_suicide;
/* private data to pass to search callback function */
void *user_data;
/* function to call when all replies have been received */
void (*callback) (GList * devices, void *user_data);
/* devices capable of performing requested action (or off if remapping) */
GListPtr capable;
};
static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data);
static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
const char *client_id);
static void search_devices_record_result(struct device_search_s *search, const char *device,
gboolean can_fence);
typedef struct async_command_s {
int id;
int pid;
int fd_stdout;
int options;
int default_timeout; /* seconds */
int timeout; /* seconds */
int start_delay; /* milliseconds */
int delay_id;
char *op;
char *origin;
char *client;
char *client_name;
char *remote_op_id;
char *victim;
uint32_t victim_nodeid;
char *action;
char *device;
char *mode;
GListPtr device_list;
GListPtr device_next;
void *internal_user_data;
void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data);
guint timer_sigterm;
guint timer_sigkill;
/*! If the operation timed out, this is the last signal
* we sent to the process to get it to terminate */
int last_timeout_signo;
stonith_device_t *active_on;
} async_command_t;
static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
xmlNode * data, int rc);
static gboolean
is_action_required(const char *action, stonith_device_t *device)
{
return device && device->automatic_unfencing && safe_str_eq(action, "on");
}
static int
get_action_delay_max(stonith_device_t * device, const char * action)
{
const char *value = NULL;
int delay_max_ms = 0;
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
return 0;
}
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX);
if (value) {
delay_max_ms = crm_get_msec(value);
}
return delay_max_ms;
}
static int
get_action_delay_base(stonith_device_t * device, const char * action)
{
const char *value = NULL;
int delay_base_ms = 0;
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
return 0;
}
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE);
if (value) {
delay_base_ms = crm_get_msec(value);
}
return delay_base_ms;
}
/*!
* \internal
* \brief Override STONITH timeout with pcmk_*_timeout if available
*
* \param[in] device STONITH device to use
* \param[in] action STONITH action name
* \param[in] default_timeout Timeout to use if device does not have
* a pcmk_*_timeout parameter for action
*
* \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
* \note For consistency, it would be nice if reboot/off/on timeouts could be
* set the same way as start/stop/monitor timeouts, i.e. with an
* entry in the fencing resource configuration. However that
* is insufficient because fencing devices may be registered directly via
* the fencer's register_device() API instead of going through the CIB
* (e.g. stonith_admin uses it for its -R option, and the executor uses it
* to ensure a device is registered when a command is issued). As device
* properties, pcmk_*_timeout parameters can be grabbed by the fencer when
* the device is registered, whether by CIB change or API call.
*/
static int
get_action_timeout(stonith_device_t * device, const char *action, int default_timeout)
{
if (action && device && device->params) {
char buffer[64] = { 0, };
const char *value = NULL;
/* If "reboot" was requested but the device does not support it,
* we will remap to "off", so check timeout for "off" instead
*/
if (safe_str_eq(action, "reboot")
&& is_not_set(device->flags, st_device_supports_reboot)) {
crm_trace("%s doesn't support reboot, using timeout for off instead",
device->id);
action = "off";
}
/* If the device config specified an action-specific timeout, use it */
snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
value = g_hash_table_lookup(device->params, buffer);
if (value) {
return atoi(value);
}
}
return default_timeout;
}
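/* Worked example (values are hypothetical, not from this patch): for a
 * device configured with pcmk_reboot_timeout=120, a "reboot" request
 * with a 60s default returns 120. If the same device lacked reboot
 * support, the request would be remapped to "off" and pcmk_off_timeout
 * would be consulted instead.
 */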
static void
free_async_command(async_command_t * cmd)
{
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
cmd_list = g_list_remove(cmd_list, cmd);
g_list_free_full(cmd->device_list, free);
free(cmd->device);
free(cmd->action);
free(cmd->victim);
free(cmd->remote_op_id);
free(cmd->client);
free(cmd->client_name);
free(cmd->origin);
free(cmd->mode);
free(cmd->op);
free(cmd);
}
static async_command_t *
create_async_command(xmlNode * msg)
{
async_command_t *cmd = NULL;
xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
const char *action = crm_element_value(op, F_STONITH_ACTION);
CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL);
crm_log_xml_trace(msg, "Command");
cmd = calloc(1, sizeof(async_command_t));
crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
cmd->timeout = cmd->default_timeout;
cmd->origin = crm_element_value_copy(msg, F_ORIG);
cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
cmd->action = strdup(action);
cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET);
cmd->mode = crm_element_value_copy(op, F_STONITH_MODE);
cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);
CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL);
CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient"));
cmd->done_cb = st_child_done;
cmd_list = g_list_append(cmd_list, cmd);
return cmd;
}
static int
get_action_limit(stonith_device_t * device)
{
const char *value = NULL;
int action_limit = 1;
value = g_hash_table_lookup(device->params, STONITH_ATTR_ACTION_LIMIT);
if (value) {
action_limit = crm_parse_int(value, "1");
if (action_limit == 0) {
/* pcmk_action_limit should not be 0. Enforce it to be 1. */
action_limit = 1;
}
}
return action_limit;
}
static int
get_active_cmds(stonith_device_t * device)
{
int counter = 0;
GListPtr gIter = NULL;
GListPtr gIterNext = NULL;
CRM_CHECK(device != NULL, return 0);
for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
async_command_t *cmd = gIter->data;
gIterNext = gIter->next;
if (cmd->active_on == device) {
counter++;
}
}
return counter;
}
static gboolean
stonith_device_execute(stonith_device_t * device)
{
int exec_rc = 0;
const char *action_str = NULL;
async_command_t *cmd = NULL;
stonith_action_t *action = NULL;
int active_cmds = 0;
int action_limit = 0;
CRM_CHECK(device != NULL, return FALSE);
active_cmds = get_active_cmds(device);
action_limit = get_action_limit(device);
if (action_limit > -1 && active_cmds >= action_limit) {
crm_trace("%s is over its action limit of %d (%u active action%s)",
device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : "");
return TRUE;
}
if (device->pending_ops) {
GList *first = device->pending_ops;
cmd = first->data;
if (cmd && cmd->delay_id) {
crm_trace
("Operation %s%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "",
device->id, cmd->start_delay);
return TRUE;
}
device->pending_ops = g_list_remove_link(device->pending_ops, first);
g_list_free_1(first);
}
if (cmd == NULL) {
crm_trace("Nothing further to do for %s", device->id);
return TRUE;
}
if(safe_str_eq(device->agent, STONITH_WATCHDOG_AGENT)) {
if(safe_str_eq(cmd->action, "reboot")) {
pcmk_panic(__FUNCTION__);
return TRUE;
} else if(safe_str_eq(cmd->action, "off")) {
pcmk_panic(__FUNCTION__);
return TRUE;
} else {
crm_info("Faking success for %s watchdog operation", cmd->action);
cmd->done_cb(0, 0, NULL, cmd);
return TRUE;
}
}
#if SUPPORT_CIBSECRETS
if (replace_secret_params(device->id, device->params) < 0) {
/* replacing secrets failed! */
if (safe_str_eq(cmd->action,"stop")) {
/* don't fail on stop! */
crm_info("proceeding with the stop operation for %s", device->id);
} else {
crm_err("failed to get secrets for %s, "
"considering resource not configured", device->id);
exec_rc = PCMK_OCF_NOT_CONFIGURED;
cmd->done_cb(0, exec_rc, NULL, cmd);
return TRUE;
}
}
#endif
action_str = cmd->action;
if (safe_str_eq(cmd->action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) {
crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent);
action_str = "off";
}
action = stonith_action_create(device->agent,
action_str,
cmd->victim,
cmd->victim_nodeid,
cmd->timeout, device->params, device->aliases);
/* for async exec, exec_rc is pid if positive and error code if negative/zero */
exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb);
if (exec_rc > 0) {
crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds",
cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "",
device->id, exec_rc, cmd->timeout);
cmd->active_on = device;
} else {
crm_warn("Operation %s%s%s on %s failed: %s (%d)",
cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "",
device->id, pcmk_strerror(exec_rc), exec_rc);
cmd->done_cb(0, exec_rc, NULL, cmd);
}
return TRUE;
}
static gboolean
stonith_device_dispatch(gpointer user_data)
{
return stonith_device_execute(user_data);
}
static gboolean
start_delay_helper(gpointer data)
{
async_command_t *cmd = data;
stonith_device_t *device = NULL;
cmd->delay_id = 0;
device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
if (device) {
mainloop_set_trigger(device->work);
}
return FALSE;
}
static void
schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
{
int delay_max = 0;
int delay_base = 0;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(device != NULL, return);
if (cmd->device) {
free(cmd->device);
}
if (device->include_nodeid && cmd->victim) {
crm_node_t *node = crm_get_peer(0, cmd->victim);
cmd->victim_nodeid = node->id;
}
cmd->device = strdup(device->id);
cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
if (cmd->remote_op_id) {
crm_debug("Scheduling %s on %s for remote peer %s with op id (%s) (timeout=%ds)",
cmd->action, device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
} else {
crm_debug("Scheduling %s on %s for %s (timeout=%ds)",
cmd->action, device->id, cmd->client, cmd->timeout);
}
device->pending_ops = g_list_append(device->pending_ops, cmd);
mainloop_set_trigger(device->work);
delay_max = get_action_delay_max(device, cmd->action);
delay_base = get_action_delay_base(device, cmd->action);
if (delay_max == 0) {
delay_max = delay_base;
}
if (delay_max < delay_base) {
crm_warn("Base-delay (%dms) is larger than max-delay (%dms) "
"for %s on %s - limiting to max-delay",
delay_base, delay_max, cmd->action, device->id);
delay_base = delay_max;
}
if (delay_max > 0) {
// coverity[dont_call] We're not using rand() for security
cmd->start_delay =
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+ delay_base;
crm_notice("Delaying %s on %s for %dms (timeout=%ds, base=%dms, "
"max=%dms)",
cmd->action, device->id, cmd->start_delay, cmd->timeout,
delay_base, delay_max);
cmd->delay_id =
g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
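/* Worked example (hypothetical values): with pcmk_delay_base=1000 and
 * pcmk_delay_max=5000, start_delay is 1000 + rand() % 4000, i.e. a
 * uniformly random delay in [1000, 4999] ms. With only
 * pcmk_delay_max=5000 set, the range is [0, 4999] ms; with only
 * pcmk_delay_base=1000 set, delay_max is raised to match and the delay
 * is exactly 1000 ms.
 */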
static void
free_device(gpointer data)
{
GListPtr gIter = NULL;
stonith_device_t *device = data;
g_hash_table_destroy(device->params);
g_hash_table_destroy(device->aliases);
for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
async_command_t *cmd = gIter->data;
crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action);
cmd->done_cb(0, -ENODEV, NULL, cmd);
}
g_list_free(device->pending_ops);
g_list_free_full(device->targets, free);
mainloop_destroy_trigger(device->work);
free_xml(device->agent_metadata);
free(device->namespace);
free(device->on_target_actions);
free(device->agent);
free(device->id);
free(device);
}
void free_device_list()
{
if (device_list != NULL) {
g_hash_table_destroy(device_list);
device_list = NULL;
}
}
void
init_device_list()
{
if (device_list == NULL) {
device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
free_device);
}
}
static GHashTable *
build_port_aliases(const char *hostmap, GListPtr * targets)
{
char *name = NULL;
int last = 0, lpc = 0, max = 0, added = 0;
GHashTable *aliases = crm_strcase_table_new();
if (hostmap == NULL) {
return aliases;
}
max = strlen(hostmap);
for (; lpc <= max; lpc++) {
switch (hostmap[lpc]) {
/* Assignment chars */
case '=':
case ':':
if (lpc > last) {
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, hostmap + last, lpc - last);
}
last = lpc + 1;
break;
/* Delimiter chars */
/* case ',': Potentially used to specify multiple ports */
case 0:
case ';':
case ' ':
case '\t':
if (name) {
char *value = NULL;
value = calloc(1, 1 + lpc - last);
memcpy(value, hostmap + last, lpc - last);
crm_debug("Adding alias '%s'='%s'", name, value);
g_hash_table_replace(aliases, name, value);
if (targets) {
*targets = g_list_append(*targets, strdup(value));
}
value = NULL;
name = NULL;
added++;
} else if (lpc > last) {
crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
}
last = lpc + 1;
break;
}
if (hostmap[lpc] == 0) {
break;
}
}
if (added == 0) {
crm_info("No host mappings detected in '%s'", hostmap);
}
free(name);
return aliases;
}
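/* Example (hypothetical mapping, per the parser above): a
 * pcmk_host_map of "node1:1;node2:2 node3=plug3" yields the aliases
 * node1 -> "1", node2 -> "2" and node3 -> "plug3", and appends "1",
 * "2" and "plug3" to *targets when it is supplied.
 */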
static void
parse_host_line(const char *line, int max, GListPtr * output)
{
int lpc = 0;
int last = 0;
if (max <= 0) {
return;
}
/* Check for any complaints about additional parameters that the device doesn't understand */
if (strstr(line, "invalid") || strstr(line, "variable")) {
crm_debug("Skipping: %s", line);
return;
}
crm_trace("Processing %d bytes: [%s]", max, line);
/* Skip initial whitespace */
for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) {
last = lpc + 1;
}
/* Now the actual content */
for (lpc = 0; lpc <= max; lpc++) {
gboolean a_space = isspace(line[lpc]);
if (a_space && lpc < max && isspace(line[lpc + 1])) {
/* fast-forward to the end of the spaces */
} else if (a_space || line[lpc] == ',' || line[lpc] == ';' || line[lpc] == 0) {
int rc = 1;
char *entry = NULL;
if (lpc != last) {
entry = calloc(1, 1 + lpc - last);
rc = sscanf(line + last, "%[a-zA-Z0-9_-.]", entry);
}
if (entry == NULL) {
/* Skip */
} else if (rc != 1) {
crm_warn("Could not parse (%d %d): %s", last, lpc, line + last);
} else if (safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) {
crm_trace("Adding '%s'", entry);
*output = g_list_append(*output, entry);
entry = NULL;
}
free(entry);
last = lpc + 1;
}
}
}
static GListPtr
parse_host_list(const char *hosts)
{
int lpc = 0;
int max = 0;
int last = 0;
GListPtr output = NULL;
if (hosts == NULL) {
return output;
}
max = strlen(hosts);
for (lpc = 0; lpc <= max; lpc++) {
if (hosts[lpc] == '\n' || hosts[lpc] == 0) {
int len = lpc - last;
if(len > 1) {
char *line = strndup(hosts + last, len);
line[len] = 0; /* Because it might be '\n' */
parse_host_line(line, len, &output);
free(line);
}
last = lpc + 1;
}
}
crm_trace("Parsed %d entries from '%s'", g_list_length(output), hosts);
return output;
}
GHashTable *metadata_cache = NULL;
void
free_metadata_cache() {
if (metadata_cache != NULL) {
g_hash_table_destroy(metadata_cache);
metadata_cache = NULL;
}
}
static void
init_metadata_cache() {
if (metadata_cache == NULL) {
metadata_cache = crm_str_table_new();
}
}
static xmlNode *
get_agent_metadata(const char *agent)
{
xmlNode *xml = NULL;
char *buffer = NULL;
init_metadata_cache();
buffer = g_hash_table_lookup(metadata_cache, agent);
if(safe_str_eq(agent, STONITH_WATCHDOG_AGENT)) {
return NULL;
} else if(buffer == NULL) {
stonith_t *st = stonith_api_new();
int rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
stonith_api_delete(st);
if (rc || !buffer) {
crm_err("Could not retrieve metadata for fencing agent %s", agent);
return NULL;
}
g_hash_table_replace(metadata_cache, strdup(agent), buffer);
}
xml = string2xml(buffer);
return xml;
}
static gboolean
is_nodeid_required(xmlNode * xml)
{
xmlXPathObjectPtr xpath = NULL;
if (stand_alone) {
return FALSE;
}
if (!xml) {
return FALSE;
}
xpath = xpath_search(xml, "//parameter[@name='nodeid']");
if (numXpathResults(xpath) <= 0) {
freeXpathObject(xpath);
return FALSE;
}
freeXpathObject(xpath);
return TRUE;
}
#define MAX_ACTION_LEN 256
static char *
add_action(char *actions, const char *action)
{
int offset = 0;
if (actions == NULL) {
actions = calloc(1, MAX_ACTION_LEN);
} else {
offset = strlen(actions);
}
if (offset > 0) {
offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " ");
}
offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action);
return actions;
}
static void
read_action_metadata(stonith_device_t *device)
{
xmlXPathObjectPtr xpath = NULL;
int max = 0;
int lpc = 0;
if (device->agent_metadata == NULL) {
return;
}
xpath = xpath_search(device->agent_metadata, "//action");
max = numXpathResults(xpath);
if (max <= 0) {
freeXpathObject(xpath);
return;
}
for (lpc = 0; lpc < max; lpc++) {
const char *on_target = NULL;
const char *action = NULL;
xmlNode *match = getXpathResult(xpath, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match == NULL) { continue; };
on_target = crm_element_value(match, "on_target");
action = crm_element_value(match, "name");
if(safe_str_eq(action, "list")) {
set_bit(device->flags, st_device_supports_list);
} else if(safe_str_eq(action, "status")) {
set_bit(device->flags, st_device_supports_status);
} else if(safe_str_eq(action, "reboot")) {
set_bit(device->flags, st_device_supports_reboot);
} else if (safe_str_eq(action, "on")) {
/* "automatic" means the cluster will unfence node when it joins */
const char *automatic = crm_element_value(match, "automatic");
/* "required" is a deprecated synonym for "automatic" */
const char *required = crm_element_value(match, "required");
if (crm_is_true(automatic) || crm_is_true(required)) {
device->automatic_unfencing = TRUE;
}
}
if (action && crm_is_true(on_target)) {
device->on_target_actions = add_action(device->on_target_actions, action);
}
}
freeXpathObject(xpath);
}
/*!
* \internal
* \brief Set a pcmk_*_action parameter if not already set
*
* \param[in,out] params Device parameters
* \param[in] action Name of action
* \param[in] value Value to use if action is not already set
*/
static void
map_action(GHashTable *params, const char *action, const char *value)
{
char *key = crm_strdup_printf("pcmk_%s_action", action);
if (g_hash_table_lookup(params, key)) {
crm_warn("Ignoring %s='%s', see %s instead",
STONITH_ATTR_ACTION_OP, value, key);
free(key);
} else {
crm_warn("Mapping %s='%s' to %s='%s'",
STONITH_ATTR_ACTION_OP, value, key, value);
g_hash_table_insert(params, key, strdup(value));
}
}
/*!
* \internal
* \brief Create device parameter table from XML
*
* \param[in] name Device name (used for logging only)
* \param[in,out] params Device parameters
*/
static GHashTable *
xml2device_params(const char *name, xmlNode *dev)
{
GHashTable *params = xml2list(dev);
const char *value;
/* Action should never be specified in the device configuration,
* but we support it for users who are familiar with other software
* that worked that way.
*/
value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
if (value != NULL) {
crm_warn("%s has '%s' parameter, which should never be specified in configuration",
name, STONITH_ATTR_ACTION_OP);
if (*value == '\0') {
crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, "reboot") == 0) {
crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, "off") == 0) {
map_action(params, "reboot", value);
} else {
map_action(params, "off", value);
map_action(params, "reboot", value);
}
g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
}
return params;
}
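/* Example (per the remapping above): a legacy device parameter
 * action="off" becomes pcmk_reboot_action="off", so reboot requests
 * power the node off instead; action="reboot" is dropped with a
 * pointer to the stonith-action cluster property; any other value is
 * mapped to both pcmk_off_action and pcmk_reboot_action.
 */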
static stonith_device_t *
build_device_from_xml(xmlNode * msg)
{
const char *value = NULL;
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
stonith_device_t *device = NULL;
device = calloc(1, sizeof(stonith_device_t));
device->id = crm_element_value_copy(dev, XML_ATTR_ID);
device->agent = crm_element_value_copy(dev, "agent");
device->namespace = crm_element_value_copy(dev, "namespace");
device->params = xml2device_params(device->id, dev);
value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST);
if (value) {
device->targets = parse_host_list(value);
}
value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP);
device->aliases = build_port_aliases(value, &(device->targets));
device->agent_metadata = get_agent_metadata(device->agent);
read_action_metadata(device);
value = g_hash_table_lookup(device->params, "nodeid");
if (!value) {
device->include_nodeid = is_nodeid_required(device->agent_metadata);
}
value = crm_element_value(dev, "rsc_provides");
if (safe_str_eq(value, "unfencing")) {
device->automatic_unfencing = TRUE;
}
if (is_action_required("on", device)) {
crm_info("The fencing device '%s' requires unfencing", device->id);
}
if (device->on_target_actions) {
crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node",
device->id, device->on_target_actions);
}
device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
/* TODO: Hook up priority */
return device;
}
static const char *
target_list_type(stonith_device_t * dev)
{
const char *check_type = NULL;
check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK);
if (check_type == NULL) {
if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) {
check_type = "static-list";
} else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) {
check_type = "static-list";
} else if(is_set(dev->flags, st_device_supports_list)){
check_type = "dynamic-list";
} else if(is_set(dev->flags, st_device_supports_status)){
check_type = "status";
} else {
check_type = "none";
}
}
return check_type;
}
void
schedule_internal_command(const char *origin,
stonith_device_t * device,
const char *action,
const char *victim,
int timeout,
void *internal_user_data,
void (*done_cb) (GPid pid, int rc, const char *output,
gpointer user_data))
{
async_command_t *cmd = NULL;
cmd = calloc(1, sizeof(async_command_t));
cmd->id = -1;
cmd->default_timeout = timeout ? timeout : 60;
cmd->timeout = cmd->default_timeout;
cmd->action = strdup(action);
cmd->victim = victim ? strdup(victim) : NULL;
cmd->device = strdup(device->id);
cmd->origin = strdup(origin);
cmd->client = strdup(crm_system_name);
cmd->client_name = strdup(crm_system_name);
cmd->internal_user_data = internal_user_data;
cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
schedule_stonith_command(cmd, device);
}
gboolean
string_in_list(GListPtr list, const char *item)
{
int lpc = 0;
int max = g_list_length(list);
for (lpc = 0; lpc < max; lpc++) {
const char *value = g_list_nth_data(list, lpc);
if (safe_str_eq(item, value)) {
return TRUE;
} else {
crm_trace("%d: '%s' != '%s'", lpc, item, value);
}
}
return FALSE;
}
static void
status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
gboolean can = FALSE;
free_async_command(cmd);
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (rc == 1 /* unknown */ ) {
crm_trace("Host %s is not known by %s", search->host, dev->id);
} else if (rc == 0 /* active */ || rc == 2 /* inactive */ ) {
crm_trace("Host %s is known by %s", search->host, dev->id);
can = TRUE;
} else {
crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host,
rc);
}
search_devices_record_result(search, dev->id, can);
}
static void
dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
gboolean can_fence = FALSE;
free_async_command(cmd);
/* Host/alias must be in the list output to be eligible to be fenced
*
* Will cause problems if down'd nodes aren't listed or (for virtual nodes)
* if the guest is still listed despite being moved to another machine
*/
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
/* If we successfully got the targets earlier, don't disable. */
if (rc != 0 && !dev->targets) {
crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output);
/* Fall back to status */
g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status"));
g_list_free_full(dev->targets, free);
dev->targets = NULL;
} else if (!rc) {
crm_info("Refreshing port list for %s", dev->id);
g_list_free_full(dev->targets, free);
dev->targets = parse_host_list(output);
dev->targets_age = time(NULL);
}
if (dev->targets) {
const char *alias = g_hash_table_lookup(dev->aliases, search->host);
if (!alias) {
alias = search->host;
}
if (string_in_list(dev->targets, alias)) {
can_fence = TRUE;
}
}
search_devices_record_result(search, dev->id, can_fence);
}
/*!
* \internal
 * \brief Return 1 if any key in first is absent from second or has a different value in second, else 0
*/
static int
device_params_diff(GHashTable *first, GHashTable *second) {
char *key = NULL;
char *value = NULL;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, first);
while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
if(strstr(key, "CRM_meta") == key) {
continue;
} else if(strcmp(key, "crm_feature_set") == 0) {
continue;
} else {
char *other_value = g_hash_table_lookup(second, key);
if (!other_value || safe_str_neq(other_value, value)) {
crm_trace("Different value for %s: %s != %s", key, other_value, value);
return 1;
}
}
}
return 0;
}
/*!
* \internal
* \brief Checks to see if an identical device already exists in the device_list
*/
static stonith_device_t *
device_has_duplicate(stonith_device_t * device)
{
stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
if (!dup) {
crm_trace("No match for %s", device->id);
return NULL;
} else if (safe_str_neq(dup->agent, device->agent)) {
crm_trace("Different agent: %s != %s", dup->agent, device->agent);
return NULL;
}
/* Use calculate_operation_digest() here? */
if (device_params_diff(device->params, dup->params) ||
device_params_diff(dup->params, device->params)) {
return NULL;
}
crm_trace("Match");
return dup;
}
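/*!
 * \internal
 * \brief Build a device from XML and add it to the device list, reusing an
 *        identical already-registered entry when one exists
 *
 * \param[in]  msg      XML defining the device
 * \param[out] desc     If not NULL, will be set to the device id
 * \param[in]  from_cib Whether the registration originates from the CIB
 *
 * \return pcmk_ok
 */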
int
stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
{
stonith_device_t *dup = NULL;
stonith_device_t *device = build_device_from_xml(msg);
dup = device_has_duplicate(device);
if (dup) {
crm_debug("Device '%s' already existed in device list (%d active devices)", device->id,
g_hash_table_size(device_list));
free_device(device);
device = dup;
} else {
stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
if (from_cib && old && old->api_registered) {
/* If the cib is writing over an entry that is shared with a stonith client,
* copy any pending ops that currently exist on the old entry to the new one.
* Otherwise the pending ops will be reported as failures
*/
crm_info("Overwriting an existing entry for %s from the cib", device->id);
device->pending_ops = old->pending_ops;
device->api_registered = TRUE;
old->pending_ops = NULL;
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
}
g_hash_table_replace(device_list, device->id, device);
crm_notice("Added '%s' to the device list (%d active devices)", device->id,
g_hash_table_size(device_list));
}
if (desc) {
*desc = device->id;
}
if (from_cib) {
device->cib_registered = TRUE;
} else {
device->api_registered = TRUE;
}
return pcmk_ok;
}
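/*!
 * \internal
 * \brief Unregister a device from the CIB or the API, removing it from the
 *        device list only once neither registration remains
 */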
int
stonith_device_remove(const char *id, gboolean from_cib)
{
stonith_device_t *device = g_hash_table_lookup(device_list, id);
if (!device) {
crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list));
return pcmk_ok;
}
if (from_cib) {
device->cib_registered = FALSE;
} else {
device->verified = FALSE;
device->api_registered = FALSE;
}
if (!device->cib_registered && !device->api_registered) {
g_hash_table_remove(device_list, id);
crm_info("Removed '%s' from the device list (%d active devices)",
id, g_hash_table_size(device_list));
} else {
crm_trace("Not removing '%s' from the device list (%d active devices) "
"- still %s%s_registered", id, g_hash_table_size(device_list),
device->cib_registered?"cib":"", device->api_registered?"api":"");
}
return pcmk_ok;
}
/*!
* \internal
* \brief Return the number of stonith levels registered for a node
*
* \param[in] tp Node's topology table entry
*
* \return Number of non-NULL levels in topology entry
* \note This function is used only for log messages.
*/
static int
count_active_levels(stonith_topology_t * tp)
{
int lpc = 0;
int count = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
count++;
}
}
return count;
}
static void
free_topology_entry(gpointer data)
{
stonith_topology_t *tp = data;
int lpc = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
g_list_free_full(tp->levels[lpc], free);
}
}
free(tp->target);
free(tp->target_value);
free(tp->target_pattern);
free(tp->target_attribute);
free(tp);
}
void
free_topology_list()
{
if (topology != NULL) {
g_hash_table_destroy(topology);
topology = NULL;
}
}
void
init_topology_list()
{
if (topology == NULL) {
topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
free_topology_entry);
}
}
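/*!
 * \internal
 * \brief Return a newly allocated topology table key for a level's target
 *
 * The key depends on how the target is specified: the node name (mode 0),
 * the pattern (mode 1), or "NAME=VALUE" for a node attribute (mode 2); for
 * example, target-attribute "rack" with target-value "1" yields "rack=1".
 * Mode -1 means determine the mode from the level XML itself.
 */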
char *stonith_level_key(xmlNode *level, int mode)
{
if(mode == -1) {
mode = stonith_level_kind(level);
}
switch(mode) {
case 0:
return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
case 1:
return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
case 2:
{
const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE);
if(name && value) {
return crm_strdup_printf("%s=%s", name, value);
}
}
default:
return crm_strdup_printf("Unknown-%d-%s", mode, ID(level));
}
}
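/*!
 * \internal
 * \brief Return how a level's target is specified: 0 for an explicit node
 *        name, 1 for a pattern, 2 for a node attribute, or 3+ if invalid
 */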
int stonith_level_kind(xmlNode * level)
{
int mode = 0;
const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET);
if(target == NULL) {
mode++;
target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN);
}
if(stand_alone == FALSE && target == NULL) {
mode++;
if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) {
mode++;
} else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) {
mode++;
}
}
return mode;
}
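/*!
 * \internal
 * \brief Parse a comma-separated device list, e.g. "fence1,fence2", into a
 *        stonith_key_value_t list
 */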
static stonith_key_value_t *
parse_device_list(const char *devices)
{
int lpc = 0;
int max = 0;
int last = 0;
stonith_key_value_t *output = NULL;
if (devices == NULL) {
return output;
}
max = strlen(devices);
for (lpc = 0; lpc <= max; lpc++) {
if (devices[lpc] == ',' || devices[lpc] == 0) {
char *line = strndup(devices + last, lpc - last);
output = stonith_key_value_add(output, NULL, line);
free(line);
last = lpc + 1;
}
}
return output;
}
/*!
* \internal
* \brief Register a STONITH level for a target
*
* Given an XML request specifying the target name, level index, and device IDs
* for the level, this will create an entry for the target in the global topology
* table if one does not already exist, then append the specified device IDs to
* the entry's device list for the specified level.
*
* \param[in] msg XML request for STONITH level registration
* \param[out] desc If not NULL, will be set to string representation ("TARGET[LEVEL]")
*
 * \return pcmk_ok on success, -EINVAL if the XML does not specify a valid target or level index
*/
int
stonith_level_register(xmlNode *msg, char **desc)
{
int id = 0;
xmlNode *level;
int mode;
char *target;
stonith_topology_t *tp;
stonith_key_value_t *dIter = NULL;
stonith_key_value_t *devices = NULL;
/* Allow the XML here to point to the level tag directly, or wrapped in
* another tag. If directly, don't search by xpath, because it might give
* multiple hits (e.g. if the XML is the CIB).
*/
if (safe_str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL)) {
level = msg;
} else {
level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
}
CRM_CHECK(level != NULL, return -EINVAL);
mode = stonith_level_kind(level);
target = stonith_level_key(level, mode);
crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
if (desc) {
*desc = crm_strdup_printf("%s[%d]", target, id);
}
/* Sanity-check arguments */
if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
free(target);
crm_log_xml_err(level, "Bad topology");
return -EINVAL;
}
/* Find or create topology table entry */
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
tp = calloc(1, sizeof(stonith_topology_t));
tp->kind = mode;
tp->target = target;
tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
g_hash_table_replace(topology, tp->target, tp);
crm_trace("Added %s (%d) to the topology (%d active entries)",
target, mode, g_hash_table_size(topology));
} else {
free(target);
}
if (tp->levels[id] != NULL) {
crm_info("Adding to the existing %s[%d] topology entry",
tp->target, id);
}
devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
for (dIter = devices; dIter; dIter = dIter->next) {
const char *device = dIter->value;
crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
}
stonith_key_value_freeall(devices, 1, 1);
crm_info("Target %s has %d active fencing levels",
tp->target, count_active_levels(tp));
return pcmk_ok;
}
int
stonith_level_remove(xmlNode *msg, char **desc)
{
int id = 0;
stonith_topology_t *tp;
char *target;
/* Unlike additions, removal requests should always have one level tag */
xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
CRM_CHECK(level != NULL, return -EINVAL);
target = stonith_level_key(level, -1);
crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
if (desc) {
*desc = crm_strdup_printf("%s[%d]", target, id);
}
/* Sanity-check arguments */
if (id >= ST_LEVEL_MAX) {
free(target);
return -EINVAL;
}
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
crm_info("Topology for %s not found (%d active entries)",
target, g_hash_table_size(topology));
} else if (id == 0 && g_hash_table_remove(topology, target)) {
crm_info("Removed all %s related entries from the topology (%d active entries)",
target, g_hash_table_size(topology));
} else if (id > 0 && tp->levels[id] != NULL) {
g_list_free_full(tp->levels[id], free);
tp->levels[id] = NULL;
crm_info("Removed level '%d' from topology for %s (%d active levels remaining)",
id, target, count_active_levels(tp));
}
free(target);
return pcmk_ok;
}
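/*!
 * \internal
 * \brief Schedule an agent action on a specific registered device
 *
 * \return -EINPROGRESS if the command was scheduled, -ENODEV if the device
 *         is unknown or not API-registered, -EPROTO if the request cannot
 *         be parsed into a command
 */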
static int
stonith_device_action(xmlNode * msg, char **output)
{
int rc = pcmk_ok;
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
const char *id = crm_element_value(dev, F_STONITH_DEVICE);
async_command_t *cmd = NULL;
stonith_device_t *device = NULL;
if (id) {
crm_trace("Looking for '%s'", id);
device = g_hash_table_lookup(device_list, id);
}
if (device && device->api_registered == FALSE) {
rc = -ENODEV;
} else if (device) {
cmd = create_async_command(msg);
if (cmd == NULL) {
return -EPROTO;
}
schedule_stonith_command(cmd, device);
rc = -EINPROGRESS;
} else {
crm_info("Device %s not found", id ? id : "");
rc = -ENODEV;
}
return rc;
}
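/*!
 * \internal
 * \brief Record one device's result for a capability search; once the last
 *        expected reply arrives, run the search callback and free the search
 */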
static void
search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
{
search->replies_received++;
if (can_fence && device) {
search->capable = g_list_append(search->capable, strdup(device));
}
if (search->replies_needed == search->replies_received) {
crm_debug("Finished Search. %d devices can perform action (%s) on node %s",
g_list_length(search->capable),
search->action ? search->action : "",
search->host ? search->host : "");
search->callback(search->capable, search->user_data);
free(search->host);
free(search->action);
free(search);
}
}
/*!
* \internal
* \brief Check whether the local host is allowed to execute a fencing action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Hostname of fence target
* \param[in] allow_suicide Whether self-fencing is allowed for this operation
*
* \return TRUE if local host is allowed to execute action, FALSE otherwise
*/
static gboolean
localhost_is_eligible(const stonith_device_t *device, const char *action,
const char *target, gboolean allow_suicide)
{
gboolean localhost_is_target = safe_str_eq(target, stonith_our_uname);
if (device && action && device->on_target_actions
&& strstr(device->on_target_actions, action)) {
if (!localhost_is_target) {
crm_trace("%s operation with %s can only be executed for localhost not %s",
action, device->id, target);
return FALSE;
}
} else if (localhost_is_target && !allow_suicide) {
crm_trace("%s operation does not support self-fencing", action);
return FALSE;
}
return TRUE;
}
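/*!
 * \internal
 * \brief Check whether a device can fence the search target, according to
 *        the device's host-check method; "dynamic-list" and "status" checks
 *        may be resolved asynchronously via a scheduled agent command
 */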
static void
can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search)
{
gboolean can = FALSE;
const char *check_type = NULL;
const char *host = search->host;
const char *alias = NULL;
CRM_LOG_ASSERT(dev != NULL);
if (dev == NULL) {
goto search_report_results;
} else if (host == NULL) {
can = TRUE;
goto search_report_results;
}
/* Short-circuit query if this host is not allowed to perform the action */
if (safe_str_eq(search->action, "reboot")) {
/* A "reboot" *might* get remapped to "off" then "on", so short-circuit
* only if all three are disallowed. If only one or two are disallowed,
* we'll report that with the results. We never allow suicide for
* remapped "on" operations because the host is off at that point.
*/
if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide)
&& !localhost_is_eligible(dev, "off", host, search->allow_suicide)
&& !localhost_is_eligible(dev, "on", host, FALSE)) {
goto search_report_results;
}
} else if (!localhost_is_eligible(dev, search->action, host,
search->allow_suicide)) {
goto search_report_results;
}
alias = g_hash_table_lookup(dev->aliases, host);
if (alias == NULL) {
alias = host;
}
check_type = target_list_type(dev);
if (safe_str_eq(check_type, "none")) {
can = TRUE;
} else if (safe_str_eq(check_type, "static-list")) {
/* Presence in the hostmap is sufficient
* Only use if all hosts on which the device can be active can always fence all listed hosts
*/
if (string_in_list(dev->targets, host)) {
can = TRUE;
} else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)
&& g_hash_table_lookup(dev->aliases, host)) {
can = TRUE;
}
} else if (safe_str_eq(check_type, "dynamic-list")) {
time_t now = time(NULL);
if (dev->targets == NULL || dev->targets_age + 60 < now) {
crm_trace("Running %s command to see if %s can fence %s (%s)",
check_type, dev->id, search->host, search->action);
schedule_internal_command(__FUNCTION__, dev, "list", NULL,
search->per_device_timeout, search, dynamic_list_search_cb);
/* we'll respond to this search request async in the cb */
return;
}
if (string_in_list(dev->targets, alias)) {
can = TRUE;
}
} else if (safe_str_eq(check_type, "status")) {
crm_trace("Running %s command to see if %s can fence %s (%s)",
check_type, dev->id, search->host, search->action);
schedule_internal_command(__FUNCTION__, dev, "status", search->host,
search->per_device_timeout, search, status_search_cb);
/* we'll respond to this search request async in the cb */
return;
} else {
crm_err("Unknown check type: %s", check_type);
}
if (safe_str_eq(host, alias)) {
crm_notice("%s can%s fence (%s) %s: %s", dev->id, can ? "" : " not", search->action, host, check_type);
} else {
crm_notice("%s can%s fence (%s) %s (aka. '%s'): %s", dev->id, can ? "" : " not", search->action, host, alias,
check_type);
}
search_report_results:
search_devices_record_result(search, dev ? dev->id : NULL, can);
}
static void
search_devices(gpointer key, gpointer value, gpointer user_data)
{
stonith_device_t *dev = value;
struct device_search_s *search = user_data;
can_fence_host_with_device(dev, search);
}
#define DEFAULT_QUERY_TIMEOUT 20
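/*!
 * \internal
 * \brief Ask every registered device whether it can fence a host, dividing
 *        the query timeout among the devices that need an async check
 */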
static void
get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
void (*callback) (GList * devices, void *user_data))
{
struct device_search_s *search;
int per_device_timeout = DEFAULT_QUERY_TIMEOUT;
int devices_needing_async_query = 0;
char *key = NULL;
const char *check_type = NULL;
GHashTableIter gIter;
stonith_device_t *device = NULL;
if (!g_hash_table_size(device_list)) {
callback(NULL, user_data);
return;
}
search = calloc(1, sizeof(struct device_search_s));
if (!search) {
callback(NULL, user_data);
return;
}
g_hash_table_iter_init(&gIter, device_list);
while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) {
check_type = target_list_type(device);
if (safe_str_eq(check_type, "status") || safe_str_eq(check_type, "dynamic-list")) {
devices_needing_async_query++;
}
}
/* If we have devices that require an async event in order to know what
* nodes they can fence, we have to give the events a timeout. The total
* query timeout is divided among those events. */
if (devices_needing_async_query) {
per_device_timeout = timeout / devices_needing_async_query;
if (!per_device_timeout) {
crm_err("STONITH timeout %ds is too low; using %ds, but consider raising to at least %ds",
timeout, DEFAULT_QUERY_TIMEOUT,
DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
per_device_timeout = DEFAULT_QUERY_TIMEOUT;
} else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) {
crm_notice("STONITH timeout %ds is low for the current configuration;"
" consider raising to at least %ds",
timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
}
}
search->host = host ? strdup(host) : NULL;
search->action = action ? strdup(action) : NULL;
search->per_device_timeout = per_device_timeout;
/* We are guaranteed this many replies. Even if a device gets
 * unregistered somehow during the async search, we will get
* the correct number of replies. */
search->replies_needed = g_hash_table_size(device_list);
search->allow_suicide = suicide;
search->callback = callback;
search->user_data = user_data;
/* kick off the search */
crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s",
search->replies_needed,
search->action ? search->action : "",
search->host ? search->host : "");
g_hash_table_foreach(device_list, search_devices, search);
}
struct st_query_data {
xmlNode *reply;
char *remote_peer;
char *client_id;
char *target;
char *action;
int call_options;
};
/*!
* \internal
* \brief Add action-specific attributes to query reply XML
*
* \param[in,out] xml XML to add attributes to
* \param[in] action Fence action
* \param[in] device Fence device
*/
static void
add_action_specific_attributes(xmlNode *xml, const char *action,
stonith_device_t *device)
{
int action_specific_timeout;
int delay_max;
int delay_base;
CRM_CHECK(xml && action && device, return);
if (is_action_required(action, device)) {
crm_trace("Action %s is required on %s", action, device->id);
crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
}
action_specific_timeout = get_action_timeout(device, action, 0);
if (action_specific_timeout) {
crm_trace("Action %s has timeout %dms on %s",
action, action_specific_timeout, device->id);
crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
}
delay_max = get_action_delay_max(device, action);
if (delay_max > 0) {
crm_trace("Action %s has maximum random delay %dms on %s",
action, delay_max, device->id);
crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000);
}
delay_base = get_action_delay_base(device, action);
if (delay_base > 0) {
crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000);
}
if ((delay_max > 0) && (delay_base == 0)) {
crm_trace("Action %s has maximum random delay %dms on %s",
action, delay_max, device->id);
} else if ((delay_max == 0) && (delay_base > 0)) {
crm_trace("Action %s has a static delay of %dms on %s",
action, delay_base, device->id);
} else if ((delay_max > 0) && (delay_base > 0)) {
crm_trace("Action %s has a minimum delay of %dms and a randomly chosen "
"maximum delay of %dms on %s",
action, delay_base, delay_max, device->id);
}
}
/*!
* \internal
* \brief Add "disallowed" attribute to query reply XML if appropriate
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device,
const char *target, gboolean allow_suicide)
{
if (!localhost_is_eligible(device, action, target, allow_suicide)) {
crm_trace("Action %s on %s is disallowed for local host",
action, device->id);
crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE);
}
}
/*!
* \internal
* \brief Add child element with action-specific values to query reply XML
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device,
const char *target, gboolean allow_suicide)
{
xmlNode *child = create_xml_node(xml, F_STONITH_ACTION);
crm_xml_add(child, XML_ATTR_ID, action);
add_action_specific_attributes(child, action, device);
add_disallowed(child, action, device, target, allow_suicide);
}
static void
stonith_query_capable_device_cb(GList * devices, void *user_data)
{
struct st_query_data *query = user_data;
int available_devices = 0;
xmlNode *dev = NULL;
xmlNode *list = NULL;
GListPtr lpc = NULL;
/* Pack the results into XML */
list = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(list, F_STONITH_TARGET, query->target);
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
const char *action = query->action;
if (!device) {
/* It is possible the device got unregistered while
* determining who can fence the target */
continue;
}
available_devices++;
dev = create_xml_node(list, F_STONITH_DEVICE);
crm_xml_add(dev, XML_ATTR_ID, device->id);
crm_xml_add(dev, "namespace", device->namespace);
crm_xml_add(dev, "agent", device->agent);
crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
/* If the originating fencer wants to reboot the node, and we have a
* capable device that doesn't support "reboot", remap to "off" instead.
*/
if (is_not_set(device->flags, st_device_supports_reboot)
&& safe_str_eq(query->action, "reboot")) {
crm_trace("%s doesn't support reboot, using values for off instead",
device->id);
action = "off";
}
/* Add action-specific values if available */
add_action_specific_attributes(dev, action, device);
if (safe_str_eq(query->action, "reboot")) {
/* A "reboot" *might* get remapped to "off" then "on", so after
* sending the "reboot"-specific values in the main element, we add
* sub-elements for "off" and "on" values.
*
* We short-circuited earlier if "reboot", "off" and "on" are all
* disallowed for the local host. However if only one or two are
* disallowed, we send back the results and mark which ones are
* disallowed. If "reboot" is disallowed, this might cause problems
* with older fencer versions, which won't check for it. Older
* versions will ignore "off" and "on", so they are not a problem.
*/
add_disallowed(dev, action, device, query->target,
is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, "off", device, query->target,
is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, "on", device, query->target, FALSE);
}
/* A query without a target wants device parameters */
if (query->target == NULL) {
xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
g_hash_table_foreach(device->params, hash2field, attrs);
}
}
crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices);
if (query->target) {
crm_debug("Found %d matching devices for '%s'", available_devices, query->target);
} else {
crm_debug("%d devices installed", available_devices);
}
if (list != NULL) {
crm_log_xml_trace(list, "Add query results");
add_message_xml(query->reply, F_STONITH_CALLDATA, list);
}
stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id);
free_xml(query->reply);
free(query->remote_peer);
free(query->client_id);
free(query->target);
free(query->action);
free(query);
free_xml(list);
g_list_free_full(devices, free);
}
static void
stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options)
{
struct st_query_data *query = NULL;
const char *action = NULL;
const char *target = NULL;
int timeout = 0;
xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_TRACE);
crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
if (dev) {
const char *device = crm_element_value(dev, F_STONITH_DEVICE);
target = crm_element_value(dev, F_STONITH_TARGET);
action = crm_element_value(dev, F_STONITH_ACTION);
if (device && safe_str_eq(device, "manual_ack")) {
/* No query or reply necessary */
return;
}
}
crm_log_xml_debug(msg, "Query");
query = calloc(1, sizeof(struct st_query_data));
query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
query->client_id = client_id ? strdup(client_id) : NULL;
query->target = target ? strdup(target) : NULL;
query->action = action ? strdup(action) : NULL;
query->call_options = call_options;
get_capable_devices(target, action, timeout,
is_set(call_options, st_opt_allow_suicide),
query, stonith_query_capable_device_cb);
}
#define ST_LOG_OUTPUT_MAX 512
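/*!
 * \internal
 * \brief Log the result of a device command, at a severity chosen by rc and
 *        whether the command targeted a host
 */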
static void
log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output)
{
if (rc == 0) {
next = NULL;
}
if (cmd->victim != NULL) {
do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR,
"Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d (%s)%s%s",
cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, rc,
pcmk_strerror(rc), next ? ". Trying: " : "", next ? next : "");
} else {
do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
"Operation '%s' [%d] for device '%s' returned: %d (%s)%s%s",
cmd->action, pid, cmd->device, rc, pcmk_strerror(rc),
next ? ". Trying: " : "", next ? next : "");
}
if (output) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid);
crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output);
free(prefix);
}
}
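/*!
 * \internal
 * \brief Build and deliver the reply for a completed device command; the
 *        reply is broadcast cluster-wide when the operation originated on
 *        the victim itself (except for "on" actions)
 */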
static void
stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid)
{
xmlNode *reply = NULL;
gboolean bcast = FALSE;
reply = stonith_construct_async_reply(cmd, output, NULL, rc);
if (safe_str_eq(cmd->action, "metadata")) {
/* Too verbose to log */
crm_trace("Metadata query for %s", cmd->device);
output = NULL;
} else if (crm_str_eq(cmd->action, "monitor", TRUE) ||
crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) {
crm_trace("Never broadcast %s replies", cmd->action);
} else if (!stand_alone && safe_str_eq(cmd->origin, cmd->victim) && safe_str_neq(cmd->action, "on")) {
crm_trace("Broadcast %s reply for %s", cmd->action, cmd->victim);
crm_xml_add(reply, F_SUBTYPE, "broadcast");
bcast = TRUE;
}
log_operation(cmd, rc, pid, NULL, output);
crm_log_xml_trace(reply, "Reply");
if (bcast) {
crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
} else if (cmd->origin) {
crm_trace("Directed reply to %s", cmd->origin);
send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);
} else {
crm_trace("Directed local %ssync reply to %s",
(cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name);
do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
}
if (stand_alone) {
/* Do notification with a clean data object */
xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
crm_xml_add_int(notify_data, F_STONITH_RC, rc);
crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
+ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
}
free_xml(reply);
}
static void
cancel_stonith_command(async_command_t * cmd)
{
stonith_device_t *device;
CRM_CHECK(cmd != NULL, return);
if (!cmd->device) {
return;
}
device = g_hash_table_lookup(device_list, cmd->device);
if (device) {
crm_trace("Cancel scheduled %s on %s", cmd->action, device->id);
device->pending_ops = g_list_remove(device->pending_ops, cmd);
}
}
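/*!
 * \internal
 * \brief Handle completion of a device command: move on to the next device
 *        if one is required or the command failed, otherwise send the reply
 *        and merge the result into any matching pending commands
 */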
static void
st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
{
stonith_device_t *device = NULL;
stonith_device_t *next_device = NULL;
async_command_t *cmd = user_data;
GListPtr gIter = NULL;
GListPtr gIterNext = NULL;
CRM_CHECK(cmd != NULL, return);
cmd->active_on = NULL;
/* The device is ready to do something else now */
device = g_hash_table_lookup(device_list, cmd->device);
if (device) {
if (rc == pcmk_ok &&
(safe_str_eq(cmd->action, "list") ||
safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) {
device->verified = TRUE;
}
mainloop_set_trigger(device->work);
}
crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)",
cmd->action, cmd->device, rc, g_list_length(cmd->device_next));
if (rc == 0) {
GListPtr iter;
/* see if there are any required devices left to execute for this op */
for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
next_device = g_hash_table_lookup(device_list, iter->data);
if (next_device != NULL && is_action_required(cmd->action, next_device)) {
cmd->device_next = iter->next;
break;
}
next_device = NULL;
}
} else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
/* if this device didn't work out, see if there are any others we can try.
 * If the failed device was 'required', we can't pick another device. */
next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
cmd->device_next = cmd->device_next->next;
}
/* this operation requires more fencing, hooray! */
if (next_device) {
log_operation(cmd, rc, pid, cmd->device, output);
schedule_stonith_command(cmd, next_device);
/* Prevent cmd from being freed */
cmd = NULL;
goto done;
}
stonith_send_async_reply(cmd, output, rc, pid);
if (rc != 0) {
goto done;
}
/* Check to see if any operations are scheduled to do the exact
* same thing that just completed. If so, rather than
* performing the same fencing operation twice, return the result
* of this operation for all pending commands it matches. */
for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
async_command_t *cmd_other = gIter->data;
gIterNext = gIter->next;
if (cmd == cmd_other) {
continue;
}
        /* A pending scheduled command matches the command that just finished if:
* 1. The client connections are different.
* 2. The node victim is the same.
* 3. The fencing action is the same.
* 4. The device scheduled to execute the action is the same.
*/
if (safe_str_eq(cmd->client, cmd_other->client) ||
safe_str_neq(cmd->victim, cmd_other->victim) ||
safe_str_neq(cmd->action, cmd_other->action) ||
safe_str_neq(cmd->device, cmd_other->device)) {
continue;
}
/* Duplicate merging will do the right thing for either type of remapped
* reboot. If the executing fencer remapped an unsupported reboot to
* off, then cmd->action will be reboot and will be merged with any
* other reboot requests. If the originating fencer remapped a
* topology reboot to off then on, we will get here once with
* cmd->action "off" and once with "on", and they will be merged
* separately with similar requests.
*/
crm_notice
("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s",
cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name);
cmd_list = g_list_remove_link(cmd_list, gIter);
stonith_send_async_reply(cmd_other, output, rc, pid);
cancel_stonith_command(cmd_other);
free_async_command(cmd_other);
g_list_free_1(gIter);
}
done:
free_async_command(cmd);
}
static gint
sort_device_priority(gconstpointer a, gconstpointer b)
{
const stonith_device_t *dev_a = a;
const stonith_device_t *dev_b = b;
if (dev_a->priority > dev_b->priority) {
return -1;
} else if (dev_a->priority < dev_b->priority) {
return 1;
}
return 0;
}
static void
stonith_fence_get_devices_cb(GList * devices, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim);
if (g_list_length(devices) > 0) {
/* Order based on priority */
devices = g_list_sort(devices, sort_device_priority);
device = g_hash_table_lookup(device_list, devices->data);
if (device) {
cmd->device_list = devices;
cmd->device_next = devices->next;
devices = NULL; /* list owned by cmd now */
}
}
/* we have a device, schedule it for fencing. */
if (device) {
schedule_stonith_command(cmd, device);
/* in progress */
return;
}
/* no device found! */
stonith_send_async_reply(cmd, NULL, -ENODEV, 0);
free_async_command(cmd);
g_list_free_full(devices, free);
}
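/*!
 * \internal
 * \brief Execute a fencing request locally, either on an explicitly
 *        requested device or on the highest-priority capable device found
 *
 * \return -EINPROGRESS if fencing was initiated, -EPROTO or -ENODEV on error
 */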
static int
stonith_fence(xmlNode * msg)
{
const char *device_id = NULL;
stonith_device_t *device = NULL;
async_command_t *cmd = create_async_command(msg);
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
if (cmd == NULL) {
return -EPROTO;
}
device_id = crm_element_value(dev, F_STONITH_DEVICE);
if (device_id) {
device = g_hash_table_lookup(device_list, device_id);
if (device == NULL) {
crm_err("Requested device '%s' is not available", device_id);
return -ENODEV;
}
schedule_stonith_command(cmd, device);
} else {
const char *host = crm_element_value(dev, F_STONITH_TARGET);
char *nodename = NULL;
if (cmd->options & st_opt_cs_nodeid) {
int nodeid = crm_atoi(host, NULL);
nodename = stonith_get_peer_name(nodeid);
if (nodename) {
host = nodename;
}
}
/* If we get to here, then self-fencing is implicitly allowed */
get_capable_devices(host, cmd->action, cmd->default_timeout,
TRUE, cmd, stonith_fence_get_devices_cb);
free(nodename);
}
return -EINPROGRESS;
}
xmlNode *
stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
{
int lpc = 0;
xmlNode *reply = NULL;
const char *name = NULL;
const char *value = NULL;
const char *names[] = {
F_STONITH_OPERATION,
F_STONITH_CALLID,
F_STONITH_CLIENTID,
F_STONITH_CLIENTNAME,
F_STONITH_REMOTE_OP_ID,
F_STONITH_CALLOPTS
};
crm_trace("Creating a basic reply");
reply = create_xml_node(NULL, T_STONITH_REPLY);
crm_xml_add(reply, "st_origin", __FUNCTION__);
crm_xml_add(reply, F_TYPE, T_STONITH_NG);
crm_xml_add(reply, "st_output", output);
crm_xml_add_int(reply, F_STONITH_RC, rc);
CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply);
for (lpc = 0; lpc < DIMOF(names); lpc++) {
name = names[lpc];
value = crm_element_value(request, name);
crm_xml_add(reply, name, value);
}
if (data != NULL) {
crm_trace("Attaching reply output");
add_message_xml(reply, F_STONITH_CALLDATA, data);
}
return reply;
}
static xmlNode *
stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
{
xmlNode *reply = NULL;
crm_trace("Creating a basic reply");
reply = create_xml_node(NULL, T_STONITH_REPLY);
crm_xml_add(reply, "st_origin", __FUNCTION__);
crm_xml_add(reply, F_TYPE, T_STONITH_NG);
crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client);
crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name);
crm_xml_add(reply, F_STONITH_TARGET, cmd->victim);
crm_xml_add(reply, F_STONITH_ACTION, cmd->op);
crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
crm_xml_add_int(reply, F_STONITH_RC, rc);
crm_xml_add(reply, "st_output", output);
if (data != NULL) {
crm_info("Attaching reply output");
add_message_xml(reply, F_STONITH_CALLDATA, data);
}
return reply;
}
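/*!
 * \internal
 * \brief Check whether a peer is usable for fencing, i.e. known by name and
 *        running the relevant cluster daemon process
 */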
bool fencing_peer_active(crm_node_t *peer)
{
if (peer == NULL) {
return FALSE;
} else if (peer->uname == NULL) {
return FALSE;
} else if (is_set(peer->processes, crm_get_cluster_proc())) {
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Determine if we need to use an alternate node to
 * fence the target. If so, return that node's uname
*
* \retval NULL, no alternate host
* \retval uname, uname of alternate host to use
*/
static const char *
check_alternate_host(const char *target)
{
const char *alternate_host = NULL;
crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target);
if (find_topology_for_host(target) && safe_str_eq(target, stonith_our_uname)) {
GHashTableIter gIter;
crm_node_t *entry = NULL;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target);
if (fencing_peer_active(entry)
&& safe_str_neq(entry->uname, target)) {
alternate_host = entry->uname;
break;
}
}
if (alternate_host == NULL) {
crm_err("No alternate host available to handle complex self fencing request");
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
crm_notice("Peer[%d] %s", entry->id, entry->uname);
}
}
}
return alternate_host;
}
static void
stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
const char *client_id)
{
if (remote_peer) {
send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE);
} else {
do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL);
}
}
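/*!
 * \internal
 * \brief Dispatch a fencing request from local IPC or a remote peer to the
 *        appropriate handler, replying immediately unless the result is
 *        still in progress
 */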
static int
handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request,
const char *remote_peer)
{
int call_options = 0;
int rc = -EOPNOTSUPP;
xmlNode *data = NULL;
xmlNode *reply = NULL;
char *output = NULL;
const char *op = crm_element_value(request, F_STONITH_OPERATION);
const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
xmlNode *reply = create_xml_node(NULL, "reply");
CRM_ASSERT(client);
crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_STONITH_CLIENTID, client->id);
crm_ipcs_send(client, id, reply, flags);
client->request_id = 0;
free_xml(reply);
return 0;
} else if (crm_str_eq(op, STONITH_OP_EXEC, TRUE)) {
rc = stonith_device_action(request, &output);
} else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) {
const char *call_id = crm_element_value(request, F_STONITH_CALLID);
const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
int op_timeout = 0;
crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
return 0;
} else if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) {
if (remote_peer) {
create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */
}
stonith_query(request, remote_peer, client_id, call_options);
return 0;
} else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) {
const char *flag_name = NULL;
CRM_ASSERT(client);
flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE);
if (flag_name) {
crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id);
client->options |= get_stonith_flag(flag_name);
}
flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE);
if (flag_name) {
crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id);
            client->options &= ~get_stonith_flag(flag_name);
}
if (flags & crm_ipc_client_response) {
crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
}
return 0;
} else if (crm_str_eq(op, STONITH_OP_RELAY, TRUE)) {
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s",
stonith_our_uname,
client ? client->name : remote_peer,
crm_element_value(dev, F_STONITH_ACTION),
crm_element_value(dev, F_STONITH_TARGET));
if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
rc = -EINPROGRESS;
}
} else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) {
if (remote_peer || stand_alone) {
rc = stonith_fence(request);
} else if (call_options & st_opt_manual_ack) {
remote_fencing_op_t *rop = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
const char *target = crm_element_value(dev, F_STONITH_TARGET);
crm_notice("Received manual confirmation that %s is fenced", target);
rop = initiate_remote_stonith_op(client, request, TRUE);
rc = stonith_manual_ack(request, rop);
} else {
const char *alternate_host = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
const char *target = crm_element_value(dev, F_STONITH_TARGET);
const char *action = crm_element_value(dev, F_STONITH_ACTION);
const char *device = crm_element_value(dev, F_STONITH_DEVICE);
if (client) {
int tolerance = 0;
crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'",
client->name, client->id, action, target, device ? device : "(any)");
crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
if (stonith_check_fence_tolerance(tolerance, target, action)) {
rc = 0;
goto done;
}
} else {
crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
remote_peer, action, target, device ? device : "(any)");
}
alternate_host = check_alternate_host(target);
if (alternate_host && client) {
const char *client_id = NULL;
crm_notice("Forwarding complex self fencing request to peer %s", alternate_host);
if (client->id) {
client_id = client->id;
} else {
client_id = crm_element_value(request, F_STONITH_CLIENTID);
}
/* Create a record of it, otherwise call_id will be 0 if we need to notify of failures */
create_remote_stonith_op(client_id, request, FALSE);
crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY);
crm_xml_add(request, F_STONITH_CLIENTID, client->id);
send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
FALSE);
rc = -EINPROGRESS;
} else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) {
rc = -EINPROGRESS;
}
}
} else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) {
- rc = stonith_fence_history(request, &data);
+ rc = stonith_fence_history(request, &data, remote_peer, call_options);
+ if (call_options & st_opt_discard_reply) {
+ /* we don't expect answers to the broadcast
+ * we might have sent out
+ */
+ free_xml(data);
+ return pcmk_ok;
+ }
} else if (crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) {
const char *device_id = NULL;
rc = stonith_device_register(request, &device_id, FALSE);
do_stonith_notify_device(call_options, op, rc, device_id);
} else if (crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) {
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
const char *device_id = crm_element_value(dev, XML_ATTR_ID);
rc = stonith_device_remove(device_id, FALSE);
do_stonith_notify_device(call_options, op, rc, device_id);
} else if (crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) {
char *device_id = NULL;
rc = stonith_level_register(request, &device_id);
do_stonith_notify_level(call_options, op, rc, device_id);
free(device_id);
} else if (crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) {
        char *device_id = NULL;
        rc = stonith_level_remove(request, &device_id);
        do_stonith_notify_level(call_options, op, rc, device_id);
        free(device_id);
} else if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) {
int node_id = 0;
const char *name = NULL;
crm_element_value_int(request, XML_ATTR_ID, &node_id);
name = crm_element_value(request, XML_ATTR_UNAME);
reap_crm_member(node_id, name);
return pcmk_ok;
} else {
crm_err("Unknown %s from %s", op, client ? client->name : remote_peer);
crm_log_xml_warn(request, "UnknownOp");
}
done:
    /* Always reply unless the request is still in progress.
     * If in progress, a reply will be sent asynchronously after the request
     * processing is finished */
if (rc != -EINPROGRESS) {
crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0,
id, is_set(call_options, st_opt_sync_call), call_options,
crm_element_value(request, F_STONITH_CALLOPTS));
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
reply = stonith_construct_reply(request, output, data, rc);
stonith_send_reply(reply, call_options, remote_peer, client_id);
}
free(output);
free_xml(data);
free_xml(reply);
return rc;
}
static void
handle_reply(crm_client_t * client, xmlNode * request, const char *remote_peer)
{
const char *op = crm_element_value(request, F_STONITH_OPERATION);
if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) {
process_remote_stonith_query(request);
} else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) {
process_remote_stonith_exec(request);
} else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) {
/* Reply to a complex fencing op */
process_remote_stonith_exec(request);
} else {
crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer);
crm_log_xml_warn(request, "UnknownOp");
}
}
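/*!
 * \internal
 * \brief Entry point for all fencer messages: route replies to
 *        handle_reply() and everything else to handle_request()
 */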
void
stonith_command(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request,
const char *remote_peer)
{
int call_options = 0;
int rc = 0;
gboolean is_reply = FALSE;
/* Copy op for reporting. The original might get freed by handle_reply()
* before we use it in crm_debug():
* handle_reply()
* |- process_remote_stonith_exec()
* |-- remote_op_done()
* |--- handle_local_reply_and_notify()
* |---- crm_xml_add(...F_STONITH_OPERATION...)
* |--- free_xml(op->request)
*/
char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_TRACE)) {
is_reply = TRUE;
}
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "",
id, client ? client->name : remote_peer, call_options);
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
if (is_reply) {
handle_reply(client, request, remote_peer);
} else {
rc = handle_request(client, id, flags, request, remote_peer);
}
crm_debug("Processed %s%s from %s: %s (%d)", op,
is_reply ? " reply" : "", client ? client->name : remote_peer,
rc > 0 ? "" : pcmk_strerror(rc), rc);
free(op);
}
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
new file mode 100644
index 0000000000..3ec354ede9
--- /dev/null
+++ b/daemons/fenced/fenced_history.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright 2009-2018 Andrew Beekhof
+ *
+ * This source code is licensed under the GNU General Public License version 2
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
+ */
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+/*!
+ * \internal
+ * \brief Send a broadcast to all nodes to trigger cleanup or
+ * history synchronisation
+ *
+ * \param[in] history Optional history to be attached
+ * \param[in] callopts Call options (cleanup is requested via a flag here)
+ * \param[in] target Cleanup can be limited to certain fence-targets
+ */
+static void
+stonith_send_broadcast_history(xmlNode *history,
+ int callopts,
+ const char *target)
+{
+ xmlNode *bcast = create_xml_node(NULL, "stonith_command");
+ xmlNode *data = create_xml_node(NULL, __FUNCTION__);
+
+ if (target) {
+ crm_xml_add(data, F_STONITH_TARGET, target);
+ }
+ crm_xml_add(bcast, F_TYPE, T_STONITH_NG);
+ crm_xml_add(bcast, F_SUBTYPE, "broadcast");
+ crm_xml_add(bcast, F_STONITH_OPERATION, STONITH_OP_FENCE_HISTORY);
+ crm_xml_add_int(bcast, F_STONITH_CALLOPTS, callopts);
+ if (history) {
+ add_node_copy(data, history);
+ }
+ add_message_xml(bcast, F_STONITH_CALLDATA, data);
+ send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
+
+ free_xml(data);
+ free_xml(bcast);
+}
+
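+/*!
+ * \internal
+ * \brief GHRFunc for g_hash_table_foreach_remove(): remove completed
+ *        (done or failed) history entries, optionally limited to a target
+ */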
+static gboolean
+stonith_remove_history_entry (gpointer key,
+ gpointer value,
+ gpointer user_data)
+{
+ remote_fencing_op_t *op = value;
+ const char *target = (const char *) user_data;
+
+ if ((op->state == st_failed) || (op->state == st_done)) {
+ if ((target) && (strcmp(op->target, target) != 0)) {
+ return FALSE;
+ }
+ return TRUE;
+ }
+
+ return FALSE; /* don't clean pending operations */
+}
+
+/*!
+ * \internal
+ * \brief Send out a cleanup broadcast and do a local history-cleanup
+ *
+ * \param[in] request The request message that triggered cleanup
+ * \param[in] target Cleanup can be limited to certain fence-targets
+ */
+static void
+stonith_fence_history_cleanup(xmlNode *request, const char *target)
+{
+ if (crm_element_value(request, F_STONITH_CALLID)) {
+ stonith_send_broadcast_history(NULL,
+ st_opt_cleanup | st_opt_discard_reply,
+ target);
+        /* we'll do the local cleanup when we receive back our own broadcast */
+ } else if (stonith_remote_op_list) {
+ g_hash_table_foreach_remove(stonith_remote_op_list,
+ stonith_remove_history_entry,
+ (gpointer) target);
+ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+ }
+}
+
+/*!
+ * \internal
+ * \brief Convert xml fence-history to a hash-table like stonith_remote_op_list
+ *
+ * \param[in] history Fence-history in xml
+ *
+ * \return Fence-history as hash-table
+ */
+static GHashTable *
+stonith_xml_history_to_list(xmlNode *history)
+{
+ xmlNode *xml_op = NULL;
+ GHashTable *rv = NULL;
+
+ init_stonith_remote_op_hash_table(&rv);
+
+ CRM_LOG_ASSERT(rv != NULL);
+
+ for (xml_op = __xml_first_child(history); xml_op != NULL;
+ xml_op = __xml_next(xml_op)) {
+ remote_fencing_op_t *op = NULL;
+ char *id = crm_element_value_copy(xml_op, F_STONITH_REMOTE_OP_ID);
+ int completed, state;
+
+ if (!id) {
+ crm_warn("History to convert to hashtable has no id in entry");
+ continue;
+ }
+
+ crm_trace("Attaching op %s to hashtable", id);
+
+ op = calloc(1, sizeof(remote_fencing_op_t));
+
+ op->id = id;
+ op->target = crm_element_value_copy(xml_op, F_STONITH_TARGET);
+ op->action = crm_element_value_copy(xml_op, F_STONITH_ACTION);
+ op->originator = crm_element_value_copy(xml_op, F_STONITH_ORIGIN);
+ op->delegate = crm_element_value_copy(xml_op, F_STONITH_DELEGATE);
+ op->client_name = crm_element_value_copy(xml_op, F_STONITH_CLIENTNAME);
+ crm_element_value_int(xml_op, F_STONITH_DATE, &completed);
+ op->completed = (time_t) completed;
+ crm_element_value_int(xml_op, F_STONITH_STATE, &state);
+ op->state = (enum op_state) state;
+
+ g_hash_table_replace(rv, id, op);
+ CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
+ }
+
+ return rv;
+}
+
+/*!
+ * \internal
+ * \brief Craft xml difference between local fence-history and a history
+ * coming from remote
+ *
+ * \param[in] remote_history Fence-history as hash-table (may be NULL)
+ * \param[in] add_id          Whether to include each entry's operation id
+ *                            (not needed when answering an API history request)
+ * \param[in] target Optionally limit to certain fence-target
+ *
+ * \return The fence-history as xml
+ */
+static xmlNode *
+stonith_local_history_diff(GHashTable *remote_history,
+ gboolean add_id,
+ const char *target)
+{
+ xmlNode *history = NULL;
+ int cnt = 0;
+
+ if (stonith_remote_op_list) {
+ GHashTableIter iter;
+ remote_fencing_op_t *op = NULL;
+
+ history = create_xml_node(NULL, F_STONITH_HISTORY_LIST);
+
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+ xmlNode *entry = NULL;
+
+ if (remote_history &&
+ g_hash_table_lookup(remote_history, op->id)) {
+                continue; /* skip entries already broadcast */
+ }
+
+ if (target && strcmp(op->target, target) != 0) {
+ continue;
+ }
+
+ cnt++;
+ crm_trace("Attaching op %s", op->id);
+ entry = create_xml_node(history, STONITH_OP_EXEC);
+ if (add_id) {
+ crm_xml_add(entry, F_STONITH_REMOTE_OP_ID, op->id);
+ }
+ crm_xml_add(entry, F_STONITH_TARGET, op->target);
+ crm_xml_add(entry, F_STONITH_ACTION, op->action);
+ crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
+ crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
+ crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name);
+ crm_xml_add_int(entry, F_STONITH_DATE, op->completed);
+ crm_xml_add_int(entry, F_STONITH_STATE, op->state);
+ }
+ }
+
+ if (cnt == 0) {
+ free_xml(history);
+ return NULL;
+ } else {
+ return history;
+ }
+}
+
+/*!
+ * \internal
+ * \brief Merge fence-history coming from remote into local history
+ *
+ * \param[in] history Hash-table holding remote history to be merged in
+ */
+static void
+stonith_merge_in_history_list(GHashTable *history)
+{
+ GHashTableIter iter;
+ remote_fencing_op_t *op = NULL;
+ gboolean updated = FALSE;
+
+ if (!history) {
+ return;
+ }
+
+ init_stonith_remote_op_hash_table(&stonith_remote_op_list);
+
+ g_hash_table_iter_init(&iter, history);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+ remote_fencing_op_t *stored_op =
+ g_hash_table_lookup(stonith_remote_op_list, op->id);
+
+ if (stored_op) {
+            continue; /* skip existing entries - state-merging might be desirable */
+ }
+
+ updated = TRUE;
+ g_hash_table_iter_steal(&iter);
+ CRM_LOG_ASSERT(g_hash_table_insert(stonith_remote_op_list, op->id, op));
+ }
+ if (updated) {
+ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+ }
+ g_hash_table_destroy(history); /* remove what is left */
+}
+
+/*!
+ * \internal
+ * \brief Handle fence-history messages (either from API or coming in as
+ *        broadcasts)
+ *
+ * \param[in] msg Request message
+ * \param[in] output          In case of a request from the API, used to
+ *                            craft a reply
+ * \param[in] remote_peer     Name of the peer that sent the request, or
+ *                            NULL for a local IPC request
+ * \param[in] options         Call options from the request
+ *
+ * \return always pcmk_ok, as there is actually nothing that can go wrong
+ */
+int
+stonith_fence_history(xmlNode *msg, xmlNode **output,
+ const char *remote_peer, int options)
+{
+ int rc = 0;
+ const char *target = NULL;
+ xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE);
+ char *nodename = NULL;
+ xmlNode *out_history = NULL;
+
+ if (dev) {
+ target = crm_element_value(dev, F_STONITH_TARGET);
+ if (target && (options & st_opt_cs_nodeid)) {
+ int nodeid = crm_atoi(target, NULL);
+
+ nodename = stonith_get_peer_name(nodeid);
+ if (nodename) {
+ target = nodename;
+ }
+ }
+ }
+
+ if (options & st_opt_cleanup) {
+ crm_trace("Cleaning up operations on %s in %p", target,
+ stonith_remote_op_list);
+
+ stonith_fence_history_cleanup(msg, target);
+ } else if (options & st_opt_broadcast) {
+ if (crm_element_value(msg, F_STONITH_CALLID)) {
+ /* this is coming from the stonith-API
+ *
+             * craft a broadcast with this node's history
+ * so that every node can merge and broadcast
+ * what it has on top
+ */
+ out_history = stonith_local_history_diff(NULL, TRUE, NULL);
+ crm_trace("Broadcasting history to peers");
+ stonith_send_broadcast_history(out_history,
+ st_opt_broadcast | st_opt_discard_reply,
+ NULL);
+ } else if (remote_peer &&
+ !safe_str_eq(remote_peer, stonith_our_uname)) {
+ xmlNode *history =
+ get_xpath_object("//" F_STONITH_HISTORY_LIST, msg, LOG_TRACE);
+ GHashTable *received_history =
+ history?stonith_xml_history_to_list(history):NULL;
+
+            /* either a broadcast created directly upon a stonith-API request
+             * or a diff sent in response to such a broadcast
+             *
+             * in both cases it may or may not carry a history:
+             * if we received differential data, merge it in and stop;
+             * otherwise broadcast what we have on top of it, marked as
+             * differential, and merge it in afterwards
+             */
+ if (!history ||
+ !crm_is_true(crm_element_value(history,
+ F_STONITH_DIFFERENTIAL))) {
+ out_history =
+ stonith_local_history_diff(received_history, TRUE, NULL);
+ if (out_history) {
+ crm_trace("Broadcasting history-diff to peers");
+ crm_xml_add(out_history, F_STONITH_DIFFERENTIAL,
+ XML_BOOLEAN_TRUE);
+ stonith_send_broadcast_history(out_history,
+ st_opt_broadcast | st_opt_discard_reply,
+ NULL);
+ } else {
+ crm_trace("History-diff is empty - skip broadcast");
+ }
+ }
+ stonith_merge_in_history_list(received_history);
+ } else {
+ crm_trace("Skipping history-query-broadcast (%s%s)"
+ " we sent ourselves",
+ remote_peer?"remote-peer=":"local-ipc",
+ remote_peer?remote_peer:"");
+ }
+ } else {
+ /* plain history request */
+ crm_trace("Looking for operations on %s in %p", target,
+ stonith_remote_op_list);
+ *output = stonith_local_history_diff(NULL, FALSE, target);
+ }
+ free(nodename);
+ free_xml(out_history);
+ return rc;
+}
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index ade3c92161..753fafaa67 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1,2121 +1,2080 @@
/*
* Copyright 2009-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define TIMEOUT_MULTIPLY_FACTOR 1.2
/* When one fencer queries its peers for devices able to handle a fencing
* request, each peer will reply with a list of such devices available to it.
* Each reply will be parsed into a st_query_result_t, with each device's
* information kept in a device_properties_t.
*/
typedef struct device_properties_s {
/* Whether access to this device has been verified */
gboolean verified;
/* The remaining members are indexed by the operation's "phase" */
/* Whether this device has been executed in each phase */
gboolean executed[st_phase_max];
/* Whether this device is disallowed from executing in each phase */
gboolean disallowed[st_phase_max];
/* Action-specific timeout for each phase */
int custom_action_timeout[st_phase_max];
/* Action-specific maximum random delay for each phase */
int delay_max[st_phase_max];
/* Action-specific base delay for each phase */
int delay_base[st_phase_max];
} device_properties_t;
typedef struct st_query_result_s {
/* Name of peer that sent this result */
char *host;
/* Only try peers for non-topology based operations once */
gboolean tried;
/* Number of entries in the devices table */
int ndevices;
/* Devices available to this host that are capable of fencing the target */
GHashTable *devices;
} st_query_result_t;
-static GHashTable *remote_op_list = NULL;
+GHashTable *stonith_remote_op_list = NULL;
void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer);
static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
const st_query_result_t *chosen_peer);
static gint
sort_strings(gconstpointer a, gconstpointer b)
{
return strcmp(a, b);
}
static void
free_remote_query(gpointer data)
{
if (data) {
st_query_result_t *query = data;
crm_trace("Free'ing query result from %s", query->host);
g_hash_table_destroy(query->devices);
free(query->host);
free(query);
}
}
void
-free_remote_op_list()
+free_stonith_remote_op_list()
{
- if (remote_op_list != NULL) {
- g_hash_table_destroy(remote_op_list);
- remote_op_list = NULL;
+ if (stonith_remote_op_list != NULL) {
+ g_hash_table_destroy(stonith_remote_op_list);
+ stonith_remote_op_list = NULL;
}
}
struct peer_count_data {
const remote_fencing_op_t *op;
gboolean verified_only;
int count;
};
/*!
* \internal
* \brief Increment a counter if a device has not been executed yet
*
* \param[in] key Device ID (ignored)
* \param[in] value Device properties
* \param[in] user_data Peer count data
*/
static void
count_peer_device(gpointer key, gpointer value, gpointer user_data)
{
device_properties_t *props = (device_properties_t*)value;
struct peer_count_data *data = user_data;
if (!props->executed[data->op->phase]
&& (!data->verified_only || props->verified)) {
++(data->count);
}
}
/*!
* \internal
* \brief Check the number of available devices in a peer's query results
*
* \param[in] op Operation that results are for
* \param[in] peer Peer to count
* \param[in] verified_only Whether to count only verified devices
*
* \return Number of devices available to peer that were not already executed
*/
static int
count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
gboolean verified_only)
{
struct peer_count_data data;
data.op = op;
data.verified_only = verified_only;
data.count = 0;
if (peer) {
g_hash_table_foreach(peer->devices, count_peer_device, &data);
}
return data.count;
}
/*!
* \internal
* \brief Search for a device in a query result
*
* \param[in] op Operation that result is for
* \param[in] peer Query result for a peer
* \param[in] device Device ID to search for
*
* \return Device properties if found, NULL otherwise
*/
static device_properties_t *
find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer,
const char *device)
{
device_properties_t *props = g_hash_table_lookup(peer->devices, device);
return (props && !props->executed[op->phase]
&& !props->disallowed[op->phase])? props : NULL;
}
/*!
* \internal
* \brief Find a device in a peer's device list and mark it as executed
*
* \param[in] op Operation that peer result is for
* \param[in,out] peer Peer with results to search
* \param[in] device ID of device to mark as done
* \param[in] verified_devices_only Only consider verified devices
*
* \return TRUE if device was found and marked, FALSE otherwise
*/
static gboolean
grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer,
const char *device, gboolean verified_devices_only)
{
device_properties_t *props = find_peer_device(op, peer, device);
if ((props == NULL) || (verified_devices_only && !props->verified)) {
return FALSE;
}
crm_trace("Removing %s from %s (%d remaining)",
device, peer->host, count_peer_devices(op, peer, FALSE));
props->executed[op->phase] = TRUE;
return TRUE;
}
static void
clear_remote_op_timers(remote_fencing_op_t * op)
{
if (op->query_timer) {
g_source_remove(op->query_timer);
op->query_timer = 0;
}
if (op->op_timer_total) {
g_source_remove(op->op_timer_total);
op->op_timer_total = 0;
}
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
op->op_timer_one = 0;
}
}
static void
free_remote_op(gpointer data)
{
remote_fencing_op_t *op = data;
crm_trace("Free'ing op %s for %s", op->id, op->target);
crm_log_xml_debug(op->request, "Destroying");
clear_remote_op_timers(op);
free(op->id);
free(op->action);
free(op->delegate);
free(op->target);
free(op->client_id);
free(op->client_name);
free(op->originator);
if (op->query_results) {
g_list_free_full(op->query_results, free_remote_query);
}
if (op->request) {
free_xml(op->request);
op->request = NULL;
}
if (op->devices_list) {
g_list_free_full(op->devices_list, free);
op->devices_list = NULL;
}
g_list_free_full(op->automatic_list, free);
g_list_free(op->duplicates);
free(op);
}
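+/*!
+ * \internal
+ * \brief Initialize the given remote-op hash table if it does not exist yet
+ *
+ * \param[in,out] table Where the table pointer (or NULL) is stored
+ */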
+void
+init_stonith_remote_op_hash_table(GHashTable **table)
+{
+ if (*table == NULL) {
+ *table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
+ }
+}
+
/*!
* \internal
* \brief Return an operation's originally requested action (before any remap)
*
* \param[in] op Operation to check
*
* \return Operation's original action
*/
static const char *
op_requested_action(const remote_fencing_op_t *op)
{
return ((op->phase > st_phase_requested)? "reboot" : op->action);
}
/*!
* \internal
* \brief Remap a "reboot" operation to the "off" phase
*
* \param[in,out] op Operation to remap
*/
static void
op_phase_off(remote_fencing_op_t *op)
{
crm_info("Remapping multiple-device reboot of %s (%s) to off",
op->target, op->id);
op->phase = st_phase_off;
/* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
* memory allocation at each phase.
*/
strcpy(op->action, "off");
}
/*!
* \internal
* \brief Advance a remapped reboot operation to the "on" phase
*
* \param[in,out] op Operation to remap
*/
static void
op_phase_on(remote_fencing_op_t *op)
{
GListPtr iter = NULL;
crm_info("Remapped off of %s complete, remapping to on for %s.%.8s",
op->target, op->client_name, op->id);
op->phase = st_phase_on;
strcpy(op->action, "on");
/* Skip devices with automatic unfencing, because the cluster will handle it
* when the node rejoins.
*/
for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
GListPtr match = g_list_find_custom(op->devices_list, iter->data,
sort_strings);
if (match) {
op->devices_list = g_list_remove(op->devices_list, match->data);
}
}
g_list_free_full(op->automatic_list, free);
op->automatic_list = NULL;
/* Rewind device list pointer */
op->devices = op->devices_list;
}
/*!
* \internal
* \brief Reset a remapped reboot operation
*
* \param[in,out] op Operation to reset
*/
static void
undo_op_remap(remote_fencing_op_t *op)
{
if (op->phase > 0) {
crm_info("Undoing remap of reboot of %s for %s.%.8s",
op->target, op->client_name, op->id);
op->phase = st_phase_requested;
strcpy(op->action, "reboot");
}
}
static xmlNode *
create_op_done_notify(remote_fencing_op_t * op, int rc)
{
xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
crm_xml_add_int(notify_data, "state", op->state);
crm_xml_add_int(notify_data, F_STONITH_RC, rc);
crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
return notify_data;
}
static void
bcast_result_to_peers(remote_fencing_op_t * op, int rc)
{
static int count = 0;
xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
xmlNode *notify_data = create_op_done_notify(op, rc);
count++;
crm_trace("Broadcasting result to peers");
crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(bcast, F_SUBTYPE, "broadcast");
crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
crm_xml_add_int(bcast, "count", count);
add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
free_xml(notify_data);
free_xml(bcast);
return;
}
static void
handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
{
xmlNode *notify_data = NULL;
xmlNode *reply = NULL;
if (op->notify_sent == TRUE) {
/* nothing to do */
return;
}
/* Do notification with a clean data object */
notify_data = create_op_done_notify(op, rc);
crm_xml_add_int(data, "state", op->state);
crm_xml_add(data, F_STONITH_TARGET, op->target);
crm_xml_add(data, F_STONITH_OPERATION, op->action);
reply = stonith_construct_reply(op->request, NULL, data, rc);
crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
/* Send fencing OP reply to local client that initiated fencing */
do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
/* bcast to all local clients that the fencing operation happened */
do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
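+ /* Also let fence-history listeners know that the history has changed */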
+ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
/* mark this op as having its notifications already sent */
op->notify_sent = TRUE;
free_xml(reply);
free_xml(notify_data);
}
static void
handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
{
GListPtr iter = NULL;
for (iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *other = iter->data;
if (other->state == st_duplicate) {
other->state = op->state;
crm_debug("Performing duplicate notification for %s@%s.%.8s = %s",
other->client_name, other->originator, other->id,
pcmk_strerror(rc));
remote_op_done(other, data, rc, TRUE);
} else {
// Possible if (for example) it timed out already
crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name,
other->originator, other->state);
}
}
}
/*!
* \internal
* \brief Finalize a remote operation.
*
* \description This function has two code paths.
*
* Path 1. This node is the owner of the operation and needs
* to notify the cpg group via a broadcast as to the operation's
* results.
*
* Path 2. The cpg broadcast is received. All nodes notify their local
* stonith clients of the operation's results.
*
* So, the owner of the operation first notifies the cluster of the result,
* and once that cpg notification is received back, it notifies all the
* local clients.
*
* Nodes that are passive watchers of the operation will receive the
* broadcast and only need to notify their local clients that the operation
* finished.
*
* \param op    The fencing operation to finalize
* \param data  The XML message reply (if present) of the last delegated
*              fencing operation
* \param rc    The return code of the operation
* \param dup   Whether this operation is a duplicate; if so, treat it a
*              little differently, making sure the broadcast is not sent out
*/
static void
remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
{
int level = LOG_ERR;
const char *subt = NULL;
xmlNode *local_data = NULL;
op->completed = time(NULL);
clear_remote_op_timers(op);
undo_op_remap(op);
if (op->notify_sent == TRUE) {
crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s",
op->action, op->target, op->delegate ? op->delegate : "",
op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc));
goto remote_op_done_cleanup;
}
if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) {
xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE);
if(ndata) {
op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE);
} else {
op->delegate = crm_element_value_copy(data, F_ORIG);
}
}
if (data == NULL) {
data = create_xml_node(NULL, "remote-op");
local_data = data;
}
/* Tell everyone the operation is done, we will continue
* with doing the local notifications once we receive
* the broadcast back. */
subt = crm_element_value(data, F_SUBTYPE);
if (dup == FALSE && safe_str_neq(subt, "broadcast")) {
/* Defer notification until the bcast message arrives */
bcast_result_to_peers(op, rc);
goto remote_op_done_cleanup;
}
if (rc == pcmk_ok || dup) {
level = LOG_NOTICE;
} else if (safe_str_neq(op->originator, stonith_our_uname)) {
level = LOG_NOTICE;
}
do_crm_log(level,
"Operation %s of %s by %s for %s@%s.%.8s: %s",
op->action, op->target, op->delegate ? op->delegate : "",
op->client_name, op->originator, op->id, pcmk_strerror(rc));
handle_local_reply_and_notify(op, data, rc);
if (dup == FALSE) {
handle_duplicates(op, data, rc);
}
/* Free non-essential parts of the record
* Keep the record around so we can query the history
*/
if (op->query_results) {
g_list_free_full(op->query_results, free_remote_query);
op->query_results = NULL;
}
if (op->request) {
free_xml(op->request);
op->request = NULL;
}
remote_op_done_cleanup:
free_xml(local_data);
}
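/*!
* \internal
* \brief Handle expiry of a watchdog self-fencing timer
*
* Once the watchdog timeout has elapsed, the target is assumed to have
* fenced itself, so mark the operation as successfully completed.
*/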
static gboolean
remote_op_watchdog_done(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_one = 0;
crm_notice("Self-fencing (%s) by %s for %s.%8s assumed complete",
op->action, op->target, op->client_name, op->id);
op->state = st_done;
remote_op_done(op, NULL, pcmk_ok, FALSE);
return FALSE;
}
static gboolean
remote_op_timeout_one(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_one = 0;
crm_notice("Peer's fencing (%s) of %s for %s timed out" CRM_XS "id=%s",
op->action, op->target, op->client_name, op->id);
call_remote_stonith(op, NULL);
return FALSE;
}
static gboolean
remote_op_timeout(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_total = 0;
if (op->state == st_done) {
crm_debug("Action %s (%s) for %s (%s) already completed",
op->action, op->id, op->target, op->client_name);
return FALSE;
}
crm_debug("Action %s (%s) for %s (%s) timed out",
op->action, op->id, op->target, op->client_name);
if (op->phase == st_phase_on) {
/* A remapped reboot operation timed out in the "on" phase, but the
* "off" phase completed successfully, so quit trying any further
* devices, and return success.
*/
remote_op_done(op, NULL, pcmk_ok, FALSE);
return FALSE;
}
op->state = st_failed;
remote_op_done(op, NULL, -ETIME, FALSE);
return FALSE;
}
static gboolean
remote_op_query_timeout(gpointer data)
{
remote_fencing_op_t *op = data;
op->query_timer = 0;
if (op->state == st_done) {
crm_debug("Operation %s for %s already completed", op->id, op->target);
} else if (op->state == st_exec) {
crm_debug("Operation %s for %s already in progress", op->id, op->target);
} else if (op->query_results) {
crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state);
call_remote_stonith(op, NULL);
} else {
crm_debug("Query %s for %s timed out: %d", op->id, op->target, op->state);
if (op->op_timer_total) {
g_source_remove(op->op_timer_total);
op->op_timer_total = 0;
}
remote_op_timeout(op);
}
return FALSE;
}
static gboolean
topology_is_empty(stonith_topology_t *tp)
{
int i;
if (tp == NULL) {
return TRUE;
}
for (i = 0; i < ST_LEVEL_MAX; i++) {
if (tp->levels[i] != NULL) {
return FALSE;
}
}
return TRUE;
}
/*!
* \internal
* \brief Add a device to an operation's automatic unfencing list
*
* \param[in,out] op Operation to modify
* \param[in] device Device ID to add
*/
static void
add_required_device(remote_fencing_op_t *op, const char *device)
{
GListPtr match = g_list_find_custom(op->automatic_list, device,
sort_strings);
if (!match) {
op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
}
}
/*!
* \internal
* \brief Remove a device from the automatic unfencing list
*
* \param[in,out] op Operation to modify
* \param[in] device Device ID to remove
*/
static void
remove_required_device(remote_fencing_op_t *op, const char *device)
{
GListPtr match = g_list_find_custom(op->automatic_list, device,
sort_strings);
if (match) {
op->automatic_list = g_list_remove(op->automatic_list, match->data);
}
}
/* deep copy the device list */
static void
set_op_device_list(remote_fencing_op_t * op, GListPtr devices)
{
GListPtr lpc = NULL;
if (op->devices_list) {
g_list_free_full(op->devices_list, free);
op->devices_list = NULL;
}
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
}
op->devices = op->devices_list;
}
/*!
* \internal
* \brief Check whether a node matches a topology target
*
* \param[in] tp Topology table entry to check
* \param[in] node Name of node to check
*
* \return TRUE if node matches topology target
*/
static gboolean
topology_matches(const stonith_topology_t *tp, const char *node)
{
regex_t r_patt;
CRM_CHECK(node && tp && tp->target, return FALSE);
switch(tp->kind) {
case 2:
/* This level targets by attribute, so tp->target is a NAME=VALUE pair
* of a permanent attribute applied to targeted nodes. The test below
* relies on the locally cached copy of the CIB, so if fencing needs to
* be done before the initial CIB is received or after a malformed CIB
* is received, then the topology will be unable to be used.
*/
if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
crm_notice("Matched %s with %s by attribute", node, tp->target);
return TRUE;
}
break;
case 1:
/* This level targets by name, so tp->target is a regular expression
* matching names of nodes to be targeted.
*/
if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
crm_info("Bad regex '%s' for fencing level", tp->target);
} else {
int status = regexec(&r_patt, node, 0, NULL, 0);
regfree(&r_patt);
if (status == 0) {
crm_notice("Matched %s with %s by name", node, tp->target);
return TRUE;
}
}
break;
case 0:
crm_trace("Testing %s against %s", node, tp->target);
return safe_str_eq(tp->target, node);
}
crm_trace("No match for %s with %s", node, tp->target);
return FALSE;
}
stonith_topology_t *
find_topology_for_host(const char *host)
{
GHashTableIter tIter;
stonith_topology_t *tp = g_hash_table_lookup(topology, host);
if(tp != NULL) {
crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
return tp;
}
g_hash_table_iter_init(&tIter, topology);
while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
if (topology_matches(tp, host)) {
crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
return tp;
}
}
crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
return NULL;
}
/*!
* \internal
* \brief Set fencing operation's device list to target's next topology level
*
* \param[in,out] op Remote fencing operation to modify
*
* \return pcmk_ok if successful, if no target was specified (i.e. queries),
*         or if the target has no topology; -EINVAL if there are no more
*         topology levels to try
*/
static int
stonith_topology_next(remote_fencing_op_t * op)
{
stonith_topology_t *tp = NULL;
if (op->target) {
/* Queries don't have a target set */
tp = find_topology_for_host(op->target);
}
if (topology_is_empty(tp)) {
return pcmk_ok;
}
set_bit(op->call_options, st_opt_topology);
/* This is a new level, so undo any remapping left over from previous */
undo_op_remap(op);
do {
op->level++;
} while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
if (op->level < ST_LEVEL_MAX) {
crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s",
op->level, op->target, g_list_length(tp->levels[op->level]),
op->client_name, op->originator, op->id);
set_op_device_list(op, tp->levels[op->level]);
if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
/* A reboot has been requested for a topology level with multiple
* devices. Instead of rebooting the devices sequentially, we will
* turn them all off, then turn them all on again. (Think about
* switched power outlets for redundant power supplies.)
*/
op_phase_off(op);
}
return pcmk_ok;
}
crm_notice("All fencing options to fence %s for %s@%s.%.8s failed",
op->target, op->client_name, op->originator, op->id);
return -EINVAL;
}
/*!
* \brief Check whether this operation is a duplicate of another in-flight
* operation. If so, merge this operation into the in-flight operation and
* mark it as a duplicate.
*/
static void
merge_duplicates(remote_fencing_op_t * op)
{
GHashTableIter iter;
remote_fencing_op_t *other = NULL;
time_t now = time(NULL);
- g_hash_table_iter_init(&iter, remote_op_list);
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
crm_node_t *peer = NULL;
const char *other_action = op_requested_action(other);
if (other->state > st_exec) {
/* Must be in-progress */
continue;
} else if (safe_str_neq(op->target, other->target)) {
/* Must be for the same node */
continue;
} else if (safe_str_neq(op->action, other_action)) {
crm_trace("Must be for the same action: %s vs. %s",
op->action, other_action);
continue;
} else if (safe_str_eq(op->client_name, other->client_name)) {
crm_trace("Must be for different clients: %s", op->client_name);
continue;
} else if (safe_str_eq(other->target, other->originator)) {
crm_trace("Can't be a suicide operation: %s", other->target);
continue;
}
peer = crm_get_peer(0, other->originator);
if(fencing_peer_active(peer) == FALSE) {
crm_notice("Failing stonith action %s for node %s originating from %s@%s.%.8s: Originator is dead",
other->action, other->target, other->client_name, other->originator, other->id);
other->state = st_failed;
continue;
} else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) {
crm_info("Stonith action %s for node %s originating from %s@%s.%.8s is too old: %ld vs. %ld + %d",
other->action, other->target, other->client_name, other->originator, other->id,
now, other->created, other->total_timeout);
continue;
}
/* There is another in-flight request to fence the same host
* Piggyback on that instead. If it fails, so do we.
*/
other->duplicates = g_list_append(other->duplicates, op);
if (other->total_timeout == 0) {
crm_trace("Making a best-guess as to the timeout used");
other->total_timeout = op->total_timeout =
TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
}
crm_notice
("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)",
op->action, op->target, op->client_name, op->id, other->client_name, other->originator,
other->id, other->total_timeout);
report_timeout_period(op, other->total_timeout);
op->state = st_duplicate;
}
}
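/*!
* \internal
* \brief Count the number of peers that can currently participate in fencing
*/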
static uint32_t fencing_active_peers(void)
{
uint32_t count = 0;
crm_node_t *entry;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
if(fencing_peer_active(entry)) {
count++;
}
}
return count;
}
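/*!
* \internal
* \brief Record a manual confirmation that the target is safely down
*
* \param[in] msg Request XML containing the fencing target
* \param[in,out] op Operation to mark as done
*
* \return -EINPROGRESS (replies are sent via the asynchronous reply path)
*/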
int
stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
{
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
op->state = st_done;
op->completed = time(NULL);
op->delegate = strdup("a human");
crm_notice("Injecting manual confirmation that %s is safely off/down",
crm_element_value(dev, F_STONITH_TARGET));
remote_op_done(op, msg, pcmk_ok, FALSE);
/* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
return -EINPROGRESS;
}
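/*!
* \internal
* \brief Map a cluster nodeid to a node name
*
* \param[in] nodeid Cluster nodeid to look up
*
* \return Newly allocated node name (which the caller must free),
*         or NULL if the nodeid is unknown
*/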
char *
stonith_get_peer_name(unsigned int nodeid)
{
crm_node_t *node = crm_find_peer(nodeid, NULL);
char *nodename = NULL;
if (node && node->uname) {
return strdup(node->uname);
} else if ((nodename = get_node_name(nodeid))) {
return nodename;
} else {
const char *last_known_name = g_hash_table_lookup(known_peer_names, GUINT_TO_POINTER(nodeid));
if (last_known_name) {
crm_debug("Use the last known name %s for nodeid %u", last_known_name, nodeid);
return strdup(last_known_name);
}
}
return NULL;
}
/*!
* \internal
* \brief Create a new remote stonith operation
*
* \param[in] client ID of local stonith client that initiated the operation
* \param[in] request The request from the client that started the operation
* \param[in] peer TRUE if this operation is owned by another stonith peer
* (an operation owned by one peer is stored on all peers,
* but only the owner executes it; all nodes get the results
* once the owner finishes execution)
*/
void *
create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
{
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
int call_options = 0;
- if (remote_op_list == NULL) {
- remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
- }
+ init_stonith_remote_op_hash_table(&stonith_remote_op_list);
/* If this operation is owned by another node, check to make
* sure we haven't already created this operation. */
if (peer && dev) {
const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
CRM_CHECK(op_id != NULL, return NULL);
- op = g_hash_table_lookup(remote_op_list, op_id);
+ op = g_hash_table_lookup(stonith_remote_op_list, op_id);
if (op) {
crm_debug("%s already exists", op_id);
return op;
}
}
op = calloc(1, sizeof(remote_fencing_op_t));
crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
if (peer && dev) {
op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
} else {
op->id = crm_generate_uuid();
}
- g_hash_table_replace(remote_op_list, op->id, op);
- CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL);
+ g_hash_table_replace(stonith_remote_op_list, op->id, op);
+ CRM_LOG_ASSERT(g_hash_table_lookup(stonith_remote_op_list, op->id) != NULL);
crm_trace("Created %s", op->id);
op->state = st_query;
op->replies_expected = fencing_active_peers();
op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
op->created = time(NULL);
if (op->originator == NULL) {
/* Local or relayed request */
op->originator = strdup(stonith_our_uname);
}
CRM_LOG_ASSERT(client != NULL);
if (client) {
op->client_id = strdup(client);
}
op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
op->call_options = call_options;
crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
crm_trace("%s new stonith op: %s - %s of %s for %s",
(peer
&& dev) ? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name);
if (op->call_options & st_opt_cs_nodeid) {
int nodeid = crm_atoi(op->target, NULL);
char *nodename = stonith_get_peer_name(nodeid);
/* Ensure the conversion only happens once */
op->call_options &= ~st_opt_cs_nodeid;
if (nodename) {
free(op->target);
op->target = nodename;
} else {
crm_warn("Could not expand nodeid '%s' into a host name", op->target);
}
}
/* check to see if this is a duplicate operation of another in-flight operation */
merge_duplicates(op);
+ if (op->state != st_duplicate) {
+ /* kick history readers so they learn about the new operation */
+ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+ }
+
return op;
}
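/*!
* \internal
* \brief Create a remote fencing operation and kick off the peer query
*
* \param[in] client Local client that initiated the request (if any)
* \param[in] request Request XML
* \param[in] manual_ack TRUE if this is a manual fencing confirmation
*
* \return The new fencing operation
*/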
remote_fencing_op_t *
initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
{
int query_timeout = 0;
xmlNode *query = NULL;
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
if (client) {
client_id = client->id;
} else {
client_id = crm_element_value(request, F_STONITH_CLIENTID);
}
CRM_LOG_ASSERT(client_id != NULL);
op = create_remote_stonith_op(client_id, request, FALSE);
op->owner = TRUE;
if (manual_ack) {
crm_notice("Initiating manual confirmation for %s: %s",
op->target, op->id);
return op;
}
CRM_CHECK(op->action, return NULL);
if (stonith_topology_next(op) != pcmk_ok) {
op->state = st_failed;
}
switch (op->state) {
case st_failed:
crm_warn("Could not request peer fencing (%s) of %s "
CRM_XS " id=%s", op->action, op->target, op->id);
remote_op_done(op, NULL, -EINVAL, FALSE);
return op;
case st_duplicate:
crm_info("Requesting peer fencing (%s) of %s (duplicate) "
CRM_XS " id=%s", op->action, op->target, op->id);
return op;
default:
crm_notice("Requesting peer fencing (%s) of %s "
CRM_XS " id=%s state=%d",
op->action, op->target, op->id, op->state);
}
query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
NULL, op->call_options);
crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
crm_xml_add(query, F_STONITH_TARGET, op->target);
crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
return op;
}
enum find_best_peer_options {
/*! Skip checking the target peer for capable fencing devices */
FIND_PEER_SKIP_TARGET = 0x0001,
/*! Only check the target peer for capable fencing devices */
FIND_PEER_TARGET_ONLY = 0x0002,
/*! Skip peers and devices that are not verified */
FIND_PEER_VERIFIED_ONLY = 0x0004,
};
static st_query_result_t *
find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
{
GListPtr iter = NULL;
gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
if (!device && is_set(op->call_options, st_opt_topology)) {
return NULL;
}
for (iter = op->query_results; iter != NULL; iter = iter->next) {
st_query_result_t *peer = iter->data;
crm_trace("Testing result from %s for %s with %d devices: %d %x",
peer->host, op->target, peer->ndevices, peer->tried, options);
if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) {
continue;
}
if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) {
continue;
}
if (is_set(op->call_options, st_opt_topology)) {
if (grab_peer_device(op, peer, device, verified_devices_only)) {
return peer;
}
} else if ((peer->tried == FALSE)
&& count_peer_devices(op, peer, verified_devices_only)) {
/* No topology: Use the current best peer */
crm_trace("Simple fencing");
return peer;
}
}
return NULL;
}
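/*!
* \internal
* \brief Choose the best peer to execute the next step of a fencing operation
*
* Prefer a peer other than the target with verified device access, then one
* with unverified access; self-fencing is the last resort, and is never
* allowed for the "on" phase of a remapped reboot.
*/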
static st_query_result_t *
stonith_choose_peer(remote_fencing_op_t * op)
{
const char *device = NULL;
st_query_result_t *peer = NULL;
uint32_t active = fencing_active_peers();
do {
if (op->devices) {
device = op->devices->data;
crm_trace("Checking for someone to fence (%s) %s with %s",
op->action, op->target, device);
} else {
crm_trace("Checking for someone to fence (%s) %s",
op->action, op->target);
}
/* Best choice is a peer other than the target with verified access */
peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
if (peer) {
crm_trace("Found verified peer %s for %s", peer->host, device?device:"");
return peer;
}
if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
return NULL;
}
/* If no other peer has verified access, next best is unverified access */
peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
if (peer) {
crm_trace("Found best unverified peer %s", peer->host);
return peer;
}
/* If no other peer can do it, last option is self-fencing
* (which is never allowed for the "on" phase of a remapped reboot)
*/
if (op->phase != st_phase_on) {
peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
if (peer) {
crm_trace("%s will fence itself", peer->host);
return peer;
}
}
/* Try the next fencing level if there is one (unless we're in the "on"
* phase of a remapped "reboot", because we ignore errors in that case)
*/
} while ((op->phase != st_phase_on)
&& is_set(op->call_options, st_opt_topology)
&& stonith_topology_next(op) == pcmk_ok);
crm_notice("Couldn't find anyone to fence (%s) %s with %s",
op->action, op->target, (device? device : "any device"));
return NULL;
}
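/*!
* \internal
* \brief Get the timeout to use for one device in the current phase
*
* \param[in] op Operation being executed
* \param[in] peer Peer that reported the device (or NULL)
* \param[in] device ID of the device (or NULL)
*
* \return The device's action-specific timeout (or the operation's base
*         timeout if none) plus the device's maximum random delay
*/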
static int
get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
const char *device)
{
device_properties_t *props;
if (!peer || !device) {
return op->base_timeout;
}
props = g_hash_table_lookup(peer->devices, device);
if (!props) {
return op->base_timeout;
}
return (props->custom_action_timeout[op->phase]?
props->custom_action_timeout[op->phase] : op->base_timeout)
+ props->delay_max[op->phase];
}
struct timeout_data {
const remote_fencing_op_t *op;
const st_query_result_t *peer;
int total_timeout;
};
/*!
* \internal
* \brief Add timeout to a total if device has not been executed yet
*
* \param[in] key GHashTable key (device ID)
* \param[in] value GHashTable value (device properties)
* \param[in] user_data Timeout data
*/
static void
add_device_timeout(gpointer key, gpointer value, gpointer user_data)
{
const char *device_id = key;
device_properties_t *props = value;
struct timeout_data *timeout = user_data;
if (!props->executed[timeout->op->phase]
&& !props->disallowed[timeout->op->phase]) {
timeout->total_timeout += get_device_timeout(timeout->op,
timeout->peer, device_id);
}
}
static int
get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
{
struct timeout_data timeout;
timeout.op = op;
timeout.peer = peer;
timeout.total_timeout = 0;
g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
}
static int
get_op_total_timeout(const remote_fencing_op_t *op,
const st_query_result_t *chosen_peer)
{
int total_timeout = 0;
stonith_topology_t *tp = find_topology_for_host(op->target);
if (is_set(op->call_options, st_opt_topology) && tp) {
int i;
GListPtr device_list = NULL;
GListPtr iter = NULL;
/* Yep, this looks scary, nested loops all over the place.
* Here is what is going on.
* Loop1: Iterate through fencing levels.
* Loop2: If a fencing level has devices, loop through each device
* Loop3: For each device in a fencing level, see what peer owns it
* and what that peer has reported the timeout is for the device.
*/
for (i = 0; i < ST_LEVEL_MAX; i++) {
if (!tp->levels[i]) {
continue;
}
for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
for (iter = op->query_results; iter != NULL; iter = iter->next) {
const st_query_result_t *peer = iter->data;
if (find_peer_device(op, peer, device_list->data)) {
total_timeout += get_device_timeout(op, peer,
device_list->data);
break;
}
} /* End Loop3: match device with peer that owns device, find device's timeout period */
} /* End Loop2: iterate through devices at a specific level */
} /*End Loop1: iterate through fencing levels */
} else if (chosen_peer) {
total_timeout = get_peer_timeout(op, chosen_peer);
} else {
total_timeout = op->base_timeout;
}
return total_timeout ? total_timeout : op->base_timeout;
}
static void
report_timeout_period(remote_fencing_op_t * op, int op_timeout)
{
GListPtr iter = NULL;
xmlNode *update = NULL;
const char *client_node = NULL;
const char *client_id = NULL;
const char *call_id = NULL;
if (op->call_options & st_opt_sync_call) {
/* There is no reason to report the timeout for a synchronous call. It
* is impossible to use the reported timeout to do anything when the client
* is blocking for the response. This update is only important for
* async calls that require a callback to report the results in. */
return;
} else if (!op->request) {
return;
}
crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id);
client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
call_id = crm_element_value(op->request, F_STONITH_CALLID);
client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
if (!client_node || !call_id || !client_id) {
return;
}
if (safe_str_eq(client_node, stonith_our_uname)) {
/* The client is connected to this node, send the update directly to them */
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
return;
}
/* The client is connected to another node, relay this update to them */
update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
crm_xml_add(update, F_STONITH_CLIENTID, client_id);
crm_xml_add(update, F_STONITH_CALLID, call_id);
crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
free_xml(update);
for (iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *dup = iter->data;
crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id);
report_timeout_period(iter->data, op_timeout);
}
}
/*!
* \internal
* \brief Advance an operation to the next device in its topology
*
* \param[in,out] op Operation to advance
* \param[in] device ID of device just completed
* \param[in] msg XML reply that contained device result (if available)
* \param[in] rc Return code of device's execution
*/
static void
advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
int rc)
{
/* Advance to the next device at this topology level, if any */
if (op->devices) {
op->devices = op->devices->next;
}
/* Handle automatic unfencing if an "on" action was requested */
if ((op->phase == st_phase_requested) && safe_str_eq(op->action, "on")) {
/* If the device we just executed was required, it's not anymore */
remove_required_device(op, device);
/* If there are no more devices at this topology level, run through any
* remaining devices with automatic unfencing
*/
if (op->devices == NULL) {
op->devices = op->automatic_list;
}
}
if ((op->devices == NULL) && (op->phase == st_phase_off)) {
/* We're done with this level and with required devices, but we had
* remapped "reboot" to "off", so start over with "on". If any devices
* need to be turned back on, op->devices will be non-NULL after this.
*/
op_phase_on(op);
}
if (op->devices) {
/* Necessary devices remain, so execute the next one */
crm_trace("Next for %s on behalf of %s@%s (rc was %d)",
op->target, op->originator, op->client_name, rc);
call_remote_stonith(op, NULL);
} else {
/* We're done with all devices and phases, so finalize operation */
crm_trace("Marking complex fencing op for %s as complete", op->target);
op->state = st_done;
remote_op_done(op, msg, rc, FALSE);
}
}
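/*!
* \internal
* \brief Ask a peer fencer to execute the next step of a fencing operation
*
* \param[in,out] op Fencing operation to advance
* \param[in,out] peer Peer to ask (or NULL to choose the best available one)
*/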
void
call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
{
const char *device = NULL;
int timeout = op->base_timeout;
crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state);
if (peer == NULL && !is_set(op->call_options, st_opt_topology)) {
peer = stonith_choose_peer(op);
}
if (!op->op_timer_total) {
int total_timeout = get_op_total_timeout(op, peer);
op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout;
op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
report_timeout_period(op, op->total_timeout);
crm_info("Total timeout set to %d for peer's fencing of %s for %s"
CRM_XS "id=%s",
total_timeout, op->target, op->client_name, op->id);
}
if (is_set(op->call_options, st_opt_topology) && op->devices) {
/* Ignore any peer preference, they might not have the device we need */
/* When using topology, stonith_choose_peer() removes the device from
* further consideration, so be sure to calculate timeout beforehand */
peer = stonith_choose_peer(op);
device = op->devices->data;
timeout = get_device_timeout(op, peer, device);
}
if (peer) {
int timeout_one = 0;
xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
if (device) {
timeout_one = TIMEOUT_MULTIPLY_FACTOR *
get_device_timeout(op, peer, device);
crm_info("Requesting that '%s' perform op '%s %s' with '%s' for %s (%ds)", peer->host,
op->target, op->action, device, op->client_name, timeout_one);
crm_xml_add(remote_op, F_STONITH_DEVICE, device);
crm_xml_add(remote_op, F_STONITH_MODE, "slave");
} else {
timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
crm_info("Requesting that '%s' perform op '%s %s' for %s (%ds, %lds)",
peer->host, op->target, op->action, op->client_name, timeout_one, stonith_watchdog_timeout_ms);
crm_xml_add(remote_op, F_STONITH_MODE, "smart");
}
op->state = st_exec;
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
}
if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) {
crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
stonith_watchdog_timeout_ms/1000, op->target,
op->action, op->client_name, op->id, device);
op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
/* TODO check devices to verify watchdog will be in use */
} else if(stonith_watchdog_timeout_ms > 0
&& safe_str_eq(peer->host, op->target)
&& safe_str_neq(op->action, "on")) {
crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
stonith_watchdog_timeout_ms/1000, op->target,
op->action, op->client_name, op->id, device);
op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
} else {
op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
}
send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
peer->tried = TRUE;
free_xml(remote_op);
return;
} else if (op->phase == st_phase_on) {
/* A remapped "on" cannot be executed, but the node was already
* turned off successfully, so ignore the error and continue.
*/
crm_warn("Ignoring %s 'on' failure (no capable peers) for %s after successful 'off'",
device, op->target);
advance_op_topology(op, device, NULL, pcmk_ok);
return;
} else if (op->owner == FALSE) {
crm_err("Fencing (%s) of %s for %s is not ours to control",
op->action, op->target, op->client_name);
} else if (op->query_timer == 0) {
/* We've exhausted all available peers */
crm_info("No remaining peers capable of fencing (%s) %s for %s (%d)",
op->action, op->target, op->client_name, op->state);
CRM_LOG_ASSERT(op->state < st_done);
remote_op_timeout(op);
} else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
int rc = -EHOSTUNREACH;
/* if the operation never left the query state,
* but we have all the expected replies, then no devices
* are available to execute the fencing operation. */
if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) {
crm_notice("Waiting %lds for %s to self-fence (%s) for %s.%.8s (%p)",
stonith_watchdog_timeout_ms/1000, op->target,
op->action, op->client_name, op->id, device);
op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
return;
}
if (op->state == st_query) {
crm_info("No peers (out of %d) have devices capable of fencing (%s) %s for %s (%d)",
op->replies, op->action, op->target, op->client_name,
op->state);
rc = -ENODEV;
} else {
crm_info("No peers (out of %d) are capable of fencing (%s) %s for %s (%d)",
op->replies, op->action, op->target, op->client_name,
op->state);
}
op->state = st_failed;
remote_op_done(op, NULL, rc, FALSE);
} else if (device) {
crm_info("Waiting for additional peers capable of fencing (%s) %s with %s for %s.%.8s",
op->action, op->target, device, op->client_name, op->id);
} else {
crm_info("Waiting for additional peers capable of fencing (%s) %s for %s%.8s",
op->action, op->target, op->client_name, op->id);
}
}
/*!
* \internal
* \brief Comparison function for sorting query results
*
* \param[in] a GList item to compare
* \param[in] b GList item to compare
*
* \return Per the glib documentation, "a negative integer if the first value
* comes before the second, 0 if they are equal, or a positive integer
* if the first value comes after the second."
*/
static gint
sort_peers(gconstpointer a, gconstpointer b)
{
const st_query_result_t *peer_a = a;
const st_query_result_t *peer_b = b;
return (peer_b->ndevices - peer_a->ndevices);
}
/*!
* \internal
* \brief Determine if all the devices in the topology are found or not
*/
static gboolean
all_topology_devices_found(remote_fencing_op_t * op)
{
GListPtr device = NULL;
GListPtr iter = NULL;
device_properties_t *match = NULL;
stonith_topology_t *tp = NULL;
gboolean skip_target = FALSE;
int i;
tp = find_topology_for_host(op->target);
if (!tp) {
return FALSE;
}
if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) {
/* Don't count the devices on the target node if we are killing
* the target node. */
skip_target = TRUE;
}
for (i = 0; i < ST_LEVEL_MAX; i++) {
for (device = tp->levels[i]; device; device = device->next) {
match = NULL;
for (iter = op->query_results; iter && !match; iter = iter->next) {
st_query_result_t *peer = iter->data;
if (skip_target && safe_str_eq(peer->host, op->target)) {
continue;
}
match = find_peer_device(op, peer, device->data);
}
if (!match) {
return FALSE;
}
}
}
return TRUE;
}
/*!
* \internal
* \brief Parse action-specific device properties from XML
*
* \param[in] xml XML element containing the properties
* \param[in] peer Name of peer that sent XML (for logs)
* \param[in] device Device ID (for logs)
* \param[in] action Action the properties relate to (for logs)
* \param[in,out] op Operation that the properties are for
* \param[in] phase Phase the properties relate to
* \param[in,out] props Device properties to update
*/
static void
parse_action_specific(xmlNode *xml, const char *peer, const char *device,
const char *action, remote_fencing_op_t *op,
enum st_remap_phase phase, device_properties_t *props)
{
props->custom_action_timeout[phase] = 0;
crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
&props->custom_action_timeout[phase]);
if (props->custom_action_timeout[phase]) {
crm_trace("Peer %s with device %s returned %s action timeout %d",
peer, device, action, props->custom_action_timeout[phase]);
}
props->delay_max[phase] = 0;
crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
if (props->delay_max[phase]) {
crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
peer, device, props->delay_max[phase], action);
}
props->delay_base[phase] = 0;
crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
if (props->delay_base[phase]) {
crm_trace("Peer %s with device %s returned base delay %d for %s",
peer, device, props->delay_base[phase], action);
}
/* Handle devices with automatic unfencing */
if (safe_str_eq(action, "on")) {
int required = 0;
crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
if (required) {
crm_trace("Peer %s requires device %s to execute for action %s",
peer, device, action);
add_required_device(op, device);
}
}
/* If a reboot is remapped to off+on, it's possible that a node is allowed
* to perform one action but not another.
*/
if (crm_is_true(crm_element_value(xml, F_STONITH_ACTION_DISALLOWED))) {
props->disallowed[phase] = TRUE;
crm_trace("Peer %s is disallowed from executing %s for device %s",
peer, action, device);
}
}
/*!
* \internal
* \brief Parse one device's properties from peer's XML query reply
*
* \param[in] xml XML node containing device properties
* \param[in,out] op Operation that query and reply relate to
* \param[in,out] result Peer's results
* \param[in] device ID of device being parsed
*/
static void
add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
st_query_result_t *result, const char *device)
{
xmlNode *child;
int verified = 0;
device_properties_t *props = calloc(1, sizeof(device_properties_t));
/* Add a new entry to this result's devices list */
CRM_ASSERT(props != NULL);
g_hash_table_insert(result->devices, strdup(device), props);
/* Peers with verified (monitored) access will be preferred */
crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
if (verified) {
crm_trace("Peer %s has confirmed a verified device %s",
result->host, device);
props->verified = TRUE;
}
/* Parse action-specific device properties */
parse_action_specific(xml, result->host, device, op_requested_action(op),
op, st_phase_requested, props);
for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
/* Replies for "reboot" operations will include the action-specific
* values for "off" and "on" in child elements, just in case the reboot
* winds up getting remapped.
*/
if (safe_str_eq(ID(child), "off")) {
parse_action_specific(child, result->host, device, "off",
op, st_phase_off, props);
} else if (safe_str_eq(ID(child), "on")) {
parse_action_specific(child, result->host, device, "on",
op, st_phase_on, props);
}
}
}
/*!
* \internal
* \brief Parse a peer's XML query reply and add it to operation's results
*
* \param[in,out] op Operation that query and reply relate to
* \param[in] host Name of peer that sent this reply
* \param[in] ndevices Number of devices expected in reply
* \param[in] xml XML node containing device list
*
* \return Newly allocated result structure with parsed reply
*/
static st_query_result_t *
add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
{
st_query_result_t *result = calloc(1, sizeof(st_query_result_t));
xmlNode *child;
CRM_CHECK(result != NULL, return NULL);
result->host = strdup(host);
result->devices = crm_str_table_new();
/* Each child element describes one capable device available to the peer */
for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
const char *device = ID(child);
if (device) {
add_device_properties(child, op, result, device);
}
}
result->ndevices = g_hash_table_size(result->devices);
CRM_CHECK(ndevices == result->ndevices,
crm_err("Query claimed to have %d devices but %d found",
ndevices, result->ndevices));
op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
return result;
}
/*!
* \internal
* \brief Handle a peer's reply to our fencing query
*
* Parse a query result from XML and store it in the remote operation
* table, and when enough replies have been received, issue a fencing request.
*
* \param[in] msg XML reply received
*
* \return pcmk_ok on success, -errno on error
*
* \note See initiate_remote_stonith_op() for how the XML query was initially
* formed, and stonith_query() for how the peer formed its XML reply.
*/
int
process_remote_stonith_query(xmlNode * msg)
{
int ndevices = 0;
gboolean host_is_target = FALSE;
gboolean have_all_replies = FALSE;
const char *id = NULL;
const char *host = NULL;
remote_fencing_op_t *op = NULL;
st_query_result_t *result = NULL;
uint32_t replies_expected;
xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
CRM_CHECK(id != NULL, return -EPROTO);
dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);
- op = g_hash_table_lookup(remote_op_list, id);
+ op = g_hash_table_lookup(stonith_remote_op_list, id);
if (op == NULL) {
crm_debug("Received query reply for unknown or expired operation %s",
id);
return -EOPNOTSUPP;
}
replies_expected = QB_MIN(op->replies_expected, fencing_active_peers());
if ((++op->replies >= replies_expected) && (op->state == st_query)) {
have_all_replies = TRUE;
}
host = crm_element_value(msg, F_ORIG);
host_is_target = safe_str_eq(host, op->target);
crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s",
op->replies, replies_expected, host,
op->target, op->action, ndevices, id);
if (ndevices > 0) {
result = add_result(op, host, ndevices, dev);
}
if (is_set(op->call_options, st_opt_topology)) {
/* If we start the fencing before all the topology results are in,
* it is possible fencing levels will be skipped because of the missing
* query results. */
if (op->state == st_query && all_topology_devices_found(op)) {
/* All the query results are in for the topology, start the fencing ops. */
crm_trace("All topology devices found");
call_remote_stonith(op, result);
} else if (have_all_replies) {
crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
call_remote_stonith(op, NULL);
}
} else if (op->state == st_query) {
int nverified = count_peer_devices(op, result, TRUE);
/* We have a result for a non-topology fencing op that looks promising,
* go ahead and start fencing before query timeout */
if (result && (host_is_target == FALSE) && nverified) {
/* we have a verified device living on a peer that is not the target */
crm_trace("Found %d verified devices", nverified);
call_remote_stonith(op, result);
} else if (have_all_replies) {
crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
call_remote_stonith(op, NULL);
} else {
crm_trace("Waiting for more peer results before launching fencing operation");
}
} else if (result && (op->state == st_done)) {
crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
result->host, result->ndevices, op->state);
}
return pcmk_ok;
}
/*!
* \internal
* \brief Handle a peer's reply to a fencing request
*
* Parse a fencing reply from XML, and either finalize the operation
* or attempt another device as appropriate.
*
* \param[in] msg XML reply received
*
* \return pcmk_ok on success, -errno on error
*/
int
process_remote_stonith_exec(xmlNode * msg)
{
int rc = 0;
const char *id = NULL;
const char *device = NULL;
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
CRM_CHECK(id != NULL, return -EPROTO);
dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
crm_element_value_int(dev, F_STONITH_RC, &rc);
device = crm_element_value(dev, F_STONITH_DEVICE);
- if (remote_op_list) {
- op = g_hash_table_lookup(remote_op_list, id);
+ if (stonith_remote_op_list) {
+ op = g_hash_table_lookup(stonith_remote_op_list, id);
}
if (op == NULL && rc == pcmk_ok) {
/* Record successful fencing operations */
const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
op = create_remote_stonith_op(client_id, dev, TRUE);
}
if (op == NULL) {
/* Could be for an event that began before we started */
/* TODO: Record the op for later querying */
crm_info("Received peer result of unknown or expired operation %s", id);
return -EOPNOTSUPP;
}
if (op->devices && device && safe_str_neq(op->devices->data, device)) {
crm_err("Received outdated reply for device %s (instead of %s) to "
"fence (%s) %s. Operation already timed out at peer level.",
device, (const char *) op->devices->data, op->action, op->target);
return rc;
}
if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) {
crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)",
op->action, op->target, op->client_name, op->originator, op->id,
pcmk_strerror(rc), rc);
if (rc == pcmk_ok) {
op->state = st_done;
} else {
op->state = st_failed;
}
remote_op_done(op, msg, rc, FALSE);
return pcmk_ok;
} else if (safe_str_neq(op->originator, stonith_our_uname)) {
/* If this isn't a remote level broadcast, and we are not the
* originator of the operation, we should not be receiving this msg. */
crm_err
("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)",
stonith_our_uname, device, op->target);
return rc;
}
if (is_set(op->call_options, st_opt_topology)) {
const char *device = crm_element_value(msg, F_STONITH_DEVICE);
crm_notice("Call to %s for '%s %s' on behalf of %s@%s: %s (%d)",
device, op->target, op->action, op->client_name, op->originator,
pcmk_strerror(rc), rc);
/* We own the op, and it is complete. Broadcast the result to all nodes
* and notify our local clients. */
if (op->state == st_done) {
remote_op_done(op, msg, rc, FALSE);
return rc;
}
if ((op->phase == 2) && (rc != pcmk_ok)) {
/* A remapped "on" failed, but the node was already turned off
* successfully, so ignore the error and continue.
*/
crm_warn("Ignoring %s 'on' failure (exit code %d) for %s after successful 'off'",
device, rc, op->target);
rc = pcmk_ok;
}
if (rc == pcmk_ok) {
/* An operation completed successfully. Try another device if
* necessary, otherwise mark the operation as done. */
advance_op_topology(op, device, msg, rc);
return rc;
} else {
/* This device failed, time to try another topology level. If no other
* levels are available, mark this operation as failed and report results. */
if (stonith_topology_next(op) != pcmk_ok) {
op->state = st_failed;
remote_op_done(op, msg, rc, FALSE);
return rc;
}
}
} else if (rc == pcmk_ok && op->devices == NULL) {
crm_trace("All done for %s", op->target);
op->state = st_done;
remote_op_done(op, msg, rc, FALSE);
return rc;
} else if (rc == -ETIME && op->devices == NULL) {
/* If the operation timed out don't bother retrying other peers. */
op->state = st_failed;
remote_op_done(op, msg, rc, FALSE);
return rc;
} else {
/* fall-through and attempt other fencing action using another peer */
}
/* Retry on failure */
crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
op->client_name, rc);
call_remote_stonith(op, NULL);
return rc;
}
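Taken together, these branches implement the per-level rules: a phase-2 "on" failure after a successful "off" is forgiven, success advances within the current level, and a failure falls through to the next topology level until none remain. Schematically (illustrative, not a literal excerpt):

/* Schematic of the topology result handling above (illustrative only) */
if ((op->phase == st_phase_on) && (rc != pcmk_ok)) {
    rc = pcmk_ok;                    /* target already off: forgive the "on" */
}
if (rc == pcmk_ok) {
    advance_op_topology(op, device, msg, rc);   /* next device, or done */
} else if (stonith_topology_next(op) != pcmk_ok) {
    op->state = st_failed;                      /* no levels left */
    remote_op_done(op, msg, rc, FALSE);
}
/* else: a new level was selected and its devices are tried next */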
-int
-stonith_fence_history(xmlNode * msg, xmlNode ** output)
-{
- int rc = 0;
- const char *target = NULL;
- xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE);
- char *nodename = NULL;
-
- if (dev) {
- int options = 0;
-
- target = crm_element_value(dev, F_STONITH_TARGET);
- crm_element_value_int(msg, F_STONITH_CALLOPTS, &options);
- if (target && (options & st_opt_cs_nodeid)) {
- int nodeid = crm_atoi(target, NULL);
-
- nodename = stonith_get_peer_name(nodeid);
- if (nodename) {
- target = nodename;
- }
- }
- }
-
- crm_trace("Looking for operations on %s in %p", target, remote_op_list);
-
- *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST);
- if (remote_op_list) {
- GHashTableIter iter;
- remote_fencing_op_t *op = NULL;
-
- g_hash_table_iter_init(&iter, remote_op_list);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
- xmlNode *entry = NULL;
-
- if (target && strcmp(op->target, target) != 0) {
- continue;
- }
-
- rc = 0;
- crm_trace("Attaching op %s", op->id);
- entry = create_xml_node(*output, STONITH_OP_EXEC);
- crm_xml_add(entry, F_STONITH_TARGET, op->target);
- crm_xml_add(entry, F_STONITH_ACTION, op->action);
- crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
- crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
- crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name);
- crm_xml_add_int(entry, F_STONITH_DATE, (int) op->completed);
- crm_xml_add_int(entry, F_STONITH_STATE, op->state);
- }
- }
-
- free(nodename);
- return rc;
-}
-
gboolean
stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
{
GHashTableIter iter;
time_t now = time(NULL);
remote_fencing_op_t *rop = NULL;
- crm_trace("tolerance=%d, remote_op_list=%p", tolerance, remote_op_list);
+ crm_trace("tolerance=%d, stonith_remote_op_list=%p", tolerance,
+ stonith_remote_op_list);
- if (tolerance <= 0 || !remote_op_list || target == NULL || action == NULL) {
+ if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
+ action == NULL) {
return FALSE;
}
- g_hash_table_iter_init(&iter, remote_op_list);
+ g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
if (strcmp(rop->target, target) != 0) {
continue;
} else if (rop->state != st_done) {
continue;
/* We don't have to worry about remapped reboots here
* because if state is done, any remapping has been undone
*/
} else if (strcmp(rop->action, action) != 0) {
continue;
} else if ((rop->completed + tolerance) < now) {
continue;
}
crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
target, action, tolerance, rop->delegate, rop->originator);
return TRUE;
}
return FALSE;
}
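A caller typically consults this before starting a new operation, so that back-to-back requests within the client-supplied tolerance are treated as already satisfied. A hypothetical call site (names assumed, not taken from this patch):

/* Hypothetical call site: 'tolerance' comes from the client request
 * (F_STONITH_TOLERANCE) and is in seconds. */
int tolerance = 0;

crm_element_value_int(request, F_STONITH_TOLERANCE, &tolerance);
if (stonith_check_fence_tolerance(tolerance, target, action)) {
    /* a recent matching fencing exists: report success without re-fencing */
    return pcmk_ok;
}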
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 5ccd83b9aa..0eaf0491f5 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,1493 +1,1497 @@
/*
* Copyright 2009-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
char *stonith_our_uname = NULL;
char *stonith_our_uuid = NULL;
long stonith_watchdog_timeout_ms = 0;
static GMainLoop *mainloop = NULL;
gboolean stand_alone = FALSE;
static gboolean no_cib_connect = FALSE;
static gboolean stonith_shutdown_flag = FALSE;
static qb_ipcs_service_t *ipcs = NULL;
static xmlNode *local_cib = NULL;
GHashTable *known_peer_names = NULL;
static cib_t *cib_api = NULL;
static void *cib_library = NULL;
static void stonith_shutdown(int nsig);
static void stonith_cleanup(void);
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
if (stonith_shutdown_flag) {
crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
return -EPERM;
}
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
st_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection created for %p", c);
}
/* Exit code means? */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
xmlNode *request = NULL;
crm_client_t *c = crm_client_get(qbc);
const char *op = NULL;
if (c == NULL) {
crm_info("Invalid client: %p", qbc);
return 0;
}
request = crm_ipcs_recv(c, data, size, &id, &flags);
if (request == NULL) {
crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__);
return 0;
}
op = crm_element_value(request, F_CRM_TASK);
if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) {
crm_xml_add(request, F_TYPE, T_STONITH_NG);
crm_xml_add(request, F_STONITH_OPERATION, op);
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
free_xml(request);
return 0;
}
if (c->name == NULL) {
const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
if (value == NULL) {
value = "unknown";
}
c->name = crm_strdup_printf("%s.%u", value, c->pid);
}
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
crm_trace("Flags %u/%u for command %u from %s", flags, call_options, id, crm_client_name(c));
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
c->request_id = id; /* Reply only to the last one */
}
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
crm_log_xml_trace(request, "Client[inbound]");
stonith_command(c, id, flags, request, NULL);
free_xml(request);
return 0;
}
/* Error code means? */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p closed", c);
crm_client_destroy(client);
/* 0 means: yes, go ahead and destroy the connection */
return 0;
}
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p destroyed", c);
st_ipc_closed(c);
}
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
const char *remote_peer = crm_element_value(msg, F_ORIG);
const char *op = crm_element_value(msg, F_STONITH_OPERATION);
if (crm_str_eq(op, "poke", TRUE)) {
return;
}
crm_log_xml_trace(msg, "Peer[inbound]");
stonith_command(NULL, 0, 0, msg, remote_peer);
}
#if SUPPORT_COROSYNC
static void
stonith_peer_ais_callback(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = string2xml(data);
if (xml == NULL) {
crm_err("Invalid XML: '%.120s'", data);
free(data);
return;
}
crm_xml_add(xml, F_ORIG, from);
/* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
stonith_peer_callback(xml, NULL);
}
free_xml(xml);
free(data);
return;
}
static void
stonith_peer_cs_destroy(gpointer user_data)
{
crm_crit("Lost connection to cluster layer, shutting down");
stonith_shutdown(0);
}
#endif
void
do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
{
/* send callback to originating child */
crm_client_t *client_obj = NULL;
int local_rc = pcmk_ok;
crm_trace("Sending response");
client_obj = crm_client_get_by_id(client_id);
crm_trace("Sending callback to request originator");
if (client_obj == NULL) {
local_rc = -1;
crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set.");
} else {
int rid = 0;
if (sync_reply) {
CRM_LOG_ASSERT(client_obj->request_id);
rid = client_obj->request_id;
client_obj->request_id = 0;
crm_trace("Sending response %d to %s %s",
rid, client_obj->name, from_peer ? "(originator of delegated request)" : "");
} else {
crm_trace("Sending an event to %s %s",
client_obj->name, from_peer ? "(originator of delegated request)" : "");
}
local_rc = crm_ipcs_send(client_obj, rid, notify_src, sync_reply?crm_ipc_flags_none:crm_ipc_server_event);
}
if (local_rc < pcmk_ok && client_obj != NULL) {
crm_warn("%sSync reply to %s failed: %s",
sync_reply ? "" : "A-",
client_obj ? client_obj->name : "", pcmk_strerror(local_rc));
}
}
long long
get_stonith_flag(const char *name)
{
if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) {
- return 0x01;
+ return st_callback_notify_fence;
} else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) {
- return 0x04;
+ return st_callback_device_add;
} else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) {
- return 0x10;
+ return st_callback_device_del;
+
+ } else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY)) {
+ return st_callback_notify_history;
+
}
- return 0;
+ return st_callback_unknown;
}
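Because each returned value is a distinct bit, a client's subscriptions can be stored in a single integer and tested with a mask, which is what stonith_notify_client() below relies on. A minimal sketch of the subscribe-and-test pattern (illustrative):

/* Illustrative: subscriptions accumulate as bits and are tested with a mask */
long long options = 0;

options |= get_stonith_flag(T_STONITH_NOTIFY_FENCE);
options |= get_stonith_flag(T_STONITH_NOTIFY_HISTORY);

if (options & get_stonith_flag(T_STONITH_NOTIFY_HISTORY)) {
    /* this client asked for history notifications */
}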
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
crm_client_t *client = value;
const char *type = NULL;
CRM_CHECK(client != NULL, return);
CRM_CHECK(update_msg != NULL, return);
type = crm_element_value(update_msg, F_SUBTYPE);
CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
if (client->ipcs == NULL) {
crm_trace("Skipping client with NULL channel");
return;
}
if (client->options & get_stonith_flag(type)) {
int rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error);
if (rc <= 0) {
crm_warn("%s notification of client %s.%.6s failed: %s (%d)",
type, crm_client_name(client), client->id, pcmk_strerror(rc), rc);
} else {
crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client),
client->id);
}
}
}
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
crm_client_t *client = NULL;
xmlNode *notify_data = NULL;
if (!timeout || !call_id || !client_id) {
return;
}
client = crm_client_get_by_id(client_id);
if (!client) {
return;
}
notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);
crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
if (client) {
crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event);
}
free_xml(notify_data);
}
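The counterpart lives in clients that registered with st_opt_timeout_updates: when this event arrives, they stretch the pending callback's timer instead of timing out early. A rough client-side sketch (my_extend_callback_timer() is hypothetical):

/* Rough client-side sketch; my_extend_callback_timer() is hypothetical */
static void
on_timeout_update(xmlNode *msg)
{
    int timeout = 0;
    const char *call_id = crm_element_value(msg, F_STONITH_CALLID);

    crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
    my_extend_callback_timer(call_id, timeout);
}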
void
do_stonith_notify(int options, const char *type, int result, xmlNode * data)
{
/* TODO: Standardize the contents of data */
xmlNode *update_msg = create_xml_node(NULL, "notify");
CRM_CHECK(type != NULL,;);
crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(update_msg, F_SUBTYPE, type);
crm_xml_add(update_msg, F_STONITH_OPERATION, type);
crm_xml_add_int(update_msg, F_STONITH_RC, result);
if (data != NULL) {
add_message_xml(update_msg, F_STONITH_CALLDATA, data);
}
crm_trace("Notifying clients");
g_hash_table_foreach(client_connections, stonith_notify_client, update_msg);
free_xml(update_msg);
crm_trace("Notify complete");
}
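On the library side, a client subscribes by event name, and the server maps that name onto the flag bits via get_stonith_flag(). A minimal subscription sketch for the new history event, assuming the usual libstonithd entry points (stonith_api_new(), cmds->connect(), cmds->register_notification()):

#include <crm/stonith-ng.h>

/* Sketch only: error handling reduced to the bare minimum */
static void
history_cb(stonith_t *st, stonith_event_t *event)
{
    /* react to a change in the fencing history */
}

static void
subscribe_history(void)
{
    stonith_t *st = stonith_api_new();

    if (st && (st->cmds->connect(st, "history-watcher", NULL) == pcmk_ok)) {
        st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY,
                                        history_cb);
    }
}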
static void
do_stonith_notify_config(int options, const char *op, int rc,
const char *desc, int active)
{
xmlNode *notify_data = create_xml_node(NULL, op);
CRM_CHECK(notify_data != NULL, return);
crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
do_stonith_notify(options, op, rc, notify_data);
free_xml(notify_data);
}
void
do_stonith_notify_device(int options, const char *op, int rc, const char *desc)
{
do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list));
}
void
do_stonith_notify_level(int options, const char *op, int rc, const char *desc)
{
do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology));
}
static void
topology_remove_helper(const char *node, int level)
{
int rc;
char *desc = NULL;
xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
rc = stonith_level_remove(data, &desc);
do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc);
free_xml(data);
free(desc);
}
static void
remove_cib_device(xmlXPathObjectPtr xpathObj)
{
int max = numXpathResults(xpathObj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
const char *rsc_id = NULL;
const char *standard = NULL;
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match != NULL) {
standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
}
if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
continue;
}
rsc_id = crm_element_value(match, XML_ATTR_ID);
stonith_device_remove(rsc_id, TRUE);
}
}
static void
handle_topology_change(xmlNode *match, bool remove)
{
int rc;
char *desc = NULL;
CRM_CHECK(match != NULL, return);
crm_trace("Updating %s", ID(match));
if(remove) {
int index = 0;
char *key = stonith_level_key(match, -1);
crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
topology_remove_helper(key, index);
free(key);
}
rc = stonith_level_register(match, &desc);
do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc);
free(desc);
}
static void
remove_fencing_topology(xmlXPathObjectPtr xpathObj)
{
int max = numXpathResults(xpathObj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if (match && crm_element_value(match, XML_DIFF_MARKER)) {
/* Deletion */
int index = 0;
char *target = stonith_level_key(match, -1);
crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
if (target == NULL) {
crm_err("Invalid fencing target in element %s", ID(match));
} else if (index <= 0) {
crm_err("Invalid level for %s in element %s", target, ID(match));
} else {
topology_remove_helper(target, index);
}
/* } else { Deal with modifications during the 'addition' stage */
}
}
}
static void
register_fencing_topology(xmlXPathObjectPtr xpathObj)
{
int max = numXpathResults(xpathObj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
handle_topology_change(match, TRUE);
}
}
/* Fencing
*/
static void
fencing_topology_init()
{
xmlXPathObjectPtr xpathObj = NULL;
const char *xpath = "//" XML_TAG_FENCING_LEVEL;
crm_trace("Full topology refresh");
free_topology_list();
init_topology_list();
/* Grab everything */
xpathObj = xpath_search(local_cib, xpath);
register_fencing_topology(xpathObj);
freeXpathObject(xpathObj);
}
#define rsc_name(x) x->clone_name?x->clone_name:x->id
/*!
* \internal
* \brief Check whether our uname is in a resource's allowed node list
*
* \param[in] rsc Resource to check
*
* \return Pointer to node object if found, NULL otherwise
*/
static node_t *
our_node_allowed_for(resource_t *rsc)
{
GHashTableIter iter;
node_t *node = NULL;
if (rsc && stonith_our_uname) {
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node && strcmp(node->details->uname, stonith_our_uname) == 0) {
break;
}
node = NULL;
}
}
return node;
}
/*!
* \internal
* \brief If a resource or any of its children are STONITH devices, update their
* definitions given a cluster working set.
*
* \param[in] rsc Resource to check
* \param[in] data_set Cluster working set with device information
*/
static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set)
{
node_t *node = NULL;
const char *value = NULL;
const char *rclass = NULL;
node_t *parent = NULL;
gboolean remove = TRUE;
/* If this is a complex resource, check children rather than this resource itself.
* TODO: Mark each installed device and remove if untouched when this process finishes.
*/
if(rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, data_set);
if(pe_rsc_is_clone(rsc)) {
crm_trace("Only processing one copy of the clone %s", rsc->id);
break;
}
}
return;
}
/* We only care about STONITH resources. */
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)) {
return;
}
/* If this STONITH resource is disabled, just remove it. */
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
if (safe_str_eq(value, RSC_STOPPED)) {
crm_info("Device %s has been disabled", rsc->id);
goto update_done;
}
/* Check whether our node is allowed for this resource (and its parent if in a group) */
node = our_node_allowed_for(rsc);
if (rsc->parent && (rsc->parent->variant == pe_group)) {
parent = our_node_allowed_for(rsc->parent);
}
if(node == NULL) {
/* Our node is disallowed, so remove the device */
GHashTableIter iter;
crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
crm_trace("Available: %s = %d", node->details->uname, node->weight);
}
goto update_done;
} else if(node->weight < 0 || (parent && parent->weight < 0)) {
/* Our node (or its group) is disallowed by score, so remove the device */
char *score = score2char((node->weight < 0) ? node->weight : parent->weight);
crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
free(score);
goto update_done;
} else {
/* Our node is allowed, so update the device information */
xmlNode *data;
GHashTableIter gIter;
stonith_key_value_t *params = NULL;
const char *name = NULL;
const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
const char *rsc_provides = NULL;
crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
get_rsc_attributes(rsc->parameters, rsc, node, data_set);
get_meta_attributes(rsc->meta, rsc, node, data_set);
rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES);
g_hash_table_iter_init(&gIter, rsc->parameters);
while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
if (!name || !value) {
continue;
}
params = stonith_key_value_add(params, name, value);
crm_trace(" %s=%s", name, value);
}
remove = FALSE;
data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
agent, params, rsc_provides);
stonith_device_register(data, NULL, TRUE);
stonith_key_value_freeall(params, 1, 1);
free_xml(data);
}
update_done:
if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) {
stonith_device_remove(rsc_name(rsc), TRUE);
}
}
extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now);
/*!
* \internal
* \brief Update all STONITH device definitions based on current CIB
*/
static void
cib_devices_update(void)
{
GListPtr gIter = NULL;
pe_working_set_t data_set;
crm_info("Updating devices to version %s.%s.%s",
crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
crm_element_value(local_cib, XML_ATTR_GENERATION),
crm_element_value(local_cib, XML_ATTR_NUMUPDATES));
set_working_set_defaults(&data_set);
data_set.input = local_cib;
data_set.now = crm_time_new(NULL);
data_set.flags |= pe_flag_quick_location;
data_set.localhost = stonith_our_uname;
cluster_status(&data_set);
do_calculations(&data_set, NULL, NULL);
for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, &data_set);
}
data_set.input = NULL; /* Wasn't a copy */
cleanup_alloc_calculations(&data_set);
}
static void
update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
{
xmlNode *change = NULL;
char *reason = NULL;
bool needs_update = FALSE;
xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
const char *shortpath = NULL;
if(op == NULL || strcmp(op, "move") == 0) {
continue;
} else if(safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) {
const char *rsc_id = NULL;
char *search = NULL;
char *mutable = NULL;
if (strstr(xpath, XML_TAG_ATTR_SETS)) {
needs_update = TRUE;
break;
}
mutable = strdup(xpath);
rsc_id = strstr(mutable, "primitive[@id=\'");
if (rsc_id != NULL) {
rsc_id += strlen("primitive[@id=\'");
search = strchr(rsc_id, '\'');
}
if (search != NULL) {
*search = 0;
stonith_device_remove(rsc_id, TRUE);
} else {
crm_warn("Ignoring malformed CIB update (resource deletion)");
}
free(mutable);
} else if(strstr(xpath, XML_CIB_TAG_RESOURCES)) {
shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
reason = crm_strdup_printf("%s %s", op, shortpath+1);
needs_update = TRUE;
break;
} else if(strstr(xpath, XML_CIB_TAG_CONSTRAINTS)) {
shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
reason = crm_strdup_printf("%s %s", op, shortpath+1);
needs_update = TRUE;
break;
}
}
if(needs_update) {
crm_info("Updating device list from the cib: %s", reason);
cib_devices_update();
} else {
crm_trace("No updates for device list found in cib");
}
free(reason);
}
static void
update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
{
const char *reason = "none";
gboolean needs_update = FALSE;
xmlXPathObjectPtr xpath_obj = NULL;
/* process new constraints */
xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
if (numXpathResults(xpath_obj) > 0) {
int max = numXpathResults(xpath_obj), lpc = 0;
/* Safest and simplest to always recompute */
needs_update = TRUE;
reason = "new location constraint";
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpath_obj, lpc);
crm_log_xml_trace(match, "new constraint");
}
}
freeXpathObject(xpath_obj);
/* process deletions */
xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
if (numXpathResults(xpath_obj) > 0) {
remove_cib_device(xpath_obj);
}
freeXpathObject(xpath_obj);
/* process additions */
xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
if (numXpathResults(xpath_obj) > 0) {
int max = numXpathResults(xpath_obj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
const char *rsc_id = NULL;
const char *standard = NULL;
xmlNode *match = getXpathResult(xpath_obj, lpc);
rsc_id = crm_element_value(match, XML_ATTR_ID);
standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
continue;
}
crm_trace("Fencing resource %s was added or modified", rsc_id);
reason = "new resource";
needs_update = TRUE;
}
}
freeXpathObject(xpath_obj);
if(needs_update) {
crm_info("Updating device list from the cib: %s", reason);
cib_devices_update();
}
}
static void
update_cib_stonith_devices(const char *event, xmlNode * msg)
{
int format = 1;
xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
CRM_ASSERT(patchset);
crm_element_value_int(patchset, "format", &format);
switch(format) {
case 1:
update_cib_stonith_devices_v1(event, msg);
break;
case 2:
update_cib_stonith_devices_v2(event, msg);
break;
default:
crm_warn("Unknown patch format: %d", format);
}
}
/* Needs to hold node name + attribute name + attribute value + 75 */
#define XPATH_MAX 512
/*!
* \internal
* \brief Check whether a node has a specific attribute name/value
*
* \param[in] node Name of node to check
* \param[in] name Name of an attribute to look for
* \param[in] value The value the named attribute needs to be set to in order to be considered a match
*
* \return TRUE if the locally cached CIB has the specified node attribute
*/
gboolean
node_has_attr(const char *node, const char *name, const char *value)
{
char xpath[XPATH_MAX];
xmlNode *match;
int n;
CRM_CHECK(local_cib != NULL, return FALSE);
/* Search for the node's attributes in the CIB. While the schema allows
* multiple sets of instance attributes, and allows instance attributes to
* use id-ref to reference values elsewhere, that is intended for resources,
* so we ignore that here.
*/
n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES
"/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS
"/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']",
node, name, value);
match = get_xpath_object(xpath, local_cib, LOG_TRACE);
CRM_CHECK(n < XPATH_MAX, return FALSE);
return (match != NULL);
}
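This is what allows fencing topology targets to be expressed as node attributes rather than node names; a hypothetical check (attribute name and value invented for illustration):

/* Hypothetical: does node1 carry the attribute a topology entry targets? */
if (node_has_attr("node1", "fence-group", "rack1")) {
    /* a topology with target_attribute "fence-group" and value "rack1"
     * applies to node1 */
}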
static void
update_fencing_topology(const char *event, xmlNode * msg)
{
int format = 1;
const char *xpath;
xmlXPathObjectPtr xpathObj = NULL;
xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
CRM_ASSERT(patchset);
crm_element_value_int(patchset, "format", &format);
if(format == 1) {
/* Process deletions (only) */
xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
xpathObj = xpath_search(msg, xpath);
remove_fencing_topology(xpathObj);
freeXpathObject(xpathObj);
/* Process additions and changes */
xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
xpathObj = xpath_search(msg, xpath);
register_fencing_topology(xpathObj);
freeXpathObject(xpathObj);
} else if(format == 2) {
xmlNode *change = NULL;
int add[] = { 0, 0, 0 };
int del[] = { 0, 0, 0 };
xml_patch_versions(patchset, add, del);
for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
if(op == NULL) {
continue;
} else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) {
/* Change to a specific entry */
crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath);
if(strcmp(op, "move") == 0) {
continue;
} else if(strcmp(op, "create") == 0) {
handle_topology_change(change->children, FALSE);
} else if(strcmp(op, "modify") == 0) {
xmlNode *match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
handle_topology_change(match->children, TRUE);
}
} else if(strcmp(op, "delete") == 0) {
/* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
}
} else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
/* Change to the topology in general */
crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
} else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) {
/* Changes to the whole config section, possibly including the topology as a whole */
if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
op, add[0], add[1], add[2], xpath);
} else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
op, add[0], add[1], add[2], xpath);
fencing_topology_init();
return;
}
} else {
crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
op, add[0], add[1], add[2], xpath);
}
}
} else {
crm_warn("Unknown patch format: %d", format);
}
}
static bool have_cib_devices = FALSE;
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
int rc = pcmk_ok;
xmlNode *stonith_enabled_xml = NULL;
xmlNode *stonith_watchdog_xml = NULL;
const char *stonith_enabled_s = NULL;
static gboolean stonith_enabled_saved = TRUE;
if(!have_cib_devices) {
crm_trace("Skipping updates until we get a full dump");
return;
} else if(msg == NULL) {
crm_trace("Missing %s update", event);
return;
}
/* Maintain a local copy of the CIB so that we have full access
* to device definitions, location constraints, and node attributes
*/
if (local_cib != NULL) {
int rc = pcmk_ok;
xmlNode *patchset = NULL;
crm_element_value_int(msg, F_CIB_RC, &rc);
if (rc != pcmk_ok) {
return;
}
patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_log_patchset(LOG_TRACE, "Config update", patchset);
rc = xml_apply_patchset(local_cib, patchset, TRUE);
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
break;
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
break;
default:
crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
}
}
if (local_cib == NULL) {
crm_trace("Re-requesting the full cib");
rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
if(rc != pcmk_ok) {
crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
return;
}
CRM_ASSERT(local_cib != NULL);
stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
}
stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE);
if (stonith_enabled_xml) {
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
}
if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) {
long timeout_ms = 0;
const char *value = NULL;
stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
if (stonith_watchdog_xml) {
value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
}
if(value) {
timeout_ms = crm_get_msec(value);
}
if (timeout_ms < 0) {
timeout_ms = crm_auto_watchdog_timeout();
}
if(timeout_ms != stonith_watchdog_timeout_ms) {
crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
stonith_watchdog_timeout_ms = timeout_ms;
}
} else {
stonith_watchdog_timeout_ms = 0;
}
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
crm_trace("Ignoring cib updates while stonith is disabled");
stonith_enabled_saved = FALSE;
return;
} else if (stonith_enabled_saved == FALSE) {
crm_info("Updating stonith device and topology lists now that stonith is enabled");
stonith_enabled_saved = TRUE;
fencing_topology_init();
cib_devices_update();
} else {
update_fencing_topology(event, msg);
update_cib_stonith_devices(event, msg);
}
}
static void
init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
crm_info("Updating device list from the cib: init");
have_cib_devices = TRUE;
local_cib = copy_xml(output);
fencing_topology_init();
cib_devices_update();
}
static void
stonith_shutdown(int nsig)
{
stonith_shutdown_flag = TRUE;
crm_info("Terminating with %d clients",
crm_hash_table_size(client_connections));
if (mainloop != NULL && g_main_is_running(mainloop)) {
g_main_loop_quit(mainloop);
} else {
stonith_cleanup();
crm_exit(CRM_EX_OK);
}
}
static void
cib_connection_destroy(gpointer user_data)
{
if (stonith_shutdown_flag) {
crm_info("Connection to the CIB manager closed");
return;
} else {
crm_crit("Lost connection to the CIB manager, shutting down");
}
if (cib_api) {
cib_api->cmds->signoff(cib_api);
}
stonith_shutdown(0);
}
static void
stonith_cleanup(void)
{
if (cib_api) {
cib_api->cmds->signoff(cib_api);
}
if (ipcs) {
qb_ipcs_destroy(ipcs);
}
if (known_peer_names != NULL) {
g_hash_table_destroy(known_peer_names);
known_peer_names = NULL;
}
crm_peer_destroy();
crm_client_cleanup();
- free_remote_op_list();
+ free_stonith_remote_op_list();
free_topology_list();
free_device_list();
free_metadata_cache();
free(stonith_our_uname);
stonith_our_uname = NULL;
free_xml(local_cib);
local_cib = NULL;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
{"stand-alone", 0, 0, 's'},
{"stand-alone-w-cpg", 0, 0, 'c'},
{"logfile", 1, 0, 'l'},
{"verbose", 0, 0, 'V'},
{"version", 0, 0, '$'},
{"help", 0, 0, '?'},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
static void
setup_cib(void)
{
int rc, retries = 0;
static cib_t *(*cib_new_fn) (void) = NULL;
if (cib_new_fn == NULL) {
cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE);
}
if (cib_new_fn != NULL) {
cib_api = (*cib_new_fn) ();
}
if (cib_api == NULL) {
crm_err("No connection to the CIB manager");
return;
}
do {
sleep(retries);
rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
} while (rc == -ENOTCONN && ++retries < 5);
if (rc != pcmk_ok) {
crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);
} else if (pcmk_ok !=
cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) {
crm_err("Could not set CIB notification callback");
} else {
rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
init_cib_cache_cb);
cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
crm_info("Watching for stonith topology changes");
}
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = st_ipc_created,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
* \internal
* \brief Callback for peer status changes
*
* \param[in] type What changed
* \param[in] node What peer had the change
* \param[in] data Previous value of what changed
*/
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) {
xmlNode *query = NULL;
if (node->id && node->uname) {
g_hash_table_insert(known_peer_names, GUINT_TO_POINTER(node->id), strdup(node->uname));
}
/*
* This is a hack until we can send to a nodeid and/or we fix node name lookups
* These messages are ignored in stonith_peer_callback()
*/
query = create_xml_node(NULL, "stonith_command");
crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
crm_xml_add(query, F_TYPE, T_STONITH_NG);
crm_xml_add(query, F_STONITH_OPERATION, "poke");
crm_debug("Broadcasting our uname because of node %u", node->id);
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
}
}
int
main(int argc, char **argv)
{
int flag;
int lpc = 0;
int argerr = 0;
int option_index = 0;
crm_cluster_t cluster;
const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" };
crm_log_preinit(NULL, argc, argv);
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1) {
break;
}
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'l':
crm_add_logfile(optarg);
break;
case 's':
stand_alone = TRUE;
break;
case 'c':
stand_alone = FALSE;
no_cib_connect = TRUE;
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
default:
++argerr;
break;
}
}
if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
printf("\n");
printf("\n");
printf(" 1.0\n");
printf(" Instance attributes available for all \"stonith\"-class resources"
" and used by Pacemaker's fence daemon, formerly known as stonithd\n");
printf(" Instance attributes available for all \"stonith\"-class resources\n");
printf(" \n");
printf(" \n");
printf
(" The priority of the stonith resource. Devices are tried in order of highest priority to lowest.\n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTARG);
printf
(" Advanced use only: An alternate parameter to supply instead of 'port'\n");
printf
(" Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
"A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
" \n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTMAP);
printf
(" A mapping of host names to ports numbers for devices that do not support host names.\n");
printf
(" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTLIST);
printf
(" A list of machines controlled by this device (Optional unless %s=static-list).\n",
STONITH_ATTR_HOSTCHECK);
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTCHECK);
printf
(" How to determine which machines are controlled by the device.\n");
printf
(" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n",
STONITH_ATTR_HOSTLIST);
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_DELAY_MAX);
printf
(" Enable a random delay for stonith actions and specify the maximum of random delay.\n");
printf
(" This prevents double fencing when using slow devices such as sbd.\n"
"Use this to enable a random delay for stonith actions.\n"
"The overall delay is derived from this random delay value adding a static delay so that the sum is kept below the maximum delay.\n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_DELAY_BASE);
printf
(" Enable a base delay for stonith actions and specify base delay value.\n");
printf
(" This prevents double fencing when different delays are configured on the nodes.\n"
"Use this to enable a static delay for stonith actions.\n"
"The overall delay is derived from a random delay value adding this static delay so that the sum is kept below the maximum delay.\n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_ACTION_LIMIT);
printf
(" The maximum number of actions can be performed in parallel on this device\n");
printf
(" Cluster property concurrent-fencing=true needs to be configured first.\n"
"Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.\n");
printf(" \n");
printf(" \n");
for (lpc = 0; lpc < DIMOF(actions); lpc++) {
printf(" \n", actions[lpc]);
printf
(" Advanced use only: An alternate command to run instead of '%s'\n",
actions[lpc]);
printf
(" Some devices do not support the standard commands or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, command that implements the '%s' action.\n",
actions[lpc]);
printf(" \n", actions[lpc]);
printf(" \n");
printf(" \n", actions[lpc]);
printf
(" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n",
actions[lpc]);
printf
(" Some devices need much more/less time to complete than normal.\n"
"Use this to specify an alternate, device-specific, timeout for '%s' actions.\n",
actions[lpc]);
printf(" \n");
printf(" \n");
printf(" \n", actions[lpc]);
printf
(" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n",
actions[lpc]);
printf(" Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
"\n", actions[lpc]);
printf(" \n");
printf(" \n");
}
printf(" \n");
printf("\n");
return CRM_EX_OK;
}
if (optind != argc) {
++argerr;
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
mainloop_add_signal(SIGTERM, stonith_shutdown);
crm_peer_init();
known_peer_names = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
if (stand_alone == FALSE) {
if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
cluster.destroy = stonith_peer_cs_destroy;
cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback;
cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
}
crm_set_status_callback(&st_peer_update_callback);
if (crm_cluster_connect(&cluster) == FALSE) {
crm_crit("Cannot sign in to the cluster... terminating");
crm_exit(CRM_EX_FATAL);
}
stonith_our_uname = cluster.uname;
stonith_our_uuid = cluster.uuid;
if (no_cib_connect == FALSE) {
setup_cib();
}
} else {
stonith_our_uname = strdup("localhost");
}
init_device_list();
init_topology_list();
if(stonith_watchdog_timeout_ms > 0) {
xmlNode *xml;
stonith_key_value_t *params = NULL;
params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);
xml = create_device_registration_xml("watchdog", st_namespace_internal,
STONITH_WATCHDOG_AGENT, params,
NULL);
stonith_device_register(xml, NULL, FALSE);
stonith_key_value_freeall(params, 1, 1);
free_xml(xml);
}
stonith_ipc_server_init(&ipcs, &ipc_callbacks);
/* Create the mainloop and run it... */
mainloop = g_main_loop_new(NULL, FALSE);
crm_info("Starting %s mainloop", crm_system_name);
g_main_loop_run(mainloop);
stonith_cleanup();
crm_info("Done");
return crm_exit(CRM_EX_OK);
}
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 34d82c901c..67ea542310 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -1,254 +1,266 @@
/*
* Copyright 2009-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
/*!
* \internal
* \brief Check to see if target was fenced in the last few seconds.
* \param tolerance  The number of seconds to look back in time
* \param target     The node to search for
* \param action     The action we want to match
*
* \retval FALSE, no match
* \retval TRUE, a fencing operation took place within the last 'tolerance' seconds
*/
gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
enum st_device_flags
{
st_device_supports_list = 0x0001,
st_device_supports_status = 0x0002,
st_device_supports_reboot = 0x0004,
};
typedef struct stonith_device_s {
char *id;
char *agent;
char *namespace;
/*! list of actions that must execute on the target node. Used for unfencing */
char *on_target_actions;
GListPtr targets;
time_t targets_age;
gboolean has_attr_map;
/* should nodeid parameter for victim be included in agent arguments */
gboolean include_nodeid;
/* whether the cluster should automatically unfence nodes with the device */
gboolean automatic_unfencing;
guint priority;
enum st_device_flags flags;
GHashTable *params;
GHashTable *aliases;
GList *pending_ops;
crm_trigger_t *work;
xmlNode *agent_metadata;
/*! A verified device is one that has contacted the
* agent successfully to perform a monitor operation */
gboolean verified;
gboolean cib_registered;
gboolean api_registered;
} stonith_device_t;
/* These values are used to index certain arrays by "phase". Usually an
* operation has only one "phase", so phase is always zero. However, some
* reboots are remapped to "off" then "on", in which case "reboot" will be
* phase 0, "off" will be phase 1 and "on" will be phase 2.
*/
enum st_remap_phase {
st_phase_requested = 0,
st_phase_off = 1,
st_phase_on = 2,
st_phase_max = 3
};
typedef struct remote_fencing_op_s {
/* The unique id associated with this operation */
char *id;
/*! The node this operation will fence */
char *target;
/*! The fencing action to perform on the target. (reboot, on, off) */
char *action;
/*! When was the fencing action recorded (seconds since epoch) */
time_t created;
/*! Marks if the final notifications have been sent to local stonith clients. */
gboolean notify_sent;
/*! The number of query replies received */
guint replies;
/*! The number of query replies expected */
guint replies_expected;
/*! Does this node own control of this operation */
gboolean owner;
/*! After the query is complete, this is the high-level timer that expires the entire operation */
guint op_timer_total;
/*! This timer expires the current fencing request. Many fencing
* requests may exist in a single operation */
guint op_timer_one;
/*! This timer expires the query request sent out to determine
* which nodes contain which devices, and which targets those devices can fence */
guint query_timer;
/*! This is the default timeout to use for each fencing device if no
* custom timeout is received in the query. */
gint base_timeout;
/*! This is the calculated total timeout an operation can take before
* expiring. This is calculated by adding together all the timeout
* values associated with the devices this fencing operation may call */
gint total_timeout;
/*! Delegate is the node being asked to perform a fencing action
* on behalf of the node that owns the remote operation. Some operations
* will involve multiple delegates. This value represents the final delegate
* that is used. */
char *delegate;
/*! The point at which the remote operation completed */
time_t completed;
/*! The stonith_call_options associated with this remote operation */
long long call_options;
/*! The current state of the remote operation. This indicates
* what stage the op is in, query, exec, done, duplicate, failed. */
enum op_state state;
/*! The node that owns the remote operation */
char *originator;
/*! The local client id that initiated the fencing request */
char *client_id;
/*! The client's call_id that initiated the fencing request */
int client_callid;
/*! The name of client that initiated the fencing request */
char *client_name;
/*! List of the received query results for all the nodes in the cpg group */
GListPtr query_results;
/*! The original request that initiated the remote stonith operation */
xmlNode *request;
/*! The current topology level being executed */
guint level;
/*! The current operation phase being executed */
enum st_remap_phase phase;
/*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
GListPtr automatic_list;
/*! List of all devices at the currently executing topology level */
GListPtr devices_list;
/*! Current entry in the topology device list */
GListPtr devices;
/*! List of duplicate operations attached to this operation. Once this operation
* completes, the duplicate operations will be closed out as well. */
GListPtr duplicates;
} remote_fencing_op_t;
+enum st_callback_flags {
+ st_callback_unknown = 0x0000,
+ st_callback_notify_fence = 0x0001,
+ st_callback_device_add = 0x0004,
+ st_callback_device_del = 0x0010,
+ st_callback_notify_history = 0x0020
+};
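The numeric values deliberately match the magic numbers that get_stonith_flag() used to return, so existing clients see no change on the wire. A compile-time check along these lines (not part of the patch, C11) would pin that down:

/* Not part of the patch: pins the enum to the legacy values (C11) */
_Static_assert(st_callback_notify_fence == 0x01,
               "must match legacy get_stonith_flag() value");
_Static_assert(st_callback_device_add == 0x04,
               "must match legacy get_stonith_flag() value");
_Static_assert(st_callback_device_del == 0x10,
               "must match legacy get_stonith_flag() value");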
+
/*
* Complex fencing requirements are specified via fencing topologies.
* A topology consists of levels; each level is a list of fencing devices.
* Topologies are stored in a hash table by node name. When a node needs to be
* fenced, if it has an entry in the topology table, the levels are tried
* sequentially, and the devices in each level are tried sequentially.
* Fencing is considered successful as soon as any level succeeds;
* a level is considered successful if all its devices succeed.
* Essentially, all devices at a given level are "and-ed" and the
* levels are "or-ed".
*
* This structure is used for the topology table entries.
* Topology levels start from 1, so levels[0] is unused and always NULL.
*/
typedef struct stonith_topology_s {
int kind;
/*! Node name regex or attribute name=value for which topology applies */
char *target;
char *target_value;
char *target_pattern;
char *target_attribute;
/*! Names of fencing devices at each topology level */
GListPtr levels[ST_LEVEL_MAX];
} stonith_topology_t;
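In pseudo-C, the "levels or-ed, devices and-ed" rule described above evaluates like this (illustrative only; fence_with_device() is a hypothetical helper, not a real API):

static gboolean
topology_succeeds(stonith_topology_t *tp)
{
    for (int lvl = 1; lvl < ST_LEVEL_MAX; lvl++) {      /* levels: or-ed */
        GList *d = tp->levels[lvl];
        gboolean level_ok = (d != NULL);

        for (; level_ok && (d != NULL); d = d->next) {
            level_ok = fence_with_device(d->data);      /* devices: and-ed */
        }
        if (level_ok && (tp->levels[lvl] != NULL)) {
            return TRUE;        /* the first fully successful level wins */
        }
    }
    return FALSE;
}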
void init_device_list(void);
void free_device_list(void);
void init_topology_list(void);
void free_topology_list(void);
-void free_remote_op_list(void);
+void free_stonith_remote_op_list(void);
+void init_stonith_remote_op_hash_table(GHashTable **table);
void free_metadata_cache(void);
long long get_stonith_flag(const char *name);
void stonith_command(crm_client_t * client, uint32_t id, uint32_t flags,
xmlNode * op_request, const char *remote_peer);
int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib);
int stonith_device_remove(const char *id, gboolean from_cib);
char *stonith_level_key(xmlNode * msg, int mode);
int stonith_level_kind(xmlNode * msg);
int stonith_level_register(xmlNode * msg, char **desc);
int stonith_level_remove(xmlNode * msg, char **desc);
stonith_topology_t *find_topology_for_host(const char *host);
void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
gboolean from_peer);
xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data,
int rc);
void
do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
void do_stonith_notify(int options, const char *type, int result, xmlNode * data);
void do_stonith_notify_device(int options, const char *op, int rc, const char *desc);
void do_stonith_notify_level(int options, const char *op, int rc, const char *desc);
remote_fencing_op_t *initiate_remote_stonith_op(crm_client_t * client, xmlNode * request,
gboolean manual_ack);
int process_remote_stonith_exec(xmlNode * msg);
int process_remote_stonith_query(xmlNode * msg);
void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
-int stonith_fence_history(xmlNode * msg, xmlNode ** output);
+int stonith_fence_history(xmlNode *msg, xmlNode **output,
+ const char *remote_peer, int options);
bool fencing_peer_active(crm_node_t *peer);
int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
gboolean string_in_list(GListPtr list, const char *item);
gboolean node_has_attr(const char *node, const char *name, const char *value);
void
schedule_internal_command(const char *origin,
stonith_device_t * device,
const char *action,
const char *victim,
int timeout,
void *internal_user_data,
void (*done_cb) (GPid pid, int rc, const char *output,
gpointer user_data));
char *stonith_get_peer_name(unsigned int nodeid);
extern char *stonith_our_uname;
extern gboolean stand_alone;
extern GHashTable *device_list;
extern GHashTable *topology;
extern long stonith_watchdog_timeout_ms;
extern GHashTable *known_peer_names;
+
+extern GHashTable *stonith_remote_op_list;
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
index ad63faf110..0a090f28fa 100644
--- a/include/crm/fencing/internal.h
+++ b/include/crm/fencing/internal.h
@@ -1,143 +1,144 @@
/*
* Copyright 2011-2018 Andrew Beekhof
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef STONITH_NG_INTERNAL__H
# define STONITH_NG_INTERNAL__H
# include
# include
struct stonith_action_s;
typedef struct stonith_action_s stonith_action_t;
stonith_action_t *stonith_action_create(const char *agent,
const char *_action,
const char *victim,
uint32_t victim_nodeid,
int timeout,
GHashTable * device_args, GHashTable * port_map);
void stonith__destroy_action(stonith_action_t *action);
void stonith__action_result(stonith_action_t *action, int *rc, char **output,
char **error_output);
GPid
stonith_action_execute_async(stonith_action_t * action,
void *userdata,
void (*done) (GPid pid, int rc, const char *output,
gpointer user_data));
int stonith__execute(stonith_action_t *action);
xmlNode *create_level_registration_xml(const char *node, const char *pattern,
const char *attr, const char *value,
int level,
stonith_key_value_t *device_list);
xmlNode *create_device_registration_xml(const char *id,
enum stonith_namespace namespace,
const char *agent,
stonith_key_value_t *params,
const char *rsc_provides);
# define ST_LEVEL_MAX 10
# define F_STONITH_CLIENTID "st_clientid"
# define F_STONITH_CALLOPTS "st_callopt"
# define F_STONITH_CALLID "st_callid"
# define F_STONITH_CALLDATA "st_calldata"
# define F_STONITH_OPERATION "st_op"
# define F_STONITH_TARGET "st_target"
# define F_STONITH_REMOTE_OP_ID "st_remote_op"
# define F_STONITH_RC "st_rc"
/*! Timeout period per device execution */
# define F_STONITH_TIMEOUT "st_timeout"
# define F_STONITH_TOLERANCE "st_tolerance"
/*! Action specific timeout period returned in query of fencing devices. */
# define F_STONITH_ACTION_TIMEOUT "st_action_timeout"
/*! Host in query result is not allowed to run this action */
# define F_STONITH_ACTION_DISALLOWED "st_action_disallowed"
/*! Maximum of random fencing delay for a device */
# define F_STONITH_DELAY_MAX "st_delay_max"
/*! Base delay used for a fencing delay */
# define F_STONITH_DELAY_BASE "st_delay_base"
/*! Has this device been verified using a monitor type
* operation (monitor, list, status) */
# define F_STONITH_DEVICE_VERIFIED "st_monitor_verified"
/*! device is required for this action */
# define F_STONITH_DEVICE_REQUIRED "st_required"
/*! number of available devices in query result */
# define F_STONITH_AVAILABLE_DEVICES "st-available-devices"
# define F_STONITH_CALLBACK_TOKEN "st_async_id"
# define F_STONITH_CLIENTNAME "st_clientname"
# define F_STONITH_CLIENTNODE "st_clientnode"
# define F_STONITH_NOTIFY_ACTIVATE "st_notify_activate"
# define F_STONITH_NOTIFY_DEACTIVATE "st_notify_deactivate"
# define F_STONITH_DELEGATE "st_delegate"
/*! The node initiating the stonith operation. If an operation
* is relayed, this is the last node the operation lands on. When
* in standalone mode, origin is the client's id that originated the
* operation. */
# define F_STONITH_ORIGIN "st_origin"
# define F_STONITH_HISTORY_LIST "st_history"
# define F_STONITH_DATE "st_date"
# define F_STONITH_STATE "st_state"
# define F_STONITH_ACTIVE "st_active"
+# define F_STONITH_DIFFERENTIAL "st_differential"
# define F_STONITH_DEVICE "st_device_id"
# define F_STONITH_ACTION "st_device_action"
# define F_STONITH_MODE "st_mode"
# define T_STONITH_NG "stonith-ng"
# define T_STONITH_REPLY "st-reply"
/*! For async operations, an event from the server containing
* the total amount of time the server is allowing for the operation
* to take place is returned to the client. */
# define T_STONITH_TIMEOUT_VALUE "st-async-timeout-value"
# define T_STONITH_NOTIFY "st_notify"
# define STONITH_ATTR_HOSTARG "pcmk_host_argument"
# define STONITH_ATTR_HOSTMAP "pcmk_host_map"
# define STONITH_ATTR_HOSTLIST "pcmk_host_list"
# define STONITH_ATTR_HOSTCHECK "pcmk_host_check"
# define STONITH_ATTR_DELAY_MAX "pcmk_delay_max"
# define STONITH_ATTR_DELAY_BASE "pcmk_delay_base"
# define STONITH_ATTR_ACTION_LIMIT "pcmk_action_limit"
# define STONITH_ATTR_ACTION_OP "action"
# define STONITH_OP_EXEC "st_execute"
# define STONITH_OP_TIMEOUT_UPDATE "st_timeout_update"
# define STONITH_OP_QUERY "st_query"
# define STONITH_OP_FENCE "st_fence"
# define STONITH_OP_RELAY "st_relay"
# define STONITH_OP_DEVICE_ADD "st_device_register"
# define STONITH_OP_DEVICE_DEL "st_device_remove"
# define STONITH_OP_FENCE_HISTORY "st_fence_history"
# define STONITH_OP_LEVEL_ADD "st_level_add"
# define STONITH_OP_LEVEL_DEL "st_level_remove"
# define STONITH_WATCHDOG_AGENT "#watchdog"
# ifdef HAVE_STONITH_STONITH_H
// utilities from st_lha.c
int stonith__list_lha_agents(stonith_key_value_t **devices);
int stonith__lha_metadata(const char *agent, int timeout, char **output);
bool stonith__agent_is_lha(const char *agent);
int stonith__lha_validate(stonith_t *st, int call_options, const char *target,
const char *agent, GHashTable *params,
int timeout, char **output, char **error_output);
# endif
// utilities from st_rhcs.c
int stonith__list_rhcs_agents(stonith_key_value_t **devices);
int stonith__rhcs_metadata(const char *agent, int timeout, char **output);
bool stonith__agent_is_rhcs(const char *agent);
int stonith__rhcs_validate(stonith_t *st, int call_options, const char *target,
const char *agent, GHashTable *params,
int timeout, char **output, char **error_output);
#endif
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
index 5ca6d07d47..aa225e2c9d 100644
--- a/include/crm/stonith-ng.h
+++ b/include/crm/stonith-ng.h
@@ -1,523 +1,528 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Fencing, a.k.a. STONITH
* \ingroup fencing
*/
#ifndef STONITH_NG__H
# define STONITH_NG__H
# include <dlfcn.h>
# include <errno.h>
# include <stdbool.h>
/* TO-DO: Work out how to drop this requirement */
# include <libxml/tree.h>
# define T_STONITH_NOTIFY_DISCONNECT "st_notify_disconnect"
# define T_STONITH_NOTIFY_FENCE "st_notify_fence"
+# define T_STONITH_NOTIFY_HISTORY "st_notify_history"
/* *INDENT-OFF* */
enum stonith_state {
stonith_connected_command,
stonith_connected_query,
stonith_disconnected,
};
enum stonith_call_options {
st_opt_none = 0x00000000,
st_opt_verbose = 0x00000001,
st_opt_allow_suicide = 0x00000002,
st_opt_manual_ack = 0x00000008,
st_opt_discard_reply = 0x00000010,
/* st_opt_all_replies = 0x00000020, */
st_opt_topology = 0x00000040,
st_opt_scope_local = 0x00000100,
st_opt_cs_nodeid = 0x00000200,
st_opt_sync_call = 0x00001000,
/*! Allow the timeout period for a callback to be adjusted
* based on the time the server reports the operation will take. */
st_opt_timeout_updates = 0x00002000,
/*! Only report back if operation is a success in callback */
st_opt_report_only_success = 0x00004000,
+ /* used wherever appropriate - e.g. cleanup of history */
+ st_opt_cleanup = 0x000080000,
+ /* used wherever appropriate - e.g. send out a history query to all nodes */
+ st_opt_broadcast = 0x000100000,
};
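/* Illustrative sketch only (not part of this patch): how a caller might
 * combine the new flags. It assumes "st" is a connected stonith_t (declared
 * below) and that history() honors st_opt_cleanup/st_opt_broadcast this way;
 * both are assumptions made for the example.
 *
 *   stonith_history_t *hist = NULL;
 *
 *   // ask all nodes, synchronously, to clean out their fence history
 *   st->cmds->history(st, st_opt_sync_call | st_opt_broadcast | st_opt_cleanup,
 *                     NULL, &hist, 120);
 */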
/*! Order matters here, do not change values */
enum op_state
{
st_query,
st_exec,
st_done,
st_duplicate,
st_failed,
};
// Supported fence agent interface standards
enum stonith_namespace {
st_namespace_invalid,
st_namespace_any,
st_namespace_internal, // Implemented internally by Pacemaker
/* Neither of these projects is active any longer, but the fence agent
* interfaces they created are still in use and supported by Pacemaker.
*/
st_namespace_rhcs, // Red Hat Cluster Suite compatible
st_namespace_lha, // Linux-HA compatible
};
enum stonith_namespace stonith_text2namespace(const char *namespace_s);
const char *stonith_namespace2text(enum stonith_namespace st_namespace);
enum stonith_namespace stonith_get_namespace(const char *agent,
const char *namespace_s);
typedef struct stonith_key_value_s {
char *key;
char *value;
struct stonith_key_value_s *next;
} stonith_key_value_t;
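/* Sketch (illustrative assumptions, not API documentation): building a
 * parameter list for register_device() below. The device id "my-fence-dev",
 * the fence_xvm agent, and the host list are example values only.
 *
 *   stonith_key_value_t *params = NULL;
 *
 *   params = stonith_key_value_add(params, "pcmk_host_list", "node1 node2");
 *   st->cmds->register_device(st, st_opt_sync_call, "my-fence-dev",
 *                             "stonith-ng", "fence_xvm", params);
 *   stonith_key_value_freeall(params, 1, 1); // free keys and values too
 */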
typedef struct stonith_history_s {
char *target;
char *action;
char *origin;
char *delegate;
char *client;
int state;
time_t completed;
struct stonith_history_s *next;
} stonith_history_t;
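/* Sketch (illustrative only) of consuming a history list returned by the
 * history() API call declared below; state values correspond to enum
 * op_state, and the list is released with stonith_history_free().
 *
 *   stonith_history_t *hist = NULL, *hp = NULL;
 *
 *   if (st->cmds->history(st, st_opt_sync_call, NULL, &hist, 120) == 0) {
 *       for (hp = hist; hp != NULL; hp = hp->next) {
 *           printf("%s of %s from %s: state=%d\n",
 *                  hp->action, hp->target, hp->origin, hp->state);
 *       }
 *       stonith_history_free(hist);
 *   }
 */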
typedef struct stonith_s stonith_t;
typedef struct stonith_event_s
{
char *id;
char *type;
char *message;
char *operation;
int result;
char *origin;
char *target;
char *action;
char *executioner;
char *device;
/*! The name of the client that initiated the action. */
char *client_origin;
} stonith_event_t;
typedef struct stonith_callback_data_s
{
int rc;
int call_id;
void *userdata;
} stonith_callback_data_t;
typedef struct stonith_api_operations_s
{
/*!
* \brief Destroy the stonith api structure.
*/
int (*free) (stonith_t *st);
/*!
* \brief Connect to the local stonith daemon.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*connect) (stonith_t *st, const char *name, int *stonith_fd);
/*!
* \brief Disconnect from the local stonith daemon.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*disconnect)(stonith_t *st);
/*!
* \brief Remove a registered stonith device with the local stonith daemon.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*remove_device)(
stonith_t *st, int options, const char *name);
/*!
* \brief Register a stonith device with the local stonith daemon.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_device)(
stonith_t *st, int options, const char *id,
const char *provider, const char *agent, stonith_key_value_t *params);
/*!
* \brief Remove a fencing level for a specific node.
*
* \note This feature is not available when stonith is in standalone mode.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*remove_level)(
stonith_t *st, int options, const char *node, int level);
/*!
* \brief Register a fencing level containing the fencing devices to be used
* at that level for a specific node.
*
* \note This feature is not available when stonith is in standalone mode.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_level)(
stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
/*!
* \brief Get the metadata documentation for a resource.
*
* \note Value is returned in output. Output must be freed when set.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*metadata)(stonith_t *st, int options,
const char *device, const char *provider, char **output, int timeout);
/*!
* \brief Retrieve a list of installed stonith agents
*
* \note if provider is not provided, all known agents will be returned
* \note list must be freed using stonith_key_value_freeall()
* \note call_options parameter is not used, it is reserved for future use.
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_agents)(stonith_t *stonith, int call_options, const char *provider,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Retrieve string listing hosts and port assignments from a local stonith device.
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*list)(stonith_t *st, int options, const char *id, char **list_output, int timeout);
/*!
* \brief Check to see if a local stonith device is reachable
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*monitor)(stonith_t *st, int options, const char *id, int timeout);
/*!
* \brief Check to see if a local stonith device's port is reachable
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*status)(stonith_t *st, int options, const char *id, const char *port, int timeout);
/*!
* \brief Retrieve a list of registered stonith devices.
*
* \note If node is provided, only devices that can fence the node id
* will be returned.
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*query)(stonith_t *st, int options, const char *node,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Issue a fencing action against a node.
*
* \note Possible actions are 'on', 'off', and 'reboot'.
*
* \param st, stonith connection
* \param options, call options
* \param node, The target node to fence
* \param action, The fencing action to take
* \param timeout, The default per device timeout to use with each device
* capable of fencing the target.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*fence)(stonith_t *st, int options, const char *node, const char *action,
int timeout, int tolerance);
/*!
* \brief Manually confirm that a node is down.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*confirm)(stonith_t *st, int options, const char *node);
/*!
* \brief Retrieve a list of fencing operations that have occurred for a specific node.
*
* \note History is not available in standalone mode.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*history)(stonith_t *st, int options, const char *node, stonith_history_t **output, int timeout);
int (*register_notification)(
stonith_t *st, const char *event,
void (*notify)(stonith_t *st, stonith_event_t *e));
int (*remove_notification)(stonith_t *st, const char *event);
/*!
* \brief Register a callback to receive the result of an async call id
*
* \param call_id, The call id to register the callback for.
* \param timeout, The default timeout period to wait until this callback expires
* \param options, Option flags, st_opt_timeout_updates and st_opt_report_only_success are the
* only valid options for this function.
* \param userdata, A pointer that will be handed back in the callback.
* \param callback_name, Unique name given to callback
* \param callback, The callback function
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*register_callback)(stonith_t *st,
int call_id,
int timeout,
int options,
void *userdata,
const char *callback_name,
void (*callback)(stonith_t *st, stonith_callback_data_t *data));
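/* Illustrative sketch only (not from this patch): wiring up an async result
 * callback. It assumes the async fence() call returns a positive call id on
 * success, mirroring other Pacemaker client APIs; "my_notify" is a
 * hypothetical function name.
 *
 *   static void my_notify(stonith_t *st, stonith_callback_data_t *data)
 *   {
 *       crm_info("call %d completed: rc=%d", data->call_id, data->rc);
 *   }
 *
 *   int call_id = st->cmds->fence(st, st_opt_none, "node1", "reboot", 120, 0);
 *   if (call_id > 0) {
 *       st->cmds->register_callback(st, call_id, 120,
 *                                   st_opt_report_only_success, NULL,
 *                                   "my_notify", my_notify);
 *   }
 */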
/*!
* \brief Remove a registered callback for a given call id.
*/
int (*remove_callback)(stonith_t *st, int call_id, bool all_callbacks);
/*!
* \brief Remove fencing level for specific node, node regex or attribute
*
* \param[in] st Fencer connection to use
* \param[in] options Bitmask of stonith_call_options to pass to the fencer
* \param[in] node If not NULL, target level by this node name
* \param[in] pattern If not NULL, target by node name using this regex
* \param[in] attr If not NULL, target by this node attribute
* \param[in] value If not NULL, target by this node attribute value
* \param[in] level Index number of level to remove
*
* \return 0 on success, negative error code otherwise
*
* \note This feature is not available when stonith is in standalone mode.
* The caller should set only one of node, pattern or attr/value.
*/
int (*remove_level_full)(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value, int level);
/*!
* \brief Register fencing level for specific node, node regex or attribute
*
* \param[in] st Fencer connection to use
* \param[in] options Bitmask of stonith_call_options to pass to fencer
* \param[in] node If not NULL, target level by this node name
* \param[in] pattern If not NULL, target by node name using this regex
* \param[in] attr If not NULL, target by this node attribute
* \param[in] value If not NULL, target by this node attribute value
* \param[in] level Index number of level to add
* \param[in] device_list Devices to use in level
*
* \return 0 on success, negative error code otherwise
*
* \note This feature is not available when stonith is in standalone mode.
* The caller should set only one of node, pattern or attr/value.
*/
int (*register_level_full)(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value,
int level, stonith_key_value_t *device_list);
/*!
* \brief Validate an arbitrary stonith device configuration
*
* \param[in] st Stonithd connection to use
* \param[in] call_options Bitmask of stonith_call_options to use with fencer
* \param[in] rsc_id ID used to replace CIB secrets in params
* \param[in] namespace_s Namespace of fence agent to validate (optional)
* \param[in] agent Fence agent to validate
* \param[in] params Configuration parameters to pass to fence agent
* \param[in] timeout Fail if no response within this many seconds
* \param[out] output If non-NULL, where to store any agent output
* \param[out] error_output If non-NULL, where to store agent error output
*
* \return pcmk_ok if validation succeeds, -errno otherwise
*
* \note If pcmk_ok is returned, the caller is responsible for freeing
* the output (if requested).
*/
int (*validate)(stonith_t *st, int call_options, const char *rsc_id,
const char *namespace_s, const char *agent,
stonith_key_value_t *params, int timeout, char **output,
char **error_output);
} stonith_api_operations_t;
struct stonith_s
{
enum stonith_state state;
int call_id;
int call_timeout;
void *st_private;
stonith_api_operations_t *cmds;
};
/* *INDENT-ON* */
/* Core functions */
stonith_t *stonith_api_new(void);
void stonith_api_delete(stonith_t * st);
void stonith_dump_pending_callbacks(stonith_t * st);
// deprecated (use stonith_get_namespace() instead)
const char *get_stonith_provider(const char *agent, const char *provider);
bool stonith_dispatch(stonith_t * st);
stonith_key_value_t *stonith_key_value_add(stonith_key_value_t * kvp, const char *key,
const char *value);
void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values);
void stonith_history_free(stonith_history_t *history);
/* Basic helpers that allow nodes to be fenced and the history to be
 * queried without a mainloop or the caller understanding the full API
 *
 * At least one of nodeid and uname is required
*/
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off);
time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress);
/*
* Helpers for using the above functions without install-time dependencies
*
* Usage:
* #include <crm/stonith-ng.h>
*
* To turn a node off by corosync nodeid:
* stonith_api_kick_helper(nodeid, 120, 1);
*
* To check the last fence date/time (also by nodeid):
* last = stonith_api_time_helper(nodeid, 0);
*
* To check if fencing is in progress:
* if(stonith_api_time_helper(nodeid, 1) > 0) { ... }
*
* e.g.
#include <stdio.h>
#include <time.h>
#include <crm/stonith-ng.h>
int
main(int argc, char ** argv)
{
    int rc = 0;
    int nodeid = 102;
    time_t when = stonith_api_time_helper(nodeid, 0);

    printf("%d last fenced at %s\n", nodeid, ctime(&when));
    rc = stonith_api_kick_helper(nodeid, 120, 1);
    printf("%d fence result: %d\n", nodeid, rc);
    when = stonith_api_time_helper(nodeid, 0);
    printf("%d last fenced at %s\n", nodeid, ctime(&when));
    return 0;
}
*/
# define STONITH_LIBRARY "libstonithd.so.26"
typedef int (*st_api_kick_fn) (int nodeid, const char *uname, int timeout, bool off);
typedef time_t (*st_api_time_fn) (int nodeid, const char *uname, bool in_progress);
static inline int
stonith_api_kick_helper(uint32_t nodeid, int timeout, bool off)
{
static void *st_library = NULL;
static st_api_kick_fn st_kick_fn;
if (st_library == NULL) {
st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
}
if (st_library && st_kick_fn == NULL) {
st_kick_fn = (st_api_kick_fn) dlsym(st_library, "stonith_api_kick");
}
if (st_kick_fn == NULL) {
#ifdef ELIBACC
return -ELIBACC;
#else
return -ENOSYS;
#endif
}
return (*st_kick_fn) (nodeid, NULL, timeout, off);
}
static inline time_t
stonith_api_time_helper(uint32_t nodeid, bool in_progress)
{
static void *st_library = NULL;
static st_api_time_fn st_time_fn;
if (st_library == NULL) {
st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
}
if (st_library && st_time_fn == NULL) {
st_time_fn = (st_api_time_fn) dlsym(st_library, "stonith_api_time");
}
if (st_time_fn == NULL) {
return 0;
}
return (*st_time_fn) (nodeid, NULL, in_progress);
}
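/* Design note: both helpers bind lazily via dlopen()/dlsym() so that programs
 * embedding this header need no link-time dependency on libstonithd; if the
 * library or symbol is unavailable they fail soft (-ELIBACC or -ENOSYS from
 * the kick helper, 0 from the time helper). */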
#ifdef __cplusplus
}
#endif
#endif
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index 8f71642a3d..8cc3bd918e 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -1,4296 +1,4294 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include /* crm_ends_with_ext */
#include
#include
#include
#include
#include
#include
#include
#include <../lib/pengine/unpack.h>
#include
#include
extern void cleanup_alloc_calculations(pe_working_set_t * data_set);
static void clean_up(crm_exit_t exit_code);
void crm_diff_update(const char *event, xmlNode * msg);
gboolean mon_refresh_display(gpointer user_data);
int cib_connect(gboolean full);
void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
void kick_refresh(gboolean data_updated);
static char *get_node_display_name(node_t *node);
/*
* Definitions indicating which items to print
*/
#define mon_show_times (0x0001U)
#define mon_show_stack (0x0002U)
#define mon_show_dc (0x0004U)
#define mon_show_count (0x0008U)
#define mon_show_nodes (0x0010U)
#define mon_show_resources (0x0020U)
#define mon_show_attributes (0x0040U)
#define mon_show_failcounts (0x0080U)
#define mon_show_operations (0x0100U)
#define mon_show_tickets (0x0200U)
#define mon_show_bans (0x0400U)
#define mon_show_fence_history (0x0800U)
#define mon_show_headers (mon_show_times | mon_show_stack | mon_show_dc \
| mon_show_count)
#define mon_show_default (mon_show_headers | mon_show_nodes \
| mon_show_resources)
#define mon_show_all (mon_show_default | mon_show_attributes \
| mon_show_failcounts | mon_show_operations \
| mon_show_tickets | mon_show_bans \
| mon_show_fence_history)
static unsigned int show = mon_show_default;
/*
* Definitions indicating how to output
*/
enum mon_output_format_e {
mon_output_none,
mon_output_monitor,
mon_output_plain,
mon_output_console,
mon_output_xml,
mon_output_html,
mon_output_cgi
} output_format = mon_output_console;
static char *output_filename = NULL; /* if sending output to a file, its name */
/* other globals */
static char *xml_file = NULL;
static char *pid_file = NULL;
static gboolean group_by_node = FALSE;
static gboolean inactive_resources = FALSE;
static int reconnect_msec = 5000;
static gboolean daemonize = FALSE;
static GMainLoop *mainloop = NULL;
static guint timer_id = 0;
static mainloop_timer_t *refresh_timer = NULL;
static GList *attr_list = NULL;
static const char *external_agent = NULL;
static const char *external_recipient = NULL;
static cib_t *cib = NULL;
static stonith_t *st = NULL;
static xmlNode *current_cib = NULL;
static gboolean one_shot = FALSE;
static gboolean has_warnings = FALSE;
static gboolean print_timing = FALSE;
static gboolean watch_fencing = FALSE;
static gboolean fence_history = FALSE;
static gboolean fence_full_history = FALSE;
static gboolean fence_connect = FALSE;
-static int fence_history_level = 0;
+static int fence_history_level = 1;
static gboolean print_brief = FALSE;
static gboolean print_pending = TRUE;
static gboolean print_clone_detail = FALSE;
/* FIXME allow, detect, and correctly interpret glob pattern or regex? */
const char *print_neg_location_prefix = "";
/* Never display node attributes whose name starts with one of these prefixes */
#define FILTER_STR { CRM_FAIL_COUNT_PREFIX, CRM_LAST_FAILURE_PREFIX, \
"shutdown", "terminate", "standby", "probe_complete", \
"#", NULL }
long last_refresh = 0;
crm_trigger_t *refresh_trigger = NULL;
/* Define exit codes for monitoring-compatible output */
#define MON_STATUS_WARN CRM_EX_ERROR
/* Convenience macro for prettifying output (e.g. "node" vs "nodes") */
#define s_if_plural(i) (((i) == 1)? "" : "s")
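/* e.g. print_as("%d node%s", n, s_if_plural(n)) yields "1 node" or "2 nodes" */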
#if CURSES_ENABLED
# define print_dot() if (output_format == mon_output_console) { \
printw("."); \
clrtoeol(); \
refresh(); \
} else { \
fprintf(stdout, "."); \
}
#else
# define print_dot() fprintf(stdout, ".");
#endif
#if CURSES_ENABLED
# define print_as(fmt, args...) if (output_format == mon_output_console) { \
printw(fmt, ##args); \
clrtoeol(); \
refresh(); \
} else { \
fprintf(stdout, fmt, ##args); \
}
#else
# define print_as(fmt, args...) fprintf(stdout, fmt, ##args);
#endif
static void
blank_screen(void)
{
#if CURSES_ENABLED
int lpc = 0;
for (lpc = 0; lpc < LINES; lpc++) {
move(lpc, 0);
clrtoeol();
}
move(0, 0);
refresh();
#endif
}
static gboolean
mon_timer_popped(gpointer data)
{
int rc = pcmk_ok;
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
if (timer_id > 0) {
g_source_remove(timer_id);
timer_id = 0;
}
print_as("Reconnecting...\n");
rc = cib_connect(TRUE);
if (rc != pcmk_ok) {
timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
}
return FALSE;
}
static void
mon_cib_connection_destroy(gpointer user_data)
{
print_as("Connection to the cluster-daemons terminated\n");
if (refresh_timer != NULL) {
/* we'll trigger a refresh after reconnect */
mainloop_timer_stop(refresh_timer);
}
if (timer_id) {
/* we'll trigger a new reconnect-timeout at the end */
g_source_remove(timer_id);
timer_id = 0;
}
if (st) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
+ st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
if (st->state != stonith_disconnected) {
st->cmds->disconnect(st);
}
}
if (cib) {
cib->cmds->signoff(cib);
timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
}
return;
}
/*
* Mainloop signal handler.
*/
static void
mon_shutdown(int nsig)
{
clean_up(CRM_EX_OK);
}
#if ON_DARWIN
# define sighandler_t sig_t
#endif
#if CURSES_ENABLED
# ifndef HAVE_SIGHANDLER_T
typedef void (*sighandler_t) (int);
# endif
static sighandler_t ncurses_winch_handler;
static void
mon_winresize(int nsig)
{
static int not_done;
int lines = 0, cols = 0;
if (!not_done++) {
if (ncurses_winch_handler)
/* the original ncurses WINCH signal handler does the
* magic of retrieving the new window size;
* otherwise, we'd have to use ioctl or tgetent */
(*ncurses_winch_handler) (SIGWINCH);
getmaxyx(stdscr, lines, cols);
resizeterm(lines, cols);
mainloop_set_trigger(refresh_trigger);
}
not_done--;
}
#endif
int
cib_connect(gboolean full)
{
int rc = pcmk_ok;
static gboolean need_pass = TRUE;
CRM_CHECK(cib != NULL, return -EINVAL);
if (getenv("CIB_passwd") != NULL) {
need_pass = FALSE;
}
if ((fence_connect) && (st == NULL)) {
st = stonith_api_new();
}
if ((fence_connect) && (st->state == stonith_disconnected)) {
crm_trace("Connecting to stonith");
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_ok) {
crm_trace("Setting up stonith callbacks");
if (watch_fencing) {
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
mon_st_callback_event);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
} else {
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
mon_st_callback_display);
- st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_display);
+ st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
}
}
}
if (cib->state != cib_connected_query && cib->state != cib_connected_command) {
crm_trace("Connecting to the CIB");
if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) {
need_pass = FALSE;
print_as("Password:");
}
rc = cib->cmds->signon(cib, crm_system_name, cib_query);
if (rc != pcmk_ok) {
return rc;
}
rc = cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
if (rc == pcmk_ok) {
mon_refresh_display(NULL);
}
if (rc == pcmk_ok && full) {
if (rc == pcmk_ok) {
rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
if (rc == -EPROTONOSUPPORT) {
print_as
("Notification setup not supported, won't be able to reconnect after failure");
if (output_format == mon_output_console) {
sleep(2);
}
rc = pcmk_ok;
}
}
if (rc == pcmk_ok) {
cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
}
if (rc != pcmk_ok) {
print_as("Notification setup failed, could not monitor CIB actions");
if (output_format == mon_output_console) {
sleep(2);
}
clean_up(crm_errno2exit(rc));
}
}
}
return rc;
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"quiet", 0, 0, 'Q', "\tDisplay only essential output" },
{"-spacer-", 1, 0, '-', "\nModes (mutually exclusive):"},
{"as-html", 1, 0, 'h', "\tWrite cluster status to the named html file"},
{"as-xml", 0, 0, 'X', "\t\tWrite cluster status as xml to stdout. This will enable one-shot mode."},
{"web-cgi", 0, 0, 'w', "\t\tWeb mode with output suitable for CGI (preselected when run as *.cgi)"},
{"simple-status", 0, 0, 's', "\tDisplay the cluster status once as a simple one line output (suitable for nagios)"},
{"-spacer-", 1, 0, '-', "\nDisplay Options:"},
{"group-by-node", 0, 0, 'n', "\tGroup resources by node" },
{"inactive", 0, 0, 'r', "\t\tDisplay inactive resources" },
{"failcounts", 0, 0, 'f', "\tDisplay resource fail counts"},
{"operations", 0, 0, 'o', "\tDisplay resource operation history" },
{"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" },
{"tickets", 0, 0, 'c', "\t\tDisplay cluster tickets"},
{"watch-fencing", 0, 0, 'W', "\tListen for fencing events. For use with --external-agent"},
{"fence-history", 2, 0, 'm', "Show fence history\n"
"\t\t\t\t\t0=off, 1=failures, 2=add successes and pending (default without value),\n"
"\t\t\t\t\t3=show full history without reduction to most recent of each flavor"},
{"neg-locations", 2, 0, 'L', "Display negative location constraints [optionally filtered by id prefix]"},
{"show-node-attributes", 0, 0, 'A', "Display node attributes" },
{"hide-headers", 0, 0, 'D', "\tHide all headers" },
{"show-detail", 0, 0, 'R', "\tShow more details (node IDs, individual clone instances)" },
{"brief", 0, 0, 'b', "\t\tBrief output" },
{"pending", 0, 0, 'j', "\t\tDisplay pending state if 'record-pending' is enabled", pcmk_option_hidden},
{"-spacer-", 1, 0, '-', "\nAdditional Options:"},
{"interval", 1, 0, 'i', "\tUpdate frequency in seconds" },
{"one-shot", 0, 0, '1', "\t\tDisplay the cluster status once on the console and exit"},
{"disable-ncurses",0, 0, 'N', "\tDisable the use of ncurses", !CURSES_ENABLED},
{"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"},
{"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"},
{"external-agent", 1, 0, 'E', "A program to run when resource operations take place."},
{"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."},
{"xml-file", 1, 0, 'x', NULL, pcmk_option_hidden},
{"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', "Display the cluster status on the console with updates as they occur:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Display the cluster status on the console just once then exit:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon -1", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Display your cluster status, group resources by node, and include inactive resources in the list:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --group-by-node --inactive", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --daemonize --as-html /path/to/docroot/filename.html", pcmk_option_example},
{"-spacer-", 1, 0, '-', "Start crm_mon and export the current cluster status as xml to stdout, then exit.:", pcmk_option_paragraph},
{"-spacer-", 1, 0, '-', " crm_mon --as-xml", pcmk_option_example},
{NULL, 0, 0, 0}
};
/* *INDENT-ON* */
#if CURSES_ENABLED
static const char *
get_option_desc(char c)
{
int lpc;
for (lpc = 0; long_options[lpc].name != NULL; lpc++) {
if (long_options[lpc].name[0] == '-')
continue;
if (long_options[lpc].val == c) {
static char *buf = NULL;
const char *rv;
char *nl;
/* chop off tabs and cut at newline */
free(buf); /* free string from last usage */
buf = strdup(long_options[lpc].desc);
rv = buf; /* alias buf rather than copying, so the
buf pointer stays unaltered for freeing
when we come by next time; the result
therefore stays valid until the next call.
*/
while(isspace(rv[0])) {
rv++;
}
nl = strchr(rv, '\n');
if (nl) {
*nl = '\0';
}
return rv;
}
}
return NULL;
}
#define print_option_help(option, condition) \
print_as("%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option));
static gboolean
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer unused)
{
int c;
gboolean config_mode = FALSE;
while (1) {
/* Get user input */
c = getchar();
switch (c) {
case 'm':
if (!fence_history_level) {
fence_history = TRUE;
fence_connect = TRUE;
if (st == NULL) {
mon_cib_connection_destroy(NULL);
}
}
show ^= mon_show_fence_history;
break;
case 'c':
show ^= mon_show_tickets;
break;
case 'f':
show ^= mon_show_failcounts;
break;
case 'n':
group_by_node = ! group_by_node;
break;
case 'o':
show ^= mon_show_operations;
if ((show & mon_show_operations) == 0) {
print_timing = 0;
}
break;
case 'r':
inactive_resources = ! inactive_resources;
break;
case 'R':
print_clone_detail = ! print_clone_detail;
break;
case 't':
print_timing = ! print_timing;
if (print_timing) {
show |= mon_show_operations;
}
break;
case 'A':
show ^= mon_show_attributes;
break;
case 'L':
show ^= mon_show_bans;
break;
case 'D':
/* If any header is shown, clear them all, otherwise set them all */
if (show & mon_show_headers) {
show &= ~mon_show_headers;
} else {
show |= mon_show_headers;
}
break;
case 'b':
print_brief = ! print_brief;
break;
case 'j':
print_pending = ! print_pending;
break;
case '?':
config_mode = TRUE;
break;
default:
goto refresh;
}
if (!config_mode)
goto refresh;
blank_screen();
print_as("Display option change mode\n");
print_as("\n");
print_option_help('c', show & mon_show_tickets);
print_option_help('f', show & mon_show_failcounts);
print_option_help('n', group_by_node);
print_option_help('o', show & mon_show_operations);
print_option_help('r', inactive_resources);
print_option_help('t', print_timing);
print_option_help('A', show & mon_show_attributes);
print_option_help('L', show & mon_show_bans);
print_option_help('D', (show & mon_show_headers) == 0);
print_option_help('R', print_clone_detail);
print_option_help('b', print_brief);
print_option_help('j', print_pending);
print_option_help('m', (show & mon_show_fence_history));
print_as("\n");
print_as("Toggle fields via field letter, type any other key to return");
}
refresh:
mon_refresh_display(NULL);
return TRUE;
}
#endif
int
main(int argc, char **argv)
{
int flag;
int argerr = 0;
int option_index = 0;
pid_file = strdup("/tmp/ClusterMon.pid");
crm_log_cli_init("crm_mon");
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
#if !defined (ON_DARWIN) && !defined (ON_BSD)
/* prevent zombies */
signal(SIGCLD, SIG_IGN);
#endif
if (crm_ends_with_ext(argv[0], ".cgi") == TRUE) {
output_format = mon_output_cgi;
one_shot = TRUE;
}
/* To enable the stonith connection when crm_mon is invoked via an
* application like pcs, set the environment variable FENCE_HISTORY to the
* desired level, so that this application need not be modified
*/
/* fence_history_level = crm_atoi(getenv("FENCE_HISTORY"), "0"); */
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'Q':
show &= ~mon_show_times;
break;
case 'i':
reconnect_msec = crm_get_msec(optarg);
break;
case 'n':
group_by_node = TRUE;
break;
case 'r':
inactive_resources = TRUE;
break;
case 'W':
watch_fencing = TRUE;
fence_connect = TRUE;
break;
case 'm':
fence_history_level = crm_atoi(optarg, "2");
break;
case 'd':
daemonize = TRUE;
break;
case 't':
print_timing = TRUE;
show |= mon_show_operations;
break;
case 'o':
show |= mon_show_operations;
break;
case 'f':
show |= mon_show_failcounts;
break;
case 'A':
show |= mon_show_attributes;
break;
case 'L':
show |= mon_show_bans;
print_neg_location_prefix = optarg? optarg : "";
break;
case 'D':
show &= ~mon_show_headers;
break;
case 'b':
print_brief = TRUE;
break;
case 'j':
print_pending = TRUE;
break;
case 'R':
print_clone_detail = TRUE;
break;
case 'c':
show |= mon_show_tickets;
break;
case 'p':
free(pid_file);
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
pid_file = strdup(optarg);
break;
case 'x':
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
xml_file = strdup(optarg);
one_shot = TRUE;
break;
case 'h':
if(optarg == NULL) {
crm_help(flag, CRM_EX_USAGE);
}
argerr += (output_format != mon_output_console);
output_format = mon_output_html;
output_filename = strdup(optarg);
umask(S_IWGRP | S_IWOTH);
break;
case 'X':
argerr += (output_format != mon_output_console);
output_format = mon_output_xml;
one_shot = TRUE;
break;
case 'w':
/* do not allow argv[0] and argv[1...] redundancy */
argerr += (output_format != mon_output_console);
output_format = mon_output_cgi;
one_shot = TRUE;
break;
case 's':
argerr += (output_format != mon_output_console);
output_format = mon_output_monitor;
one_shot = TRUE;
break;
case 'E':
external_agent = optarg;
break;
case 'e':
external_recipient = optarg;
break;
case '1':
one_shot = TRUE;
break;
case 'N':
if (output_format == mon_output_console) {
output_format = mon_output_plain;
}
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
default:
printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
++argerr;
break;
}
}
+ if (watch_fencing) {
+ /* don't warn; fence_history_level == 1 is only the default, not a user request */
+ fence_history_level = 0;
+ }
+
switch (fence_history_level) {
case 3:
fence_full_history = TRUE;
/* fall through to next lower level */
case 2:
show |= mon_show_fence_history;
/* fall through to next lower level */
case 1:
fence_history = TRUE;
fence_connect = TRUE;
break;
default:
break;
}
/* Extra sanity checks when in CGI mode */
if (output_format == mon_output_cgi) {
argerr += (optind < argc);
argerr += (output_filename != NULL);
argerr += (xml_file != NULL);
argerr += (external_agent != NULL);
argerr += (daemonize == TRUE); /* paranoia */
} else if (optind < argc) {
printf("non-option ARGV-elements: ");
while (optind < argc)
printf("%s ", argv[optind++]);
printf("\n");
}
if (argerr) {
if (output_format == mon_output_cgi) {
fprintf(stdout, "Content-Type: text/plain\n"
"Status: 500\n\n");
return CRM_EX_USAGE;
}
crm_help('?', CRM_EX_USAGE);
}
/* XML output always prints everything */
if (output_format == mon_output_xml) {
show = mon_show_all;
print_timing = TRUE;
}
if (one_shot) {
if (output_format == mon_output_console) {
output_format = mon_output_plain;
}
} else if (daemonize) {
if ((output_format == mon_output_console) || (output_format == mon_output_plain)) {
output_format = mon_output_none;
}
crm_enable_stderr(FALSE);
if ((output_format != mon_output_html) && (output_format != mon_output_xml)
&& !external_agent) {
printf
("Looks like you forgot to specify one or more of: --as-html, --as-xml, --external-agent\n");
crm_help('?', CRM_EX_USAGE);
}
crm_make_daemon(crm_system_name, TRUE, pid_file);
} else if (output_format == mon_output_console) {
#if CURSES_ENABLED
initscr();
cbreak();
noecho();
crm_enable_stderr(FALSE);
#else
one_shot = TRUE;
output_format = mon_output_plain;
printf("Defaulting to one-shot mode\n");
printf("You need to have curses available at compile time to enable console mode\n");
#endif
}
crm_info("Starting %s", crm_system_name);
if (xml_file != NULL) {
current_cib = filename2xml(xml_file);
mon_refresh_display(NULL);
return CRM_EX_OK;
}
if (current_cib == NULL) {
int rc = pcmk_ok;
cib = cib_new();
do {
if (!one_shot) {
print_as("Attempting connection to the cluster...\n");
}
rc = cib_connect(!one_shot);
if (one_shot) {
break;
} else if (rc != pcmk_ok) {
sleep(reconnect_msec / 1000);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
}
} while (rc == -ENOTCONN);
if (rc != pcmk_ok) {
if (output_format == mon_output_monitor) {
printf("CLUSTER WARN: Connection to cluster failed: %s\n", pcmk_strerror(rc));
clean_up(MON_STATUS_WARN);
} else {
print_as("\nConnection to cluster failed: %s\n", pcmk_strerror(rc));
}
if (output_format == mon_output_console) {
sleep(2);
}
clean_up(crm_errno2exit(rc));
}
}
if (one_shot) {
return CRM_EX_OK;
}
mainloop = g_main_loop_new(NULL, FALSE);
mainloop_add_signal(SIGTERM, mon_shutdown);
mainloop_add_signal(SIGINT, mon_shutdown);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
ncurses_winch_handler = signal(SIGWINCH, mon_winresize);
if (ncurses_winch_handler == SIG_DFL ||
ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR)
ncurses_winch_handler = NULL;
g_io_add_watch(g_io_channel_unix_new(STDIN_FILENO), G_IO_IN, detect_user_input, NULL);
}
#endif
refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
g_main_loop_run(mainloop);
g_main_destroy(mainloop);
crm_info("Exiting %s", crm_system_name);
clean_up(CRM_EX_OK);
return CRM_EX_OK; // Should never be reached
}
#define mon_warn(fmt...) do { \
if (!has_warnings) { \
print_as("CLUSTER WARN:"); \
} else { \
print_as(","); \
} \
print_as(fmt); \
has_warnings = TRUE; \
} while(0)
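/* Illustration: with the macro above, successive warnings render as a single
 * monitoring-plugins style line, e.g. "CLUSTER WARN: No DC, offline node: n2" */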
static int
count_resources(pe_working_set_t * data_set, resource_t * rsc)
{
int count = 0;
GListPtr gIter = NULL;
if (rsc == NULL) {
gIter = data_set->resources;
} else if (rsc->children) {
gIter = rsc->children;
} else {
return is_not_set(rsc->flags, pe_rsc_orphan);
}
for (; gIter != NULL; gIter = gIter->next) {
count += count_resources(data_set, gIter->data);
}
return count;
}
/*!
* \internal
* \brief Print one-line status suitable for use with monitoring software
*
* \param[in] data_set Working set of CIB state
* \param[in] history List of stonith actions
*
* \note This function's output (and the return code when the program exits)
* should conform to https://www.monitoring-plugins.org/doc/guidelines.html
*/
static void
print_simple_status(pe_working_set_t * data_set,
stonith_history_t *history)
{
GListPtr gIter = NULL;
int nodes_online = 0;
int nodes_standby = 0;
int nodes_maintenance = 0;
if (data_set->dc_node == NULL) {
mon_warn(" No DC");
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node->details->standby && node->details->online) {
nodes_standby++;
} else if (node->details->maintenance && node->details->online) {
nodes_maintenance++;
} else if (node->details->online) {
nodes_online++;
} else {
mon_warn(" offline node: %s", node->details->uname);
}
}
if (!has_warnings) {
int nresources = count_resources(data_set, NULL);
print_as("CLUSTER OK: %d node%s online", nodes_online, s_if_plural(nodes_online));
if (nodes_standby > 0) {
print_as(", %d standby node%s", nodes_standby, s_if_plural(nodes_standby));
}
if (nodes_maintenance > 0) {
print_as(", %d maintenance node%s", nodes_maintenance, s_if_plural(nodes_maintenance));
}
print_as(", %d resource%s configured", nresources, s_if_plural(nresources));
}
print_as("\n");
}
/*!
* \internal
* \brief Print a [name]=[value][units] pair, optionally using time string
*
* \param[in] stream File stream to display output to
* \param[in] name Name to display
* \param[in] value Value to display (or NULL to convert time instead)
* \param[in] units Units to display (or NULL for no units)
* \param[in] epoch_time Epoch time to convert if value is NULL
*/
static void
print_nvpair(FILE *stream, const char *name, const char *value,
const char *units, time_t epoch_time)
{
/* print name= */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" %s=", name);
break;
case mon_output_html:
case mon_output_cgi:
case mon_output_xml:
fprintf(stream, " %s=", name);
break;
default:
break;
}
/* If we have a value (and optionally units), print it */
if (value) {
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("%s%s", value, (units? units : ""));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "%s%s", value, (units? units : ""));
break;
case mon_output_xml:
fprintf(stream, "\"%s%s\"", value, (units? units : ""));
break;
default:
break;
}
/* Otherwise print user-friendly time string */
} else {
static char empty_str[] = "";
char *c, *date_str = asctime(localtime(&epoch_time));
for (c = (date_str != NULL) ? date_str : empty_str; *c != '\0'; ++c) {
if (*c == '\n') {
*c = '\0';
break;
}
}
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("'%s'", date_str);
break;
case mon_output_html:
case mon_output_cgi:
case mon_output_xml:
fprintf(stream, "\"%s\"", date_str);
break;
default:
break;
}
}
}
/*!
* \internal
* \brief Print whatever is needed to start a node section
*
* \param[in] stream File stream to display output to
* \param[in] node Node to print
*/
static void
print_node_start(FILE *stream, node_t *node)
{
char *node_name;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
node_name = get_node_display_name(node);
print_as("* Node %s:\n", node_name);
free(node_name);
break;
case mon_output_html:
case mon_output_cgi:
node_name = get_node_display_name(node);
fprintf(stream, " Node: %s
\n \n", node_name);
free(node_name);
break;
case mon_output_xml:
fprintf(stream, " \n", node->details->uname);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever is needed to end a node section
*
* \param[in] stream File stream to display output to
*/
static void
print_node_end(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print resources section heading appropriate to options
*
* \param[in] stream File stream to display output to
*/
static void
print_resources_heading(FILE *stream)
{
const char *heading;
if (group_by_node) {
/* Active resources have already been printed by node */
heading = (inactive_resources? "Inactive resources" : NULL);
} else if (inactive_resources) {
heading = "Full list of resources";
} else {
heading = "Active resources";
}
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n%s:\n\n", heading);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n %s
\n", heading);
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever resource section closing is appropriate
*
* \param[in] stream File stream to display output to
*/
static void
print_resources_closing(FILE *stream, gboolean printed_heading)
{
const char *heading;
/* What type of resources we did or did not display */
if (group_by_node) {
heading = "inactive ";
} else if (inactive_resources) {
heading = "";
} else {
heading = "active ";
}
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (!printed_heading) {
print_as("\nNo %sresources\n\n", heading);
}
break;
case mon_output_html:
case mon_output_cgi:
if (!printed_heading) {
fprintf(stream, "
\n No %sresources
\n", heading);
}
break;
case mon_output_xml:
fprintf(stream, " %s\n",
(printed_heading? "" : ""));
break;
default:
break;
}
}
/*!
* \internal
* \brief Print whatever resource section(s) are appropriate
*
* \param[in] stream File stream to display output to
* \param[in] data_set Cluster state to display
* \param[in] print_opts Bitmask of pe_print_options
*/
static void
print_resources(FILE *stream, pe_working_set_t *data_set, int print_opts)
{
GListPtr rsc_iter;
const char *prefix = NULL;
gboolean printed_heading = FALSE;
gboolean brief_output = print_brief;
/* If we already showed active resources by node, and
* we're not showing inactive resources, we have nothing to do
*/
if (group_by_node && !inactive_resources) {
return;
}
/* XML uses an indent, and ignores brief option for resources */
if (output_format == mon_output_xml) {
prefix = " ";
brief_output = FALSE;
}
/* If we haven't already printed resources grouped by node,
* and brief output was requested, print resource summary */
if (brief_output && !group_by_node) {
print_resources_heading(stream);
printed_heading = TRUE;
print_rscs_brief(data_set->resources, NULL, print_opts, stream,
inactive_resources);
}
/* For each resource, display it if appropriate */
for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) {
resource_t *rsc = (resource_t *) rsc_iter->data;
/* Complex resources may have some sub-resources active and some inactive */
gboolean is_active = rsc->fns->active(rsc, TRUE);
gboolean partially_active = rsc->fns->active(rsc, FALSE);
/* Skip inactive orphans (deleted but still in CIB) */
if (is_set(rsc->flags, pe_rsc_orphan) && !is_active) {
continue;
/* Skip active resources if we already displayed them by node */
} else if (group_by_node) {
if (is_active) {
continue;
}
/* Skip primitives already counted in a brief summary */
} else if (brief_output && (rsc->variant == pe_native)) {
continue;
/* Skip resources that aren't at least partially active,
* unless we're displaying inactive resources
*/
} else if (!partially_active && !inactive_resources) {
continue;
}
/* Print this resource */
if (printed_heading == FALSE) {
print_resources_heading(stream);
printed_heading = TRUE;
}
rsc->fns->print(rsc, prefix, print_opts, stream);
}
print_resources_closing(stream, printed_heading);
}
/*!
* \internal
* \brief Print heading for resource history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node that ran this resource
* \param[in] rsc Resource to print
* \param[in] rsc_id ID of resource to print
* \param[in] all Whether to print every resource or just failed ones
*/
static void
print_rsc_history_start(FILE *stream, pe_working_set_t *data_set, node_t *node,
resource_t *rsc, const char *rsc_id, gboolean all)
{
time_t last_failure = 0;
int failcount = rsc?
pe_get_failcount(node, rsc, &last_failure, pe_fc_default,
NULL, data_set)
: 0;
if (!all && !failcount && (last_failure <= 0)) {
return;
}
/* Print resource ID */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" %s:", rsc_id);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %s:", rsc_id);
break;
case mon_output_xml:
fprintf(stream, " 0)) {
/* Print migration threshold */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" migration-threshold=%d", rsc->migration_threshold);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " migration-threshold=%d", rsc->migration_threshold);
break;
case mon_output_xml:
fprintf(stream, " orphan=\"false\" migration-threshold=\"%d\"",
rsc->migration_threshold);
break;
default:
break;
}
/* Print fail count if any */
if (failcount > 0) {
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" " CRM_FAIL_COUNT_PREFIX "=%d", failcount);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " " CRM_FAIL_COUNT_PREFIX "=%d", failcount);
break;
case mon_output_xml:
fprintf(stream, " " CRM_FAIL_COUNT_PREFIX "=\"%d\"",
failcount);
break;
default:
break;
}
}
/* Print last failure time if any */
if (last_failure > 0) {
print_nvpair(stream, CRM_LAST_FAILURE_PREFIX, NULL, NULL,
last_failure);
}
}
/* End the heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "\n \n");
break;
case mon_output_xml:
fprintf(stream, ">\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print closing for resource history
*
* \param[in] stream File stream to display output to
*/
static void
print_rsc_history_end(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print operation history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node this operation is for
* \param[in] xml_op Root of XML tree describing this operation
* \param[in] task Task parsed from this operation's XML
* \param[in] interval_ms_s Interval parsed from this operation's XML
* \param[in] rc Return code parsed from this operation's XML
*/
static void
print_op_history(FILE *stream, pe_working_set_t *data_set, node_t *node,
xmlNode *xml_op, const char *task, const char *interval_ms_s,
int rc)
{
const char *value = NULL;
const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
/* Begin the operation description */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" + (%s) %s:", call, task);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " (%s) %s:", call, task);
break;
case mon_output_xml:
fprintf(stream, " 0) {
print_nvpair(stream, attr, NULL, NULL, int_value);
}
}
attr = XML_RSC_OP_LAST_RUN;
value = crm_element_value(xml_op, attr);
if (value) {
int_value = crm_parse_int(value, NULL);
if (int_value > 0) {
print_nvpair(stream, attr, NULL, NULL, int_value);
}
}
attr = XML_RSC_OP_T_EXEC;
value = crm_element_value(xml_op, attr);
if (value) {
print_nvpair(stream, attr, value, "ms", 0);
}
attr = XML_RSC_OP_T_QUEUE;
value = crm_element_value(xml_op, attr);
if (value) {
print_nvpair(stream, attr, value, "ms", 0);
}
}
/* End the operation description */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " rc=%d (%s)\n", rc, services_ocf_exitcode_str(rc));
break;
case mon_output_xml:
fprintf(stream, " rc=\"%d\" rc_text=\"%s\" />\n", rc, services_ocf_exitcode_str(rc));
break;
default:
break;
}
}
/*!
* \internal
* \brief Print resource operation/failure history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node Node that ran this resource
* \param[in] rsc_entry Root of XML tree describing resource status
* \param[in] operations Whether to print operations or just failcounts
*/
static void
print_rsc_history(FILE *stream, pe_working_set_t *data_set, node_t *node,
xmlNode *rsc_entry, gboolean operations)
{
GListPtr gIter = NULL;
GListPtr op_list = NULL;
gboolean printed = FALSE;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
resource_t *rsc = pe_find_resource(data_set->resources, rsc_id);
xmlNode *rsc_op = NULL;
/* If we're not showing operations, just print the resource failure summary */
if (operations == FALSE) {
print_rsc_history_start(stream, data_set, node, rsc, rsc_id, FALSE);
print_rsc_history_end(stream);
return;
}
/* Create a list of this resource's operations */
for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_append(op_list, rsc_op);
}
}
op_list = g_list_sort(op_list, sort_op_by_callid);
/* Print each operation */
for (gIter = op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *xml_op = (xmlNode *) gIter->data;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *interval_ms_s = crm_element_value(xml_op,
XML_LRM_ATTR_INTERVAL_MS);
const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC);
int rc = crm_parse_int(op_rc, "0");
/* Display 0-interval monitors as "probe" */
if (safe_str_eq(task, CRMD_ACTION_STATUS)
&& ((interval_ms_s == NULL) || safe_str_eq(interval_ms_s, "0"))) {
task = "probe";
}
/* Ignore notifies and some probes */
if (safe_str_eq(task, CRMD_ACTION_NOTIFY) || (safe_str_eq(task, "probe") && (rc == 7))) {
continue;
}
/* If this is the first printed operation, print heading for resource */
if (printed == FALSE) {
printed = TRUE;
print_rsc_history_start(stream, data_set, node, rsc, rsc_id, TRUE);
}
/* Print the operation */
print_op_history(stream, data_set, node, xml_op, task, interval_ms_s,
rc);
}
/* Free the list we created (no need to free the individual items) */
g_list_free(op_list);
/* If we printed anything, close the resource */
if (printed) {
print_rsc_history_end(stream);
}
}
/*!
* \internal
* \brief Print node operation/failure history
*
* \param[in] stream File stream to display output to
* \param[in] data_set Current state of CIB
* \param[in] node_state Root of XML tree describing node status
* \param[in] operations Whether to print operations or just failcounts
*/
static void
print_node_history(FILE *stream, pe_working_set_t *data_set,
xmlNode *node_state, gboolean operations)
{
node_t *node = pe_find_node_id(data_set->nodes, ID(node_state));
xmlNode *lrm_rsc = NULL;
xmlNode *rsc_entry = NULL;
if (node && node->details && node->details->online) {
print_node_start(stream, node);
lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
/* Print history of each of the node's resources */
for (rsc_entry = __xml_first_child(lrm_rsc); rsc_entry != NULL;
rsc_entry = __xml_next(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
print_rsc_history(stream, data_set, node, rsc_entry, operations);
}
}
print_node_end(stream);
}
}
/*!
* \internal
* \brief Print extended information about an attribute if appropriate
*
* \param[in] stream File stream to display output to
* \param[in] node Node whose attribute is being printed
* \param[in] rsc_list Resources to search for the attribute's source
* \param[in] attrname Name of the attribute being printed
* \param[in] attrvalue Value of the attribute being printed
*
* \return TRUE if extended information was printed, FALSE otherwise
* \note Currently, extended information is only supported for ping/pingd
* resources, for which a message will be printed if connectivity is lost
* or degraded.
*/
static gboolean
print_attr_msg(FILE *stream, node_t * node, GListPtr rsc_list, const char *attrname, const char *attrvalue)
{
GListPtr gIter = NULL;
for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
const char *type = g_hash_table_lookup(rsc->meta, "type");
if (rsc->children != NULL) {
if (print_attr_msg(stream, node, rsc->children, attrname, attrvalue)) {
return TRUE;
}
}
if (safe_str_eq(type, "ping") || safe_str_eq(type, "pingd")) {
const char *name = g_hash_table_lookup(rsc->parameters, "name");
if (name == NULL) {
name = "pingd";
}
/* Match the resource whose "name" parameter equals the attribute name. */
if (safe_str_eq(name, attrname)) {
int host_list_num = 0;
int expected_score = 0;
int value = crm_parse_int(attrvalue, "0");
const char *hosts = g_hash_table_lookup(rsc->parameters, "host_list");
const char *multiplier = g_hash_table_lookup(rsc->parameters, "multiplier");
if(hosts) {
char **host_list = g_strsplit(hosts, " ", 0);
host_list_num = g_strv_length(host_list);
g_strfreev(host_list);
}
/* pingd's default multiplier is 1, so use that when unset. */
expected_score = host_list_num * crm_parse_int(multiplier, "1");
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (value <= 0) {
print_as("\t: Connectivity is lost");
} else if (value < expected_score) {
print_as("\t: Connectivity is degraded (Expected=%d)", expected_score);
}
break;
case mon_output_html:
case mon_output_cgi:
if (value <= 0) {
fprintf(stream, " (connectivity is lost)");
} else if (value < expected_score) {
fprintf(stream, " (connectivity is degraded -- expected %d)",
expected_score);
}
break;
case mon_output_xml:
fprintf(stream, " expected=\"%d\"", expected_score);
break;
default:
break;
}
return TRUE;
}
}
}
return FALSE;
}
static int
compare_attribute(gconstpointer a, gconstpointer b)
{
int rc;
rc = strcmp((const char *)a, (const char *)b);
return rc;
}
static void
create_attr_list(gpointer name, gpointer value, gpointer data)
{
int i;
const char *filt_str[] = FILTER_STR;
CRM_CHECK(name != NULL, return);
/* filtering automatic attributes */
for (i = 0; filt_str[i] != NULL; i++) {
if (g_str_has_prefix(name, filt_str[i])) {
return;
}
}
attr_list = g_list_insert_sorted(attr_list, name, compare_attribute);
}
/* structure for passing multiple user data to g_list_foreach() */
struct mon_attr_data {
FILE *stream;
node_t *node;
};
static void
print_node_attribute(gpointer name, gpointer user_data)
{
const char *value = NULL;
struct mon_attr_data *data = (struct mon_attr_data *) user_data;
value = pe_node_attribute_raw(data->node, name);
/* Print attribute name and value */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(" + %-32s\t: %-10s", (char *)name, value);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->stream, " %s: %s",
(char *)name, value);
break;
case mon_output_xml:
fprintf(data->stream,
"            <attribute name=\"%s\" value=\"%s\"",
(char *)name, value);
break;
default:
break;
}
/* Print extended information if appropriate */
print_attr_msg(data->stream, data->node, data->node->details->running_rsc,
name, value);
/* Close out the attribute */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(data->stream, "\n");
break;
case mon_output_xml:
fprintf(data->stream, " />\n");
break;
default:
break;
}
}
static void
print_node_summary(FILE *stream, pe_working_set_t * data_set, gboolean operations)
{
xmlNode *node_state = NULL;
xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
/* Print heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (operations) {
print_as("\nOperations:\n");
} else {
print_as("\nMigration Summary:\n");
}
break;
case mon_output_html:
case mon_output_cgi:
if (operations) {
fprintf(stream, " <hr />\n <h2>Operations</h2>\n");
} else {
fprintf(stream, " <hr />\n <h2>Migration Summary</h2>\n");
}
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each node in the CIB status */
for (node_state = __xml_first_child(cib_status); node_state != NULL;
node_state = __xml_next(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
print_node_history(stream, data_set, node_state, operations);
}
}
/* Close section */
switch (output_format) {
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
static void
print_ticket(gpointer name, gpointer value, gpointer data)
{
ticket_t *ticket = (ticket_t *) value;
FILE *stream = (FILE *) data;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("* %s:\t%s%s", ticket->id,
(ticket->granted? "granted" : "revoked"),
(ticket->standby? " [standby]" : ""));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %s: %s%s", ticket->id,
(ticket->granted? "granted" : "revoked"),
(ticket->standby? " [standby]" : ""));
break;
case mon_output_xml:
fprintf(stream, " id, (ticket->granted? "granted" : "revoked"),
(ticket->standby? "true" : "false"));
break;
default:
break;
}
if (ticket->last_granted > -1) {
print_nvpair(stream, "last-granted", NULL, NULL, ticket->last_granted);
}
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "\n");
break;
case mon_output_xml:
fprintf(stream, " />\n");
break;
default:
break;
}
}
static void
print_cluster_tickets(FILE *stream, pe_working_set_t * data_set)
{
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nTickets:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Tickets
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each ticket */
g_hash_table_foreach(data_set->tickets, print_ticket, stream);
/* Close section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Return human-friendly string representing node name
*
* The returned string will be in the format
* uname[@hostUname] [(nodeID)]
* "@hostUname" will be printed if the node is a guest node.
* "(nodeID)" will be printed if the node ID is different from the node uname,
* and detailed output has been requested.
*
* \param[in] node Node to represent
* \return Newly allocated string with representation of node name
* \note It is the caller's responsibility to free the result with free().
*/
static char *
get_node_display_name(node_t *node)
{
char *node_name;
const char *node_host = NULL;
const char *node_id = NULL;
int name_len;
CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL));
/* Host is displayed only if this is a guest node */
if (is_container_remote_node(node)) {
node_t *host_node = pe__current_node(node->details->remote_rsc);
if (host_node && host_node->details) {
node_host = host_node->details->uname;
}
if (node_host == NULL) {
node_host = ""; /* so we at least get "uname@" to indicate guest */
}
}
/* Node ID is displayed if different from uname and detail is requested */
if (print_clone_detail && safe_str_neq(node->details->uname, node->details->id)) {
node_id = node->details->id;
}
/* Determine name length */
name_len = strlen(node->details->uname) + 1;
if (node_host) {
name_len += strlen(node_host) + 1; /* "@node_host" */
}
if (node_id) {
name_len += strlen(node_id) + 3; /* + " (node_id)" */
}
/* Allocate and populate display name */
node_name = malloc(name_len);
CRM_ASSERT(node_name != NULL);
strcpy(node_name, node->details->uname);
if (node_host) {
strcat(node_name, "@");
strcat(node_name, node_host);
}
if (node_id) {
strcat(node_name, " (");
strcat(node_name, node_id);
strcat(node_name, ")");
}
return node_name;
}
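/* Editorial example (not part of the original patch, names hypothetical):
 * for a guest node "guest1" hosted on cluster node "cluster2" whose node ID
 * "3" differs from its uname, get_node_display_name() returns
 * "guest1@cluster2 (3)" when detailed output is requested, and
 * "guest1@cluster2" otherwise.
 */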
/*!
* \internal
* \brief Print a negative location constraint
*
* \param[in] stream File stream to display output to
* \param[in] node Node affected by constraint
* \param[in] location Constraint to print
*/
static void print_ban(FILE *stream, node_t *node, rsc_to_node_t *location)
{
char *node_name = NULL;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
node_name = get_node_display_name(node);
print_as(" %s\tprevents %s from running %son %s\n",
location->id, location->rsc_lh->id,
((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""),
node_name);
break;
case mon_output_html:
case mon_output_cgi:
node_name = get_node_display_name(node);
fprintf(stream, " %s prevents %s from running %son %s\n",
location->id, location->rsc_lh->id,
((location->role_filter == RSC_ROLE_MASTER)? "as Master " : ""),
node_name);
break;
case mon_output_xml:
fprintf(stream,
"        <ban id=\"%s\" resource=\"%s\" node=\"%s\" weight=\"%d\" master_only=\"%s\" />\n",
location->id, location->rsc_lh->id, node->details->uname, node->weight,
((location->role_filter == RSC_ROLE_MASTER)? "true" : "false"));
break;
default:
break;
}
free(node_name);
}
/*!
* \internal
* \brief Print section for negative location constraints
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set corresponding to CIB status to display
*/
static void print_neg_locations(FILE *stream, pe_working_set_t *data_set)
{
GListPtr gIter, gIter2;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nNegative Location Constraints:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Negative Location Constraints
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each ban */
for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
rsc_to_node_t *location = (rsc_to_node_t *) gIter->data;
if (!g_str_has_prefix(location->id, print_neg_location_prefix))
continue;
for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) {
node_t *node = (node_t *) gIter2->data;
if (node->weight < 0) {
print_ban(stream, node, location);
}
}
}
/* Close section */
switch (output_format) {
case mon_output_cgi:
case mon_output_html:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
static void
crm_mon_get_parameters(resource_t *rsc, pe_working_set_t * data_set)
{
get_rsc_attributes(rsc->parameters, rsc, NULL, data_set);
crm_trace("Beekhof: unpacked params for %s (%d)", rsc->id, g_hash_table_size(rsc->parameters));
if(rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
crm_mon_get_parameters(gIter->data, data_set);
}
}
}
/*!
* \internal
* \brief Print node attributes section
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_node_attributes(FILE *stream, pe_working_set_t *data_set)
{
GListPtr gIter = NULL;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nNode Attributes:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Node Attributes
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Unpack all resource parameters (it would be more efficient to do this
* only when needed for the first time in print_attr_msg())
*/
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
crm_mon_get_parameters(gIter->data, data_set);
}
/* Display each node's attributes */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
struct mon_attr_data data;
data.stream = stream;
data.node = (node_t *) gIter->data;
if (data.node && data.node->details && data.node->details->online) {
print_node_start(stream, data.node);
g_hash_table_foreach(data.node->details->attrs, create_attr_list, NULL);
g_list_foreach(attr_list, print_node_attribute, &data);
g_list_free(attr_list);
attr_list = NULL;
print_node_end(stream);
}
}
/* Print section footer */
switch (output_format) {
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Return resource display options corresponding to command-line choices
*
* \return Bitmask of pe_print_options suitable for resource print functions
*/
static int
get_resource_display_options(void)
{
int print_opts;
/* Determine basic output format */
switch (output_format) {
case mon_output_console:
print_opts = pe_print_ncurses;
break;
case mon_output_html:
case mon_output_cgi:
print_opts = pe_print_html;
break;
case mon_output_xml:
print_opts = pe_print_xml;
break;
default:
print_opts = pe_print_printf;
break;
}
/* Add optional display elements */
if (print_pending) {
print_opts |= pe_print_pending;
}
if (print_clone_detail) {
print_opts |= pe_print_clone_details|pe_print_implicit;
}
if (!inactive_resources) {
print_opts |= pe_print_clone_active;
}
if (print_brief) {
print_opts |= pe_print_brief;
}
return print_opts;
}
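/* Editorial example (not part of the original patch): in console mode with
 * pending operations and clone detail requested, the returned mask is
 * pe_print_ncurses | pe_print_pending | pe_print_clone_details | pe_print_implicit,
 * plus pe_print_clone_active when inactive resources are not being shown.
 */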
/*!
* \internal
* \brief Return human-friendly string representing current time
*
* \return Current time as string (as by ctime() but without newline) on success
* or "Could not determine current time" on error
* \note The return value points to a statically allocated string which might be
* overwritten by subsequent calls to any of the C library date and time functions.
*/
static const char *
crm_now_string(void)
{
time_t a_time = time(NULL);
char *since_epoch = ctime(&a_time);
if ((a_time == (time_t) -1) || (since_epoch == NULL)) {
return "Could not determine current time";
}
since_epoch[strlen(since_epoch) - 1] = EOS; /* trim newline */
return (since_epoch);
}
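/* Editorial sketch (not part of the original patch): since crm_now_string()
 * returns ctime()'s static buffer, a caller that needs the value to survive
 * later date/time calls should copy it first, e.g.:
 *
 *     char *now_copy = strdup(crm_now_string());
 *     ...
 *     free(now_copy);
 */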
/*!
* \internal
* \brief Print header for cluster summary if needed
*
* \param[in] stream File stream to display output to
*/
static void
print_cluster_summary_header(FILE *stream)
{
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Cluster Summary
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print footer for cluster summary if needed
*
* \param[in] stream File stream to display output to
*/
static void
print_cluster_summary_footer(FILE *stream)
{
switch (output_format) {
case mon_output_cgi:
case mon_output_html:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print times the display was last updated and CIB last changed
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_times(FILE *stream, pe_working_set_t *data_set)
{
const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN);
const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER);
const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT);
const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG);
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Last updated: %s", crm_now_string());
print_as((user || client || origin)? "\n" : "\t\t");
print_as("Last change: %s", last_written ? last_written : "");
if (user) {
print_as(" by %s", user);
}
if (client) {
print_as(" via %s", client);
}
if (origin) {
print_as(" on %s", origin);
}
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Last updated: %s
\n", crm_now_string());
fprintf(stream, " Last change: %s", last_written ? last_written : "");
if (user) {
fprintf(stream, " by %s", user);
}
if (client) {
fprintf(stream, " via %s", client);
}
if (origin) {
fprintf(stream, " on %s", origin);
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n", crm_now_string());
fprintf(stream, " \n",
last_written ? last_written : "", user ? user : "",
client ? client : "", origin ? origin : "");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster stack
*
* \param[in] stream File stream to display output to
* \param[in] stack_s Stack name
*/
static void
print_cluster_stack(FILE *stream, const char *stack_s)
{
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Stack: %s\n", stack_s);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Stack: %s
\n", stack_s);
break;
case mon_output_xml:
fprintf(stream, " \n", stack_s);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print current DC and its version
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_dc(FILE *stream, pe_working_set_t *data_set)
{
node_t *dc = data_set->dc_node;
xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']",
data_set->input, LOG_DEBUG);
const char *dc_version_s = dc_version?
crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)
: NULL;
const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM);
char *dc_name = dc? get_node_display_name(dc) : NULL;
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("Current DC: ");
if (dc) {
print_as("%s (version %s) - partition %s quorum\n",
dc_name, (dc_version_s? dc_version_s : "unknown"),
(crm_is_true(quorum) ? "with" : "WITHOUT"));
} else {
print_as("NONE\n");
}
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " Current DC: ");
if (dc) {
fprintf(stream, "%s (version %s) - partition %s quorum",
dc_name, (dc_version_s? dc_version_s : "unknown"),
(crm_is_true(quorum)? "with" : "WITHOUT"));
} else {
fprintf(stream, "NONE");
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " details->uname, dc->details->id,
(crm_is_true(quorum) ? "true" : "false"));
} else {
fprintf(stream, "present=\"false\"");
}
fprintf(stream, " />\n");
break;
default:
break;
}
free(dc_name);
}
/*!
* \internal
* \brief Print counts of configured nodes and resources
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
* \param[in] stack_s Stack name
*/
static void
print_cluster_counts(FILE *stream, pe_working_set_t *data_set, const char *stack_s)
{
int nnodes = g_list_length(data_set->nodes);
int nresources = count_resources(data_set, NULL);
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n%d node%s configured\n", nnodes, s_if_plural(nnodes));
print_as("%d resource%s configured",
nresources, s_if_plural(nresources));
if(data_set->disabled_resources || data_set->blocked_resources) {
print_as(" (");
if (data_set->disabled_resources) {
print_as("%d DISABLED", data_set->disabled_resources);
}
if (data_set->disabled_resources && data_set->blocked_resources) {
print_as(", ");
}
if (data_set->blocked_resources) {
print_as("%d BLOCKED from starting due to failure",
data_set->blocked_resources);
}
print_as(")");
}
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %d node%s configured
\n",
nnodes, s_if_plural(nnodes));
fprintf(stream, " %d resource%s configured",
nresources, s_if_plural(nresources));
if (data_set->disabled_resources || data_set->blocked_resources) {
fprintf(stream, " (");
if (data_set->disabled_resources) {
fprintf(stream, "%d DISABLED",
data_set->disabled_resources);
}
if (data_set->disabled_resources && data_set->blocked_resources) {
fprintf(stream, ", ");
}
if (data_set->blocked_resources) {
fprintf(stream,
"%d BLOCKED from starting due to failure",
data_set->blocked_resources);
}
fprintf(stream, ")");
}
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream,
"        <nodes_configured number=\"%d\" />\n",
g_list_length(data_set->nodes));
fprintf(stream,
"        <resources_configured number=\"%d\" disabled=\"%d\" blocked=\"%d\" />\n",
count_resources(data_set, NULL),
data_set->disabled_resources, data_set->blocked_resources);
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster-wide options
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*
* \note Currently this is only implemented for HTML and XML output, and
* prints only a few options. If there is demand, more could be added.
*/
static void
print_cluster_options(FILE *stream, pe_working_set_t *data_set)
{
switch (output_format) {
case mon_output_plain:
case mon_output_console:
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
print_as("\n *** Resource management is DISABLED ***");
print_as("\n The cluster will not attempt to start, stop or recover services");
print_as("\n");
}
break;
case mon_output_html:
fprintf(stream, "
\n Config Options
\n");
fprintf(stream, " \n");
fprintf(stream, " STONITH of failed nodes | %s |
\n",
is_set(data_set->flags, pe_flag_stonith_enabled)? "enabled" : "disabled");
fprintf(stream, " Cluster is | %ssymmetric |
\n",
is_set(data_set->flags, pe_flag_symmetric_cluster)? "" : "a");
fprintf(stream, " No Quorum Policy | ");
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
fprintf(stream, "Freeze resources");
break;
case no_quorum_stop:
fprintf(stream, "Stop ALL resources");
break;
case no_quorum_ignore:
fprintf(stream, "Ignore");
break;
case no_quorum_suicide:
fprintf(stream, "Suicide");
break;
}
fprintf(stream, " |
\n");
fprintf(stream, " Resource management | ");
if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
fprintf(stream, "DISABLED (the cluster will "
"not attempt to start, stop or recover services)");
} else {
fprintf(stream, "enabled");
}
fprintf(stream, " |
\n");
fprintf(stream, "
\n \n");
break;
case mon_output_xml:
fprintf(stream, " flags, pe_flag_stonith_enabled)?
"true" : "false");
fprintf(stream, " symmetric-cluster=\"%s\"",
is_set(data_set->flags, pe_flag_symmetric_cluster)?
"true" : "false");
fprintf(stream, " no-quorum-policy=\"");
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
fprintf(stream, "freeze");
break;
case no_quorum_stop:
fprintf(stream, "stop");
break;
case no_quorum_ignore:
fprintf(stream, "ignore");
break;
case no_quorum_suicide:
fprintf(stream, "suicide");
break;
}
fprintf(stream, "\"");
fprintf(stream, " maintenance-mode=\"%s\"",
is_set(data_set->flags, pe_flag_maintenance_mode)?
"true" : "false");
fprintf(stream, " />\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Get the name of the stack in use (or "unknown" if not available)
*
* \param[in] data_set Working set of CIB state
*
* \return String representing stack name
*/
static const char *
get_cluster_stack(pe_working_set_t *data_set)
{
xmlNode *stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']",
data_set->input, LOG_DEBUG);
return stack? crm_element_value(stack, XML_NVPAIR_ATTR_VALUE) : "unknown";
}
/*!
* \internal
* \brief Print a summary of cluster-wide information
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_cluster_summary(FILE *stream, pe_working_set_t *data_set)
{
const char *stack_s = get_cluster_stack(data_set);
gboolean header_printed = FALSE;
if (show & mon_show_stack) {
if (header_printed == FALSE) {
print_cluster_summary_header(stream);
header_printed = TRUE;
}
print_cluster_stack(stream, stack_s);
}
/* Always print DC if none, even if not requested */
if ((data_set->dc_node == NULL) || (show & mon_show_dc)) {
if (header_printed == FALSE) {
print_cluster_summary_header(stream);
header_printed = TRUE;
}
print_cluster_dc(stream, data_set);
}
if (show & mon_show_times) {
if (header_printed == FALSE) {
print_cluster_summary_header(stream);
header_printed = TRUE;
}
print_cluster_times(stream, data_set);
}
if (is_set(data_set->flags, pe_flag_maintenance_mode)
|| data_set->disabled_resources
|| data_set->blocked_resources
|| is_set(show, mon_show_count)) {
if (header_printed == FALSE) {
print_cluster_summary_header(stream);
header_printed = TRUE;
}
print_cluster_counts(stream, data_set, stack_s);
}
/* There is not a separate option for showing cluster options, so show with
* stack for now; a separate option could be added if there is demand
*/
if (show & mon_show_stack) {
print_cluster_options(stream, data_set);
}
if (header_printed) {
print_cluster_summary_footer(stream);
}
}
/*!
* \internal
* \brief Print a failed action
*
* \param[in] stream File stream to display output to
* \param[in] xml_op Root of XML tree describing failed action
*/
static void
print_failed_action(FILE *stream, xmlNode *xml_op)
{
const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
const char *op_key_attr = "op_key";
const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE);
const char *node = crm_element_value(xml_op, XML_ATTR_UNAME);
const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON);
int rc = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), "0");
int status = crm_parse_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), "0");
char *exit_reason_cleaned;
/* If no op_key was given, use id instead */
if (op_key == NULL) {
op_key = ID(xml_op);
op_key_attr = "id";
}
/* If no exit reason was given, use "none" */
if (exit_reason == NULL) {
exit_reason = "none";
}
/* Print common action information */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("* %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'",
op_key, node, services_ocf_exitcode_str(rc), rc,
call, services_lrm_status_str(status), exit_reason);
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " %s on %s '%s' (%d): call=%s, status=%s, exitreason='%s'",
op_key, node, services_ocf_exitcode_str(rc), rc,
call, services_lrm_status_str(status), exit_reason);
break;
case mon_output_xml:
exit_reason_cleaned = crm_xml_escape(exit_reason);
fprintf(stream, "        <failure %s=\"%s\" node=\"%s\"",
op_key_attr, op_key, node);
fprintf(stream, " exitstatus=\"%s\" exitreason=\"%s\" exitcode=\"%d\"",
services_ocf_exitcode_str(rc), exit_reason_cleaned, rc);
fprintf(stream, " call=\"%s\" status=\"%s\"",
call, services_lrm_status_str(status));
free(exit_reason_cleaned);
break;
default:
break;
}
/* If the operation's last change time is available, print timing info */
if (last) {
time_t run_at = crm_parse_int(last, "0");
char *run_at_s = ctime(&run_at);
if (run_at_s) {
run_at_s[24] = 0; /* Overwrite the newline */
}
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as(", last-rc-change='%s', queued=%sms, exec=%sms",
run_at_s, crm_element_value(xml_op, XML_RSC_OP_T_QUEUE),
crm_element_value(xml_op, XML_RSC_OP_T_EXEC));
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, " last-rc-change='%s', queued=%sms, exec=%sms",
run_at_s, crm_element_value(xml_op, XML_RSC_OP_T_QUEUE),
crm_element_value(xml_op, XML_RSC_OP_T_EXEC));
break;
case mon_output_xml:
fprintf(stream, " last-rc-change=\"%s\" queued=\"%s\" exec=\"%s\"",
run_at_s, crm_element_value(xml_op, XML_RSC_OP_T_QUEUE),
crm_element_value(xml_op, XML_RSC_OP_T_EXEC));
break;
default:
break;
}
}
/* End the action listing */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "</li>\n");
break;
case mon_output_xml:
fprintf(stream, " />\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a section for failed actions
*
* \param[in] stream File stream to display output to
* \param[in] data_set Working set of CIB state
*/
static void
print_failed_actions(FILE *stream, pe_working_set_t *data_set)
{
xmlNode *xml_op = NULL;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nFailed Resource Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream,
" <hr />\n <h2>Failed Resource Actions</h2>\n <ul>\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
/* Print each failed action */
for (xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
xml_op = __xml_next(xml_op)) {
print_failed_action(stream, xml_op);
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
 * \brief Reduce the stonith history
 *
 * For successful actions, keep only the most recent entry for each action
 * type and target; for failed actions, also track which delegate failed;
 * for actions still in progress, keep every entry.
*
* \param[in] history List of stonith actions
*
*/
static stonith_history_t *
reduce_stonith_history(stonith_history_t *history)
{
stonith_history_t *new = NULL, *hp, *np, *tmp;
for (hp = history; hp; ) {
for (np = new; np; np = np->next) {
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* action not in progress */
if (safe_str_eq(hp->target, np->target) &&
safe_str_eq(hp->action, np->action) &&
(hp->state == np->state)) {
if ((hp->state == st_done) ||
safe_str_eq(hp->delegate, np->delegate)) {
/* replace or purge */
if (hp->completed < np->completed) {
/* purge older hp */
tmp = hp->next;
hp->next = NULL;
stonith_history_free(hp);
hp = tmp;
break;
}
/* hp is newer: move its details into np (spliced by hand, since this is a singly-linked list) */
free(hp->target);
free(hp->action);
free(np->origin);
np->origin = hp->origin;
free(np->delegate);
np->delegate = hp->delegate;
free(np->client);
np->client = hp->client;
np->completed = hp->completed;
tmp = hp;
hp = hp->next;
free(tmp);
break;
}
}
if (np->next) {
continue;
}
}
np = NULL; /* let the outer loop advance hp */
break;
}
/* simply move hp from history to new */
if (np == NULL) {
tmp = hp->next;
hp->next = new;
new = hp;
hp = tmp;
}
}
return new;
}
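/* Editorial example (not part of the original patch), using hypothetical
 * entries to show what survives the reduction:
 *
 *     reboot node1 (done,   completed=200)  kept: newest of its action+target
 *     reboot node1 (done,   completed=100)  purged: older duplicate
 *     off    node1 (done,   completed=150)  kept: different action type
 *     reboot node2 (failed, delegate=nodeA) kept: failures tracked per delegate
 *     reboot node2 (pending)                kept: in-progress entries kept
 */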
/*!
* \internal
 * \brief Sort the stonith history, most recently completed first
 * (pending actions, which lack a completed timestamp, are gathered at the top)
*
* \param[in] history List of stonith actions
*
*/
static stonith_history_t *
sort_stonith_history(stonith_history_t *history)
{
stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp;
for (hp = history; hp; ) {
tmp = hp->next;
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* sort into new */
if ((!new) || (hp->completed > new->completed)) {
hp->next = new;
new = hp;
} else {
np = new;
do {
if ((!np->next) || (hp->completed > np->next->completed)) {
hp->next = np->next;
np->next = hp;
break;
}
np = np->next;
} while (1);
}
} else {
/* put into pending */
hp->next = pending;
pending = hp;
}
hp = tmp;
}
/* Pending actions have no completed timestamp, so put them at the front */
if (pending) {
stonith_history_t *last_pending = pending;
while (last_pending->next) {
last_pending = last_pending->next;
}
last_pending->next = new;
new = pending;
}
return new;
}
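/* Editorial note (not part of the original patch): this is an insertion sort,
 * O(n^2) in the number of completed entries, which is acceptable for the
 * short histories the fencer keeps. The resulting order, top to bottom, is:
 * pending actions first, then completed=300, completed=200, completed=100.
 */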
/*!
* \internal
 * \brief Turn a stonith action into a human-readable string
*
* \param[in] action Stonith action
*/
static char *
fence_action_str(const char *action)
{
char *str = NULL;
if (action == NULL) {
str = strdup("fencing");
} else if (!strcmp(action, "on")) {
str = strdup("unfencing");
} else if (!strcmp(action, "off")) {
str = strdup("turning off");
} else {
str = strdup(action);
}
return str;
}
/*!
* \internal
* \brief Print a stonith action
*
* \param[in] stream File stream to display output to
* \param[in] event stonith event
*/
static void
print_stonith_action(FILE *stream, stonith_history_t *event)
{
char *action_s = fence_action_str(event->action);
char *run_at_s = ctime(&event->completed);
if ((run_at_s) && (run_at_s[0] != 0)) {
run_at_s[strlen(run_at_s)-1] = 0; /* Overwrite the newline */
}
switch(output_format) {
case mon_output_xml:
fprintf(stream, " target, event->action);
switch(event->state) {
case st_done:
fprintf(stream, " state=\"success\"");
break;
case st_failed:
fprintf(stream, " state=\"failed\"");
break;
default:
fprintf(stream, " state=\"pending\"");
}
fprintf(stream, " origin=\"%s\" client=\"%s\"",
event->origin, event->client);
if (event->delegate) {
fprintf(stream, " delegate=\"%s\"", event->delegate);
}
switch(event->state) {
case st_done:
case st_failed:
fprintf(stream, " completed=\"%s\"", run_at_s?run_at_s:"");
break;
default:
/* Pending actions have no completed timestamp */
break;
}
fprintf(stream, " />\n");
break;
case mon_output_plain:
case mon_output_console:
switch(event->state) {
case st_done:
print_as("* %s of %s successful: delegate=%s, client=%s, origin=%s,\n"
" %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
((!fence_full_history) && (output_format != mon_output_xml))?
"last-successful":"completed",
run_at_s?run_at_s:"");
break;
case st_failed:
print_as("* %s of %s failed: delegate=%s, client=%s, origin=%s,\n"
" %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
((!fence_full_history) && (output_format != mon_output_xml))?
"last-failed":"completed",
run_at_s?run_at_s:"");
break;
default:
print_as("* %s of %s pending: client=%s, origin=%s\n",
action_s, event->target,
event->client, event->origin);
}
break;
case mon_output_html:
case mon_output_cgi:
switch(event->state) {
case st_done:
fprintf(stream, " %s of %s successful: delegate=%s, "
"client=%s, origin=%s, %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
((!fence_full_history) &&
(output_format != mon_output_xml))?
"last-successful":"completed",
run_at_s?run_at_s:"");
break;
case st_failed:
fprintf(stream, " %s of %s failed: delegate=%s, "
"client=%s, origin=%s, %s='%s'\n",
action_s, event->target,
event->delegate ? event->delegate : "",
event->client, event->origin,
((!fence_full_history) &&
(output_format != mon_output_xml))?
"last-failed":"completed",
run_at_s?run_at_s:"");
break;
default:
fprintf(stream, " %s of %s pending: client=%s, "
"origin=%s\n",
action_s, event->target,
event->client, event->origin);
}
break;
default:
/* no support for fence history for other formats so far */
break;
}
free(action_s);
}
/*!
* \internal
* \brief Print a section for failed stonith actions
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
*/
static void
print_failed_stonith_actions(FILE *stream, stonith_history_t *history)
{
stonith_history_t *hp;
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_failed) {
break;
}
}
if (!hp) {
return;
}
/* Print section heading */
switch (output_format) {
/* no need to take care of xml in here as xml gets full
* history anyway
*/
case mon_output_plain:
case mon_output_console:
print_as("\nFailed Fencing Actions:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Failed Fencing Actions
\n \n");
break;
default:
break;
}
/* Print each failed stonith action */
for (hp = history; hp; hp = hp->next) {
if (hp->state == st_failed) {
print_stonith_action(stream, hp);
}
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print a section for stonith-history
*
* \param[in] stream File stream to display output to
* \param[in] history List of stonith actions
*
*/
static void
print_stonith_history(FILE *stream, stonith_history_t *history)
{
stonith_history_t *hp;
/* Print section heading */
switch (output_format) {
case mon_output_plain:
case mon_output_console:
print_as("\nFencing History:\n");
break;
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n Fencing History
\n \n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
for (hp = history; hp; hp = hp->next) {
if ((hp->state != st_failed) || (output_format == mon_output_xml)) {
print_stonith_action(stream, hp);
}
}
/* End section */
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
fprintf(stream, "
\n");
break;
case mon_output_xml:
fprintf(stream, " \n");
break;
default:
break;
}
}
/*!
* \internal
* \brief Print cluster status to screen
*
* This uses the global display preferences set by command-line options
* to display cluster status in a human-friendly way.
*
* \param[in] data_set Working set of CIB state
* \param[in] stonith_history List of stonith actions
*/
static void
print_status(pe_working_set_t * data_set,
stonith_history_t *stonith_history)
{
GListPtr gIter = NULL;
int print_opts = get_resource_display_options();
/* space-separated lists of node names */
char *online_nodes = NULL;
char *online_remote_nodes = NULL;
char *online_guest_nodes = NULL;
char *offline_nodes = NULL;
char *offline_remote_nodes = NULL;
if (output_format == mon_output_console) {
blank_screen();
}
print_cluster_summary(stdout, data_set);
print_as("\n");
/* Gather node information (and print if in bad state or grouping by node) */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_mode = NULL;
char *node_name = get_node_display_name(node);
/* Get node mode */
if (node->details->unclean) {
if (node->details->online) {
node_mode = "UNCLEAN (online)";
} else if (node->details->pending) {
node_mode = "UNCLEAN (pending)";
} else {
node_mode = "UNCLEAN (offline)";
}
} else if (node->details->pending) {
node_mode = "pending";
} else if (node->details->standby_onfail && node->details->online) {
node_mode = "standby (on-fail)";
} else if (node->details->standby) {
if (node->details->online) {
node_mode = "standby";
} else {
node_mode = "OFFLINE (standby)";
}
} else if (node->details->maintenance) {
if (node->details->online) {
node_mode = "maintenance";
} else {
node_mode = "OFFLINE (maintenance)";
}
} else if (node->details->online) {
node_mode = "online";
if (group_by_node == FALSE) {
if (is_container_remote_node(node)) {
online_guest_nodes = add_list_element(online_guest_nodes, node_name);
} else if (is_baremetal_remote_node(node)) {
online_remote_nodes = add_list_element(online_remote_nodes, node_name);
} else {
online_nodes = add_list_element(online_nodes, node_name);
}
free(node_name);
continue;
}
} else {
node_mode = "OFFLINE";
if (group_by_node == FALSE) {
if (is_baremetal_remote_node(node)) {
offline_remote_nodes = add_list_element(offline_remote_nodes, node_name);
} else if (is_container_remote_node(node)) {
/* ignore offline guest nodes */
} else {
offline_nodes = add_list_element(offline_nodes, node_name);
}
free(node_name);
continue;
}
}
/* If we get here, node is in bad state, or we're grouping by node */
/* Print the node name and status */
if (is_container_remote_node(node)) {
print_as("Guest");
} else if (is_baremetal_remote_node(node)) {
print_as("Remote");
}
print_as("Node %s: %s\n", node_name, node_mode);
/* If we're grouping by node, print its resources */
if (group_by_node) {
if (print_brief) {
print_rscs_brief(node->details->running_rsc, "\t", print_opts | pe_print_rsconly,
stdout, FALSE);
} else {
GListPtr gIter2 = NULL;
for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout);
}
}
}
free(node_name);
}
/* If we're not grouping by node, summarize nodes by status */
if (online_nodes) {
print_as("Online: [%s ]\n", online_nodes);
free(online_nodes);
}
if (offline_nodes) {
print_as("OFFLINE: [%s ]\n", offline_nodes);
free(offline_nodes);
}
if (online_remote_nodes) {
print_as("RemoteOnline: [%s ]\n", online_remote_nodes);
free(online_remote_nodes);
}
if (offline_remote_nodes) {
print_as("RemoteOFFLINE: [%s ]\n", offline_remote_nodes);
free(offline_remote_nodes);
}
if (online_guest_nodes) {
print_as("GuestOnline: [%s ]\n", online_guest_nodes);
free(online_guest_nodes);
}
/* Print resources section, if needed */
print_resources(stdout, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stdout, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stdout, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stdout, data_set);
}
/* Print failed stonith actions */
if (fence_history) {
print_failed_stonith_actions(stdout, stonith_history);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stdout, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stdout, data_set);
}
/* Print stonith history */
if (fence_history && (show & mon_show_fence_history)) {
print_stonith_history(stdout, stonith_history);
}
#if CURSES_ENABLED
if (output_format == mon_output_console) {
refresh();
}
#endif
}
/*!
* \internal
* \brief Print cluster status in XML format
*
* \param[in] data_set Working set of CIB state
*/
static void
print_xml_status(pe_working_set_t * data_set,
stonith_history_t *stonith_history)
{
FILE *stream = stdout;
GListPtr gIter = NULL;
int print_opts = get_resource_display_options();
fprintf(stream, "\n");
fprintf(stream, "\n", VERSION);
print_cluster_summary(stream, data_set);
/*** NODES ***/
fprintf(stream, " \n");
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *node_type = "unknown";
switch (node->details->type) {
case node_member:
node_type = "member";
break;
case node_remote:
node_type = "remote";
break;
case node_ping:
node_type = "ping";
break;
}
fprintf(stream, " details->uname);
fprintf(stream, "id=\"%s\" ", node->details->id);
fprintf(stream, "online=\"%s\" ", node->details->online ? "true" : "false");
fprintf(stream, "standby=\"%s\" ", node->details->standby ? "true" : "false");
fprintf(stream, "standby_onfail=\"%s\" ", node->details->standby_onfail ? "true" : "false");
fprintf(stream, "maintenance=\"%s\" ", node->details->maintenance ? "true" : "false");
fprintf(stream, "pending=\"%s\" ", node->details->pending ? "true" : "false");
fprintf(stream, "unclean=\"%s\" ", node->details->unclean ? "true" : "false");
fprintf(stream, "shutdown=\"%s\" ", node->details->shutdown ? "true" : "false");
fprintf(stream, "expected_up=\"%s\" ", node->details->expected_up ? "true" : "false");
fprintf(stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false");
fprintf(stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc));
fprintf(stream, "type=\"%s\" ", node_type);
if (is_container_remote_node(node)) {
fprintf(stream, "id_as_resource=\"%s\" ", node->details->remote_rsc->container->id);
}
if (group_by_node) {
GListPtr lpc2 = NULL;
fprintf(stream, ">\n");
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
resource_t *rsc = (resource_t *) lpc2->data;
rsc->fns->print(rsc, " ", print_opts | pe_print_rsconly, stream);
}
fprintf(stream, " \n");
} else {
fprintf(stream, "/>\n");
}
}
fprintf(stream, " \n");
/* Print resources section, if needed */
print_resources(stream, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stream, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stream, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stream, data_set);
}
/* Print stonith history */
if (fence_history) {
print_stonith_history(stream, stonith_history);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stream, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stream, data_set);
}
fprintf(stream, "\n");
fflush(stream);
fclose(stream);
}
/*!
* \internal
* \brief Print cluster status in HTML format (with HTTP headers if CGI)
*
* \param[in] data_set Working set of CIB state
* \param[in] filename Name of file to write HTML to (ignored if CGI)
*
* \return 0 on success, -1 on error
*/
static int
print_html_status(pe_working_set_t * data_set,
const char *filename,
stonith_history_t *stonith_history)
{
FILE *stream;
GListPtr gIter = NULL;
char *filename_tmp = NULL;
int print_opts = get_resource_display_options();
if (output_format == mon_output_cgi) {
stream = stdout;
fprintf(stream, "Content-Type: text/html\n\n");
} else {
filename_tmp = crm_concat(filename, "tmp", '.');
stream = fopen(filename_tmp, "w");
if (stream == NULL) {
crm_perror(LOG_ERR, "Cannot open %s for writing", filename_tmp);
free(filename_tmp);
return -1;
}
}
fprintf(stream, "\n");
fprintf(stream, " \n");
fprintf(stream, " Cluster status\n");
fprintf(stream, " \n", reconnect_msec / 1000);
fprintf(stream, " \n");
fprintf(stream, "\n");
print_cluster_summary(stream, data_set);
/*** NODE LIST ***/
fprintf(stream, "
\n Node List
\n");
fprintf(stream, "\n");
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
char *node_name = get_node_display_name(node);
fprintf(stream, "- Node: %s: ", node_name);
if (node->details->standby_onfail && node->details->online) {
fprintf(stream, "standby (on-fail)\n");
} else if (node->details->standby && node->details->online) {
fprintf(stream, "standby\n");
} else if (node->details->standby) {
fprintf(stream, "OFFLINE (standby)\n");
} else if (node->details->maintenance && node->details->online) {
fprintf(stream, "maintenance\n");
} else if (node->details->maintenance) {
fprintf(stream, "OFFLINE (maintenance)\n");
} else if (node->details->online) {
fprintf(stream, "online\n");
} else {
fprintf(stream, "OFFLINE\n");
}
if (print_brief && group_by_node) {
fprintf(stream, "<ul>\n");
print_rscs_brief(node->details->running_rsc, NULL, print_opts | pe_print_rsconly,
stream, FALSE);
fprintf(stream, "</ul>\n");
} else if (group_by_node) {
GListPtr lpc2 = NULL;
fprintf(stream, "<ul>\n");
for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) {
resource_t *rsc = (resource_t *) lpc2->data;
fprintf(stream, "<li>");
rsc->fns->print(rsc, NULL, print_opts | pe_print_rsconly, stream);
fprintf(stream, "</li>\n");
}
fprintf(stream, "</ul>\n");
}
fprintf(stream, "</li>\n");
free(node_name);
}
fprintf(stream, "
\n");
/* Print resources section, if needed */
print_resources(stream, data_set, print_opts);
/* print Node Attributes section if requested */
if (show & mon_show_attributes) {
print_node_attributes(stream, data_set);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (show & (mon_show_operations | mon_show_failcounts)) {
print_node_summary(stream, data_set,
((show & mon_show_operations)? TRUE : FALSE));
}
/* If there were any failed actions, print them */
if (xml_has_children(data_set->failed)) {
print_failed_actions(stream, data_set);
}
/* Print failed stonith actions */
if (fence_history) {
print_failed_stonith_actions(stream, stonith_history);
}
/* Print stonith history */
if (fence_history && (show & mon_show_fence_history)) {
print_stonith_history(stream, stonith_history);
}
/* Print tickets if requested */
if (show & mon_show_tickets) {
print_cluster_tickets(stream, data_set);
}
/* Print negative location constraints if requested */
if (show & mon_show_bans) {
print_neg_locations(stream, data_set);
}
fprintf(stream, "\n");
fprintf(stream, "\n");
fflush(stream);
fclose(stream);
if (output_format != mon_output_cgi) {
if (rename(filename_tmp, filename) != 0) {
crm_perror(LOG_ERR, "Unable to rename %s->%s", filename_tmp, filename);
}
free(filename_tmp);
}
return 0;
}
static int
send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc,
int status, const char *desc)
{
pid_t pid;
/* setenv() needs strings; these values are ints */
char *rc_s = crm_itoa(rc);
char *status_s = crm_itoa(status);
char *target_rc_s = crm_itoa(target_rc);
crm_debug("Sending external notification to '%s' via '%s'", external_recipient, external_agent);
if(rsc) {
setenv("CRM_notify_rsc", rsc, 1);
}
if (external_recipient) {
setenv("CRM_notify_recipient", external_recipient, 1);
}
setenv("CRM_notify_node", node, 1);
setenv("CRM_notify_task", task, 1);
setenv("CRM_notify_desc", desc, 1);
setenv("CRM_notify_rc", rc_s, 1);
setenv("CRM_notify_target_rc", target_rc_s, 1);
setenv("CRM_notify_status", status_s, 1);
pid = fork();
if (pid == -1) {
crm_perror(LOG_ERR, "notification fork() failed.");
}
if (pid == 0) {
/* crm_debug("notification: I am the child. Executing the nofitication program."); */
execl(external_agent, external_agent, NULL);
exit(CRM_EX_ERROR);
}
crm_trace("Finished running custom notification program '%s'.", external_agent);
free(target_rc_s);
free(status_s);
free(rc_s);
return 0;
}
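/* Editorial note (not part of the original patch): the external agent is
 * executed with no arguments and reads the event from the CRM_notify_*
 * environment variables set above; the parent does not wait for the child,
 * so failures within the agent itself are not reported here.
 */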
static void
handle_rsc_op(xmlNode * xml, const char *node_id)
{
int rc = -1;
int status = -1;
int action = -1;
int target_rc = -1;
int transition_num = -1;
gboolean notify = TRUE;
char *rsc = NULL;
char *task = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *id = NULL;
char *update_te_uuid = NULL;
const char *node = NULL;
xmlNode *n = xml;
xmlNode * rsc_op = xml;
if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) {
xmlNode *cIter;
for(cIter = xml->children; cIter; cIter = cIter->next) {
handle_rsc_op(cIter, node_id);
}
return;
}
id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY);
if (id == NULL) {
/* Compatibility with <= 1.1.5 */
id = ID(rsc_op);
}
magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC);
if (magic == NULL) {
/* non-change */
return;
}
if (FALSE == decode_transition_magic(magic, &update_te_uuid, &transition_num, &action,
&status, &rc, &target_rc)) {
crm_err("Invalid event %s detected for %s", magic, id);
return;
}
if (parse_op_key(id, &rsc, &task, NULL) == FALSE) {
crm_err("Invalid event detected for %s", id);
goto bail;
}
node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);
while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) {
n = n->parent;
}
if(node == NULL && n) {
node = crm_element_value(n, XML_ATTR_UNAME);
}
if (node == NULL && n) {
node = ID(n);
}
if (node == NULL) {
node = node_id;
}
if (node == NULL) {
crm_err("No node detected for event %s (%s)", magic, id);
goto bail;
}
/* look up where we expected it to be? */
desc = pcmk_strerror(pcmk_ok);
if (status == PCMK_LRM_OP_DONE && target_rc == rc) {
crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc);
if (rc == PCMK_OCF_NOT_RUNNING) {
notify = FALSE;
}
} else if (status == PCMK_LRM_OP_DONE) {
desc = services_ocf_exitcode_str(rc);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
} else {
desc = services_lrm_status_str(status);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
}
if (notify && external_agent) {
send_custom_trap(node, rsc, task, target_rc, rc, status, desc);
}
bail:
free(update_te_uuid);
free(rsc);
free(task);
}
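/* Editorial note (not part of the original patch): XML_ATTR_TRANSITION_MAGIC
 * packs the transition's identifiers and status codes into a single string;
 * decode_transition_magic() splits them back out, which is what lets
 * handle_rsc_op() compare the actual rc against the expected target_rc.
 */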
static gboolean
mon_trigger_refresh(gpointer user_data)
{
mainloop_set_trigger(refresh_trigger);
return FALSE;
}
#define NODE_PATT "/lrm[@id="
static char *get_node_from_xpath(const char *xpath)
{
char *nodeid = NULL;
char *tmp = strstr(xpath, NODE_PATT);
if(tmp) {
tmp += strlen(NODE_PATT);
tmp += 1;
nodeid = strdup(tmp);
tmp = strstr(nodeid, "\'");
CRM_ASSERT(tmp);
tmp[0] = 0;
}
return nodeid;
}
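/* Editorial example (not part of the original patch): for an xpath such as
 *
 *     /cib/status/node_state[@id='1']/lrm[@id='1']/lrm_resources
 *
 * get_node_from_xpath() finds NODE_PATT ("/lrm[@id="), skips the opening
 * quote, and duplicates the text up to the closing quote, returning "1".
 */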
static void crm_diff_update_v2(const char *event, xmlNode * msg)
{
xmlNode *change = NULL;
xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) {
const char *name = NULL;
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
xmlNode *match = NULL;
const char *node = NULL;
if(op == NULL) {
continue;
} else if(strcmp(op, "create") == 0) {
match = change->children;
} else if(strcmp(op, "move") == 0) {
continue;
} else if(strcmp(op, "delete") == 0) {
continue;
} else if(strcmp(op, "modify") == 0) {
match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
match = match->children;
}
}
if(match) {
name = (const char *)match->name;
}
crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name);
if(xpath == NULL) {
/* Version field, ignore */
} else if(name == NULL) {
crm_debug("No result for %s operation to %s", op, xpath);
CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0);
} else if(strcmp(name, XML_TAG_CIB) == 0) {
xmlNode *state = NULL;
xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS);
for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
node = crm_element_value(state, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(state);
}
handle_rsc_op(state, node);
}
} else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) {
xmlNode *state = NULL;
for (state = __xml_first_child(match); state != NULL; state = __xml_next(state)) {
node = crm_element_value(state, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(state);
}
handle_rsc_op(state, node);
}
} else if(strcmp(name, XML_CIB_TAG_STATE) == 0) {
node = crm_element_value(match, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(match);
}
handle_rsc_op(match, node);
} else if(strcmp(name, XML_CIB_TAG_LRM) == 0) {
node = ID(match);
handle_rsc_op(match, node);
} else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
char *local_node = get_node_from_xpath(xpath);
handle_rsc_op(match, local_node);
free(local_node);
} else {
crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
}
}
}
static void crm_diff_update_v1(const char *event, xmlNode * msg)
{
/* Process operation updates */
xmlXPathObject *xpathObj = xpath_search(msg,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED
"//" XML_LRM_TAG_RSC_OP);
int lpc = 0, max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
handle_rsc_op(rsc_op, NULL);
}
freeXpathObject(xpathObj);
}
void
crm_diff_update(const char *event, xmlNode * msg)
{
int rc = -1;
static bool stale = FALSE;
gboolean cib_updated = FALSE;
xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
print_dot();
if (current_cib != NULL) {
rc = xml_apply_patchset(current_cib, diff, TRUE);
switch (rc) {
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(current_cib); current_cib = NULL;
break;
case pcmk_ok:
cib_updated = TRUE;
break;
default:
crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(current_cib); current_cib = NULL;
}
}
if (current_cib == NULL) {
crm_trace("Re-requesting the full cib");
cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
}
if (external_agent) {
int format = 0;
crm_element_value_int(diff, "format", &format);
switch(format) {
case 1:
crm_diff_update_v1(event, msg);
break;
case 2:
crm_diff_update_v2(event, msg);
break;
default:
crm_err("Unknown patch format: %d", format);
}
}
if (current_cib == NULL) {
if(!stale) {
print_as("--- Stale data ---");
}
stale = TRUE;
return;
}
stale = FALSE;
kick_refresh(cib_updated);
}
gboolean
mon_refresh_display(gpointer user_data)
{
xmlNode *cib_copy = copy_xml(current_cib);
pe_working_set_t data_set;
stonith_history_t *stonith_history = NULL;
last_refresh = time(NULL);
if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
if (cib) {
cib->cmds->signoff(cib);
}
print_as("Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation));
if (output_format == mon_output_console) {
sleep(2);
}
clean_up(CRM_EX_CONFIG);
return FALSE;
}
/* get the stonith-history if there is evidence we need it
*/
while (fence_history) {
if (st != NULL) {
if (st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120)) {
fprintf(stderr, "Critical: Unable to get stonith-history\n");
mon_cib_connection_destroy(NULL);
} else {
if ((!fence_full_history) && (output_format != mon_output_xml)) {
stonith_history = reduce_stonith_history(stonith_history);
}
stonith_history = sort_stonith_history(stonith_history);
break; /* all other cases are errors */
}
} else {
fprintf(stderr, "Critical: No stonith-API\n");
}
free_xml(cib_copy);
print_as("Reading stonith-history failed");
if (output_format == mon_output_console) {
sleep(2);
}
return FALSE;
}
set_working_set_defaults(&data_set);
data_set.input = cib_copy;
cluster_status(&data_set);
/* Unpack constraints if any section will need them
* (tickets may be referenced in constraints but not granted yet,
* and bans need negative location constraints) */
if (show & (mon_show_bans | mon_show_tickets)) {
xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set.input);
unpack_constraints(cib_constraints, &data_set);
}
switch (output_format) {
case mon_output_html:
case mon_output_cgi:
if (print_html_status(&data_set, output_filename, stonith_history) != 0) {
fprintf(stderr, "Critical: Unable to output html file\n");
clean_up(CRM_EX_CANTCREAT);
}
break;
case mon_output_xml:
print_xml_status(&data_set, stonith_history);
break;
case mon_output_monitor:
print_simple_status(&data_set, stonith_history);
if (has_warnings) {
clean_up(MON_STATUS_WARN);
}
break;
case mon_output_plain:
case mon_output_console:
print_status(&data_set, stonith_history);
break;
case mon_output_none:
break;
}
stonith_history_free(stonith_history);
stonith_history = NULL;
cleanup_alloc_calculations(&data_set);
-
- /* poll for pending fence-actions as we don't get notifications so far */
- if ((!one_shot) && (show & mon_show_fence_history)) {
- kick_refresh(FALSE);
- }
return TRUE;
}
void
mon_st_callback_event(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else if (external_agent) {
char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)",
e->operation, e->origin, e->target, pcmk_strerror(e->result),
e->id);
send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
free(desc);
}
}
void kick_refresh(gboolean data_updated)
{
static int updates = 0;
long now = time(NULL);
if (data_updated) {
updates++;
}
if(refresh_timer == NULL) {
refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
}
/* Refresh
 * - immediately, if the last update was longer ago than the reconnect
 *   interval (reconnect_msec, 5s by default)
 * - after every 10 CIB updates
 * - otherwise, at most 2s after the last update
 */
if ((now - last_refresh) > (reconnect_msec / 1000)) {
mainloop_set_trigger(refresh_trigger);
mainloop_timer_stop(refresh_timer);
updates = 0;
} else if(updates >= 10) {
mainloop_set_trigger(refresh_trigger);
mainloop_timer_stop(refresh_timer);
updates = 0;
} else {
- /* if nothing has been updated (e.g. fence-history polling for changes
- * in pending actions since atm that doesn't generate notifications or
- * cib update has gone wrong) it is enough to update at reconnect-rate
- */
- mainloop_timer_set_period(refresh_timer, data_updated?2000:reconnect_msec);
mainloop_timer_start(refresh_timer);
}
}
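/* Editorial sketch (not part of the original patch): with the default
 * reconnect interval of 5000ms, a single CIB update arriving 1s after the
 * last redraw does not repaint immediately; it (re)starts the 2s timer, so
 * bursts of updates coalesce into at most one redraw every 2 seconds, while
 * anything older than the reconnect interval triggers an immediate redraw.
 */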
void
mon_st_callback_display(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else {
print_dot();
kick_refresh(TRUE);
}
}
/*
* De-init ncurses, disconnect from the CIB manager, and deallocate memory.
*/
static void
clean_up(crm_exit_t exit_code)
{
#if CURSES_ENABLED
if (output_format == mon_output_console) {
output_format = mon_output_plain;
echo();
nocbreak();
endwin();
}
#endif
if (cib != NULL) {
cib->cmds->signoff(cib);
cib_delete(cib);
cib = NULL;
}
if (st != NULL) {
if (st->state != stonith_disconnected) {
st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
+ st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
+ st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
st->cmds->disconnect(st);
}
stonith_api_delete(st);
st = NULL;
}
free(output_filename);
free(xml_file);
free(pid_file);
crm_exit(exit_code);
}
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index 0452bc1ab5..77dddc0cb5 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -1,742 +1,767 @@
/*
* Copyright 2009-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
{ "help", no_argument, NULL, '?',
"\tDisplay this text and exit."
},
{ "version", no_argument, NULL, '$',
"\tDisplay version information and exit."
},
{ "verbose", no_argument, NULL, 'V',
"\tIncrease debug output (may be specified multiple times)."
},
{ "quiet", no_argument, NULL, 'q',
"\tBe less descriptive in output."
},
-
+ { "cleanup", no_argument, NULL, 'c',
+ "\tCleanup wherever appropriate."
+ },
+ { "broadcast", no_argument, NULL, 'b',
+ "\tBroadcast wherever appropriate."
+ },
{ "-spacer-", no_argument, NULL, '-', "\nDevice definition commands:" },
{ "register", required_argument, NULL, 'R',
"Register the named stonith device. Requires: --agent.\n"
"\t\t\tOptional: --option, --env-option."
},
{ "deregister", required_argument, NULL, 'D',
"De-register the named stonith device."
},
{ "register-level", required_argument, NULL, 'r',
"Register a stonith level for the named target,\n"
"\t\t\tspecified as one of NAME, @PATTERN, or ATTR=VALUE.\n"
"\t\t\tRequires: --index and one or more --device entries."
},
{ "deregister-level", required_argument, NULL, 'd',
"Unregister a stonith level for the named target,\n"
"\t\t\tspecified as for --register-level. Requires: --index."
},
{ "-spacer-", no_argument, NULL, '-', "\nQueries:" },
{ "list", required_argument, NULL, 'l',
"List devices that can terminate the specified host.\n"
"\t\t\tOptional: --timeout."
},
{ "list-registered", no_argument, NULL, 'L',
"List all registered devices. Optional: --timeout."
},
{ "list-installed", no_argument, NULL, 'I',
"List all installed devices. Optional: --timeout."
},
{ "list-targets", required_argument, NULL, 's',
"List the targets that can be fenced by the\n"
"\t\t\tnamed device. Optional: --timeout."
},
{ "metadata", no_argument, NULL, 'M',
"\tShow agent metadata. Requires: --agent.\n"
"\t\t\tOptional: --timeout."
},
{ "query", required_argument, NULL, 'Q',
"Check the named device's status. Optional: --timeout."
},
{ "history", required_argument, NULL, 'H',
"Show last successful fencing operation for named node\n"
- "\t\t\t(or '*' for all nodes). Optional: --timeout, --quiet\n"
- "\t\t\t(show only the operation's epoch timestamp),\n"
- "\t\t\t--verbose (show all recorded and pending operations)."
+ "\t\t\t(or '*' for all nodes). Optional: --timeout, --cleanup,\n"
+ "\t\t\t--quiet (show only the operation's epoch timestamp),\n"
+ "\t\t\t--verbose (show all recorded and pending operations),\n"
+ "\t\t\t--broadcast (update history from all nodes available)."
},
{ "last", required_argument, NULL, 'h',
"Indicate when the named node was last fenced.\n"
"\t\t\tOptional: --as-node-id."
},
{ "validate", no_argument, NULL, 'K',
"\tValidate a fence device configuration.\n"
"\t\t\tRequires: --agent. Optional: --option, --env-option,\n"
"\t\t\t--quiet (print no output, only return status).\n"
},
{ "-spacer-", no_argument, NULL, '-', "\nFencing Commands:" },
{ "fence", required_argument, NULL, 'F',
"Fence named host. Optional: --timeout, --tolerance."
},
{ "unfence", required_argument, NULL, 'U',
"Unfence named host. Optional: --timeout, --tolerance."
},
{ "reboot", required_argument, NULL, 'B',
"Reboot named host. Optional: --timeout, --tolerance."
},
{ "confirm", required_argument, NULL, 'C',
"Tell cluster that named host is now safely down."
},
{ "-spacer-", no_argument, NULL, '-', "\nAdditional Options:" },
{ "agent", required_argument, NULL, 'a',
"The agent to use (for example, fence_xvm;\n"
"\t\t\twith --register, --metadata, --validate)."
},
{ "option", required_argument, NULL, 'o',
"Specify a device configuration parameter as NAME=VALUE\n"
"\t\t\t(may be specified multiple times; with --register,\n"
"\t\t\t--validate)."
},
{ "env-option", required_argument, NULL, 'e',
"Specify a device configuration parameter with the\n"
"\t\t\tspecified name, using the value of the\n"
"\t\t\tenvironment variable of the same name prefixed with\n"
"\t\t\tOCF_RESKEY_ (may be specified multiple times;\n"
"\t\t\twith --register, --validate)."
},
{ "tag", required_argument, NULL, 'T',
"Identify fencing operations in logs with the specified\n"
"\t\t\ttag; useful when multiple entities might invoke\n"
"\t\t\tstonith_admin (used with most commands)."
},
{ "device", required_argument, NULL, 'v',
"Device ID (with --register-level, device to associate with\n"
"\t\t\ta given host and level; may be specified multiple times)"
#if SUPPORT_CIBSECRETS
"\n\t\t\t(with --validate, name to use to load CIB secrets)"
#endif
"."
},
{ "index", required_argument, NULL, 'i',
"The stonith level (1-9) (with --register-level,\n"
"\t\t\t--deregister-level)."
},
{ "timeout", required_argument, NULL, 't',
"Operation timeout in seconds (default 120;\n"
"\t\t\tused with most commands)."
},
{ "as-node-id", no_argument, NULL, 'n',
"(Advanced) The supplied node is the corosync node ID\n"
"\t\t\t(with --last)."
},
{ "tolerance", required_argument, NULL, 0,
"(Advanced) Do nothing if an equivalent --fence request\n"
"\t\t\tsucceeded less than this many seconds earlier\n"
"\t\t\t(with --fence, --unfence, --reboot)."
},
{ 0, 0, 0, 0 }
};
/* *INDENT-ON* */
static int st_opts = st_opt_sync_call | st_opt_allow_suicide;
static GMainLoop *mainloop = NULL;
struct {
stonith_t *st;
const char *target;
const char *action;
char *name;
int timeout;
int tolerance;
int rc;
} async_fence_data;
static int
try_mainloop_connect(void)
{
stonith_t *st = async_fence_data.st;
int tries = 10;
int i = 0;
int rc = 0;
for (i = 0; i < tries; i++) {
crm_debug("Connecting as %s", async_fence_data.name);
rc = st->cmds->connect(st, async_fence_data.name, NULL);
if (!rc) {
crm_debug("stonith client connection established");
return 0;
} else {
crm_debug("stonith client connection failed");
}
sleep(1);
}
crm_err("Could not connect to the fencer");
return -1;
}
static void
notify_callback(stonith_t * st, stonith_event_t * e)
{
if (e->result != pcmk_ok) {
return;
}
if (safe_str_eq(async_fence_data.target, e->target) &&
safe_str_eq(async_fence_data.action, e->action)) {
async_fence_data.rc = e->result;
g_main_loop_quit(mainloop);
}
}
static void
fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
async_fence_data.rc = data->rc;
g_main_loop_quit(mainloop);
}
static gboolean
async_fence_helper(gpointer user_data)
{
stonith_t *st = async_fence_data.st;
int call_id = 0;
if (try_mainloop_connect()) {
g_main_loop_quit(mainloop);
return TRUE;
}
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, notify_callback);
call_id = st->cmds->fence(st,
st_opt_allow_suicide,
async_fence_data.target,
async_fence_data.action,
async_fence_data.timeout, async_fence_data.tolerance);
if (call_id < 0) {
g_main_loop_quit(mainloop);
return TRUE;
}
st->cmds->register_callback(st,
call_id,
async_fence_data.timeout,
st_opt_timeout_updates, NULL, "callback", fence_callback);
return TRUE;
}
static int
mainloop_fencing(stonith_t * st, const char *target, const char *action, int timeout, int tolerance)
{
crm_trigger_t *trig;
async_fence_data.st = st;
async_fence_data.target = target;
async_fence_data.action = action;
async_fence_data.timeout = timeout;
async_fence_data.tolerance = tolerance;
async_fence_data.rc = -1;
trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
mainloop_set_trigger(trig);
mainloop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(mainloop);
return async_fence_data.rc;
}
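/* Usage sketch: mainloop_fencing() gives the asynchronous fencing API a
 * blocking interface by spinning a GMainLoop until notify_callback() or
 * fence_callback() quits it. For example (a fragment assuming st from
 * stonith_api_new() and a hypothetical node name "node1"):
 */
int rc = mainloop_fencing(st, "node1", "reboot",
                          120 /* timeout, seconds */,
                          0 /* tolerance, seconds */);
if (rc != pcmk_ok) {
    fprintf(stderr, "Fencing failed: %s\n", pcmk_strerror(rc));
}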
static int
handle_level(stonith_t *st, char *target, int fence_level,
stonith_key_value_t *devices, bool added)
{
char *node = NULL;
char *pattern = NULL;
char *name = NULL;
char *value = strchr(target, '=');
/* Determine if targeting by attribute, node name pattern or node name */
if (value != NULL) {
name = target;
*value++ = '\0';
} else if (*target == '@') {
pattern = target + 1;
} else {
node = target;
}
/* Register or unregister level as appropriate */
if (added) {
return st->cmds->register_level_full(st, st_opts, node, pattern,
name, value, fence_level,
devices);
}
return st->cmds->remove_level_full(st, st_opts, node, pattern,
name, value, fence_level);
}
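/* Usage sketch for the three target forms handle_level() accepts. Because
 * it rewrites the '=' in an ATTR=VALUE target in place, the target strings
 * must be writable (node names, attribute, and level below are
 * hypothetical):
 */
char by_name[]    = "node1";     /* plain node name */
char by_pattern[] = "@node-.*";  /* leading '@': node-name pattern */
char by_attr[]    = "rack=1";    /* NAME=VALUE: node attribute */

handle_level(st, by_name, 2, devices, true);    /* register level 2 */
handle_level(st, by_pattern, 2, devices, true);
handle_level(st, by_attr, 2, devices, false);   /* unregister level 2 */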
static char *
fence_action_str(const char *action)
{
char *str = NULL;
if (action == NULL) {
str = strdup("unknown");
} else if (action[0] == 'o') { // on, off
str = crm_concat("turn", action, ' ');
} else {
str = strdup(action);
}
return str;
}
static void
print_fence_event(stonith_history_t *event)
{
char *action_s = fence_action_str(event->action);
time_t complete = event->completed;
printf("%s was able to %s node %s on behalf of %s from %s at %s\n",
(event->delegate? event->delegate : "This node"), action_s,
event->target, event->client, event->origin, ctime(&complete));
free(action_s);
}
static int
-show_history(stonith_t *st, const char *target, int timeout, int quiet,
- int verbose)
+handle_history(stonith_t *st, const char *target, int timeout, int quiet,
+ int verbose, int cleanup, int broadcast)
{
stonith_history_t *history = NULL, *hp, *latest = NULL;
int rc = 0;
- rc = st->cmds->history(st, st_opts,
+ if (!quiet) {
+ if (cleanup) {
+ printf("cleaning up fencing-history%s%s\n",
+ target?" for node ":"", target?target:"");
+ }
+ if (broadcast) {
+ printf("gather fencing-history from all nodes\n");
+ }
+ }
+ rc = st->cmds->history(st, st_opts | (cleanup?st_opt_cleanup:0) |
+ (broadcast?st_opt_broadcast:0),
(safe_str_eq(target, "*")? NULL : target),
&history, timeout);
for (hp = history; hp; hp = hp->next) {
char *action_s = NULL;
time_t complete = hp->completed;
if (hp->state == st_done) {
latest = hp;
}
if (quiet || !verbose) {
continue;
}
if (hp->state == st_failed) {
action_s = fence_action_str(hp->action);
printf("%s failed to %s node %s on behalf of %s from %s at %s\n",
hp->delegate ? hp->delegate : "We", action_s, hp->target,
hp->client, hp->origin, ctime(&complete));
} else if (hp->state == st_done) {
print_fence_event(latest);
} else {
/* ocf:pacemaker:controld depends on "wishes to" being
* in this output, when used with older versions of DLM
* that don't report stateful_merge_wait
*/
action_s = fence_action_str(hp->action);
printf("%s at %s wishes to %s node %s - %d %lld\n",
hp->client, hp->origin, action_s, hp->target, hp->state,
(long long) complete);
}
free(action_s);
}
if (latest) {
if (quiet) {
printf("%lld\n", (long long) latest->completed);
} else if (!verbose) { // already printed if verbose
print_fence_event(latest);
}
}
stonith_history_free(history);
return rc;
}
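/* The new flags simply OR st_opt_cleanup / st_opt_broadcast into the call
 * options passed to st->cmds->history(). From the command line, the
 * resulting behavior would be exercised as, for example:
 *
 *   stonith_admin --history '*' --broadcast   (gather history from all nodes)
 *   stonith_admin --history node1 --cleanup   (clear the history for node1)
 *
 * --quiet still reduces the output to the latest operation's timestamp.
 */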
static int
validate(stonith_t *st, const char *agent, const char *id,
stonith_key_value_t *params, int timeout, int quiet)
{
int rc = 1;
char *output = NULL;
char *error_output = NULL;
rc = st->cmds->validate(st, st_opt_sync_call, id, NULL, agent, params,
timeout, &output, &error_output);
if (!quiet) {
printf("Validation of %s %s\n", agent, (rc? "failed" : "succeeded"));
if (output && *output) {
puts(output);
free(output);
}
if (error_output && *error_output) {
puts(error_output);
free(error_output);
}
}
return rc;
}
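/* Command-line sketch for the validate path above (agent and option values
 * are hypothetical; the flags are from the option table):
 *
 *   stonith_admin --validate --agent fence_xvm -o key=value --timeout 60
 *
 * With --quiet, no output is printed and only the exit status reports
 * whether validation succeeded.
 */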
int
main(int argc, char **argv)
{
int flag;
int rc = 0;
int quiet = 0;
+ int cleanup = 0;
+ int broadcast = 0;
int verbose = 0;
int argerr = 0;
int timeout = 120;
int option_index = 0;
int fence_level = 0;
int no_connect = 0;
int tolerance = 0;
int as_nodeid = FALSE;
char *name = NULL;
char *value = NULL;
char *target = NULL;
char *lists = NULL;
const char *agent = NULL;
const char *device = NULL;
const char *longname = NULL;
char action = 0;
crm_exit_t exit_code = CRM_EX_OK;
stonith_t *st = NULL;
stonith_key_value_t *params = NULL;
stonith_key_value_t *devices = NULL;
stonith_key_value_t *dIter = NULL;
crm_log_cli_init("stonith_admin");
crm_set_options(NULL, " []", long_options,
"access the Pacemaker fencing API");
async_fence_data.name = strdup(crm_system_name);
while (1) {
flag = crm_get_option_long(argc, argv, &option_index, &longname);
if (flag == -1)
break;
switch (flag) {
case 'V':
verbose = 1;
crm_bump_log_level(argc, argv);
break;
case '$':
case '?':
crm_help(flag, CRM_EX_OK);
break;
case 'I':
case 'K':
no_connect = 1;
/* fall through */
case 'L':
action = flag;
break;
case 'q':
quiet = 1;
break;
+ case 'c':
+ cleanup = 1;
+ break;
+ case 'b':
+ broadcast = 1;
+ break;
case 'Q':
case 'R':
case 'D':
case 's':
action = flag;
device = optarg;
break;
case 'T':
free(async_fence_data.name);
async_fence_data.name = crm_strdup_printf("%s.%s", crm_system_name, optarg);
break;
case 'a':
agent = optarg;
break;
case 'l':
target = optarg;
action = 'L';
break;
case 'M':
no_connect = 1;
action = flag;
break;
case 't':
timeout = crm_atoi(optarg, NULL);
break;
case 'B':
case 'F':
case 'U':
/* using mainloop here */
no_connect = 1;
/* fall through */
case 'C':
/* Always log the input arguments */
crm_log_args(argc, argv);
target = optarg;
action = flag;
break;
case 'n':
as_nodeid = TRUE;
break;
case 'h':
case 'H':
case 'r':
case 'd':
target = optarg;
action = flag;
break;
case 'i':
fence_level = crm_atoi(optarg, NULL);
break;
case 'v':
devices = stonith_key_value_add(devices, NULL, optarg);
break;
case 'o':
crm_info("Scanning: -o %s", optarg);
rc = sscanf(optarg, "%m[^=]=%m[^=]", &name, &value);
if (rc != 2) {
crm_err("Invalid option: -o %s", optarg);
++argerr;
} else {
crm_info("Got: '%s'='%s'", name, value);
params = stonith_key_value_add(params, name, value);
}
free(value); value = NULL;
free(name); name = NULL;
break;
case 'e':
{
char *key = crm_concat("OCF_RESKEY", optarg, '_');
const char *env = getenv(key);
if (env == NULL) {
crm_err("Invalid option: -e %s", optarg);
++argerr;
} else {
crm_info("Got: '%s'='%s'", optarg, env);
params = stonith_key_value_add(params, optarg, env);
}
free(key);
}
break;
case 0:
if (safe_str_eq("tolerance", longname)) {
tolerance = crm_get_msec(optarg) / 1000; /* Send in seconds */
}
break;
default:
++argerr;
break;
}
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', CRM_EX_USAGE);
}
st = stonith_api_new();
if (!no_connect) {
rc = st->cmds->connect(st, async_fence_data.name, NULL);
if (rc < 0) {
fprintf(stderr, "Could not connect to fencer: %s\n",
pcmk_strerror(rc));
exit_code = CRM_EX_DISCONNECT;
goto done;
}
}
switch (action) {
case 'I':
rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout);
for (dIter = devices; dIter; dIter = dIter->next) {
fprintf(stdout, " %s\n", dIter->value);
}
if (rc == 0) {
fprintf(stderr, "No devices found\n");
} else if (rc > 0) {
fprintf(stderr, "%d devices found\n", rc);
rc = 0;
}
stonith_key_value_freeall(devices, 1, 1);
break;
case 'L':
rc = st->cmds->query(st, st_opts, target, &devices, timeout);
for (dIter = devices; dIter; dIter = dIter->next) {
fprintf(stdout, " %s\n", dIter->value);
}
if (rc == 0) {
fprintf(stderr, "No devices found\n");
} else if (rc > 0) {
fprintf(stderr, "%d devices found\n", rc);
rc = 0;
}
stonith_key_value_freeall(devices, 1, 1);
break;
case 'Q':
rc = st->cmds->monitor(st, st_opts, device, timeout);
if (rc < 0) {
rc = st->cmds->list(st, st_opts, device, NULL, timeout);
}
break;
case 's':
rc = st->cmds->list(st, st_opts, device, &lists, timeout);
if (rc == 0) {
if (lists) {
char *source = lists, *dest = lists;
while (*dest) {
if ((*dest == '\\') && (*(dest+1) == 'n')) {
*source = '\n';
dest++;
dest++;
source++;
} else if ((*dest == ',') || (*dest == ';')) {
dest++;
} else {
*source = *dest;
dest++;
source++;
}
if (!(*dest)) {
*source = 0;
}
}
fprintf(stdout, "%s", lists);
free(lists);
}
} else {
fprintf(stderr, "List command returned error. rc : %d\n", rc);
}
break;
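/* The rewrite loop above is easier to follow with the cursors named for
 * what they do: "dest" is actually the read cursor and "source" the write
 * cursor. An equivalent sketch (not part of the patch), operating on the
 * raw string before it is printed and freed:
 */
{
    char *out = lists;        /* write cursor */
    const char *in = lists;   /* read cursor */

    while (*in != '\0') {
        if ((in[0] == '\\') && (in[1] == 'n')) {
            *out++ = '\n';    /* expand the two-character "\n" escape */
            in += 2;
        } else if ((*in == ',') || (*in == ';')) {
            in++;             /* drop list separators */
        } else {
            *out++ = *in++;   /* copy ordinary characters */
        }
    }
    *out = '\0';
}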
case 'R':
rc = st->cmds->register_device(st, st_opts, device, NULL, agent,
params);
break;
case 'D':
rc = st->cmds->remove_device(st, st_opts, device);
break;
case 'd':
case 'r':
rc = handle_level(st, target, fence_level, devices, action == 'r');
break;
case 'M':
if (agent == NULL) {
printf("Please specify an agent to query using -a,--agent [value]\n");
exit_code = CRM_EX_USAGE;
goto done;
} else {
char *buffer = NULL;
rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, timeout);
if (rc == pcmk_ok) {
printf("%s\n", buffer);
}
free(buffer);
}
break;
case 'C':
rc = st->cmds->confirm(st, st_opts, target);
break;
case 'B':
rc = mainloop_fencing(st, target, "reboot", timeout, tolerance);
break;
case 'F':
rc = mainloop_fencing(st, target, "off", timeout, tolerance);
break;
case 'U':
rc = mainloop_fencing(st, target, "on", timeout, tolerance);
break;
case 'h':
{
time_t when = 0;
if(as_nodeid) {
uint32_t nodeid = atol(target);
when = stonith_api_time(nodeid, NULL, FALSE);
} else {
when = stonith_api_time(0, target, FALSE);
}
if(when) {
printf("Node %s last kicked at: %s\n", target, ctime(&when));
} else {
printf("Node %s has never been kicked\n", target);
}
}
break;
case 'H':
- rc = show_history(st, target, timeout, quiet, verbose);
+ rc = handle_history(st, target, timeout, quiet,
+ verbose, cleanup, broadcast);
break;
case 'K':
if (agent == NULL) {
printf("Please specify an agent to validate with --agent\n");
exit_code = CRM_EX_USAGE;
goto done;
}
device = (devices? devices->key : NULL);
rc = validate(st, agent, device, params, timeout, quiet);
break;
}
crm_info("Command returned: %s (%d)", pcmk_strerror(rc), rc);
exit_code = crm_errno2exit(rc);
done:
free(async_fence_data.name);
stonith_key_value_freeall(params, 1, 1);
st->cmds->disconnect(st);
stonith_api_delete(st);
return exit_code;
}