Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F1842429
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
96 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index b65b64c7f1..cd08d7460d 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -1,482 +1,487 @@
/*
* Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-fenced.h>
#define MAX_STONITH_HISTORY 500
/*!
* \internal
* \brief Send a broadcast to all nodes to trigger cleanup or
* history synchronisation
*
* \param[in] history Optional history to be attached
* \param[in] callopts We control cleanup via a flag in the callopts
* \param[in] target Cleanup can be limited to certain fence-targets
*/
static void
stonith_send_broadcast_history(xmlNode *history,
                               int callopts,
                               const char *target)
{
    /* Payload: optional target filter plus an optional copy of the history */
    xmlNode *payload = create_xml_node(NULL, __FUNCTION__);
    xmlNode *message = NULL;

    if (target != NULL) {
        crm_xml_add(payload, F_STONITH_TARGET, target);
    }
    if (history != NULL) {
        /* only a copy is attached, the caller keeps ownership of history */
        add_node_copy(payload, history);
    }

    /* Envelope: a fence-history broadcast carrying the given call options */
    message = create_xml_node(NULL, "stonith_command");
    crm_xml_add(message, F_TYPE, T_STONITH_NG);
    crm_xml_add(message, F_SUBTYPE, "broadcast");
    crm_xml_add(message, F_STONITH_OPERATION, STONITH_OP_FENCE_HISTORY);
    crm_xml_add_int(message, F_STONITH_CALLOPTS, callopts);
    add_message_xml(message, F_STONITH_CALLDATA, payload);

    /* a NULL node is passed as the destination of the cluster message */
    send_cluster_message(NULL, crm_msg_stonith_ng, message, FALSE);

    free_xml(payload);
    free_xml(message);
}
/* GHashTable removal predicate: returns TRUE for history entries that should
 * be dropped. Only finished operations (st_failed / st_done) qualify; when
 * user_data carries a target name, only entries for that target do.
 */
static gboolean
stonith_remove_history_entry (gpointer key,
                              gpointer value,
                              gpointer user_data)
{
    const remote_fencing_op_t *entry = value;
    const char *wanted = user_data;

    /* don't clean pending operations */
    if ((entry->state != st_failed) && (entry->state != st_done)) {
        return FALSE;
    }

    /* without a target filter every finished operation goes away */
    if (wanted == NULL) {
        return TRUE;
    }

    return (strcmp(entry->target, wanted) == 0) ? TRUE : FALSE;
}
/*!
* \internal
* \brief Send out a cleanup broadcast or do a local history-cleanup
*
* \param[in] target Cleanup can be limited to certain fence-targets
* \param[in] broadcast Send out a cleanup broadcast
*/
static void
stonith_fence_history_cleanup(const char *target,
                              gboolean broadcast)
{
    if (broadcast) {
        /* we'll do the local clean when we receive back our own broadcast */
        stonith_send_broadcast_history(NULL,
                                       st_opt_cleanup | st_opt_discard_reply,
                                       target);
        return;
    }

    if (stonith_remote_op_list == NULL) {
        return;     /* no local history, nothing to clean or announce */
    }

    /* drop the finished entries (optionally limited to target) ... */
    g_hash_table_foreach_remove(stonith_remote_op_list,
                                stonith_remove_history_entry,
                                (gpointer) target);
    /* ... and let subscribed clients know the history changed */
    do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
}
/* keeping the length of fence-history within bounds
* =================================================
*
* If things are really running wild a lot of fencing-attempts
* might fill up the hash-map, eventually using up a lot
* of memory and creating huge history-sync messages.
* Before the history being synced across nodes at least
* the reboot of a cluster-node helped keeping the
* history within bounds even though not in a reliable
* manner.
*
* stonith_remote_op_list isn't sorted for time-stamps
* thus it would be kind of expensive to delete e.g.
* the oldest entry if it would grow past MAX_STONITH_HISTORY
* entries.
* It is more efficient to purge MAX_STONITH_HISTORY/2
* entries whenever the list grows beyond MAX_STONITH_HISTORY.
* (sort for age + purge the MAX_STONITH_HISTORY/2 oldest)
* That done on a per-node-base might raise the
* probability of large syncs to occur.
* Things like introducing a broadcast to purge
* MAX_STONITH_HISTORY/2 entries or not sync above a certain
* threshold coming to mind ...
* Simplest thing though is to purge the full history
* throughout the cluster once MAX_STONITH_HISTORY is reached.
* On the other hand this leads to purging the history in
* situations where it would be handy to have it probably.
*/
/* qsort() comparator over an array of remote_fencing_op_t pointers.
 *
 * Pending operations (neither st_failed nor st_done) sort before finished
 * ones and compare equal among themselves; finished operations are ordered
 * by their completed time-stamp, most recent first.
 */
static int
op_time_sort(const void *a_voidp, const void *b_voidp)
{
    const remote_fencing_op_t *lhs = *(const remote_fencing_op_t **) a_voidp;
    const remote_fencing_op_t *rhs = *(const remote_fencing_op_t **) b_voidp;
    gboolean lhs_pending = (lhs->state != st_failed) && (lhs->state != st_done);
    gboolean rhs_pending = (rhs->state != st_failed) && (rhs->state != st_done);

    if (lhs_pending || rhs_pending) {
        if (lhs_pending && rhs_pending) {
            return 0;
        }
        return lhs_pending ? -1 : 1;
    }

    /* both finished: the later completion sorts first */
    if (rhs->completed == lhs->completed) {
        return 0;
    }
    return (rhs->completed > lhs->completed) ? 1 : -1;
}
/*!
* \internal
* \brief Do a local history-trim to MAX_STONITH_HISTORY / 2 entries
* once over MAX_STONITH_HISTORY
*/
void
stonith_fence_history_trim(void)
{
guint num_ops;
if (!stonith_remote_op_list) {
return;
}
num_ops = g_hash_table_size(stonith_remote_op_list);
if (num_ops > MAX_STONITH_HISTORY) {
remote_fencing_op_t *ops[num_ops];
remote_fencing_op_t *op = NULL;
GHashTableIter iter;
int i;
crm_trace("Fencing History growing beyond limit of %d so purge "
"half of failed/successful attempts", MAX_STONITH_HISTORY);
/* write all ops into an array */
i = 0;
g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
ops[i++] = op;
}
/* run quicksort over the array so that we get pending ops
* first and then sorted most recent to oldest
*/
qsort(ops, num_ops, sizeof(remote_fencing_op_t *), op_time_sort);
/* purgest oldest half of the history entries */
for (i = MAX_STONITH_HISTORY / 2; i < num_ops; i++) {
/* keep pending ops even if they shouldn't fill more than
* half of our buffer
*/
if ((ops[i]->state == st_failed) || (ops[i]->state == st_done)) {
g_hash_table_remove(stonith_remote_op_list, ops[i]->id);
}
}
/* we've just purged valid data from the list so there is no need
* to create a notification - if displayed it can stay
*/
}
}
/*!
* \internal
* \brief Convert xml fence-history to a hash-table like stonith_remote_op_list
*
* \param[in] history Fence-history in xml
*
* \return Fence-history as hash-table
*/
static GHashTable *
stonith_xml_history_to_list(xmlNode *history)
{
    xmlNode *xml_op = NULL;
    GHashTable *rv = NULL;

    init_stonith_remote_op_hash_table(&rv);
    CRM_LOG_ASSERT(rv != NULL);
    if (rv == NULL) {
        /* nothing to attach entries to; callers accept a NULL history */
        return NULL;
    }

    /* every child element of the history list becomes one op in the table */
    for (xml_op = __xml_first_child(history); xml_op != NULL;
         xml_op = __xml_next(xml_op)) {
        remote_fencing_op_t *op = NULL;
        char *id = crm_element_value_copy(xml_op, F_STONITH_REMOTE_OP_ID);
        /* Start from defined values so that nothing indeterminate is read
         * below should crm_element_value_int() not store a result (e.g. the
         * attribute is missing in the received entry).
         */
        int completed = 0;
        int state = 0;

        if (!id) {
            crm_warn("History to convert to hashtable has no id in entry");
            continue;
        }

        crm_trace("Attaching op %s to hashtable", id);

        op = calloc(1, sizeof(remote_fencing_op_t));
        if (op == NULL) {
            crm_warn("Could not allocate history entry for op %s", id);
            free(id);
            continue;
        }

        /* the op takes ownership of id; it doubles as the hash key below */
        op->id = id;
        op->target = crm_element_value_copy(xml_op, F_STONITH_TARGET);
        op->action = crm_element_value_copy(xml_op, F_STONITH_ACTION);
        op->originator = crm_element_value_copy(xml_op, F_STONITH_ORIGIN);
        op->delegate = crm_element_value_copy(xml_op, F_STONITH_DELEGATE);
        op->client_name = crm_element_value_copy(xml_op, F_STONITH_CLIENTNAME);

        crm_element_value_int(xml_op, F_STONITH_DATE, &completed);
        op->completed = (time_t) completed;

        crm_element_value_int(xml_op, F_STONITH_STATE, &state);
        op->state = (enum op_state) state;

        g_hash_table_replace(rv, id, op);
        CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
    }

    return rv;
}
/*!
* \internal
* \brief Craft xml difference between local fence-history and a history
* coming from remote
*
* \param[in] remote_history Fence-history as hash-table (may be NULL)
* \param[in] add_id If crafting the answer for an API
* history-request there is no need for the id
* \param[in] target Optionally limit to certain fence-target
*
* \return The fence-history as xml
*/
static xmlNode *
stonith_local_history_diff(GHashTable *remote_history,
                           gboolean add_id,
                           const char *target)
{
    xmlNode *result = NULL;
    int attached = 0;

    if (stonith_remote_op_list != NULL) {
        GHashTableIter iter;
        remote_fencing_op_t *op = NULL;

        result = create_xml_node(NULL, F_STONITH_HISTORY_LIST);

        g_hash_table_iter_init(&iter, stonith_remote_op_list);
        while (g_hash_table_iter_next(&iter, NULL, (void **) &op)) {
            xmlNode *entry = NULL;
            gboolean known_remotely = (remote_history != NULL)
                && (g_hash_table_lookup(remote_history, op->id) != NULL);

            if (known_remotely) {
                continue;   /* skip entries broadcasted already */
            }
            if ((target != NULL) && (strcmp(op->target, target) != 0)) {
                continue;   /* limited to another fence-target */
            }

            attached++;
            crm_trace("Attaching op %s", op->id);

            entry = create_xml_node(result, STONITH_OP_EXEC);
            if (add_id) {
                crm_xml_add(entry, F_STONITH_REMOTE_OP_ID, op->id);
            }
            crm_xml_add(entry, F_STONITH_TARGET, op->target);
            crm_xml_add(entry, F_STONITH_ACTION, op->action);
            crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
            crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
            crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name);
            crm_xml_add_int(entry, F_STONITH_DATE, op->completed);
            crm_xml_add_int(entry, F_STONITH_STATE, op->state);
        }
    }

    if (attached > 0) {
        return result;
    }

    /* an empty diff is reported as NULL rather than as an empty list */
    free_xml(result);
    return NULL;
}
/*!
* \internal
* \brief Merge fence-history coming from remote into local history
*
* \param[in] history Hash-table holding remote history to be merged in
*/
static void
stonith_merge_in_history_list(GHashTable *history)
{
GHashTableIter iter;
remote_fencing_op_t *op = NULL;
gboolean updated = FALSE;
/* nothing received, nothing to merge (and nothing to destroy) */
if (!history) {
return;
}
init_stonith_remote_op_hash_table(&stonith_remote_op_list);
g_hash_table_iter_init(&iter, history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
remote_fencing_op_t *stored_op =
g_hash_table_lookup(stonith_remote_op_list, op->id);
if (stored_op) {
continue; // Skip existent (@TODO state-merging might be desirable)
}
updated = TRUE;
/* take the entry out of the received table before it is inserted into
 * the local one below, so that the final g_hash_table_destroy(history)
 * only deals with the entries that were skipped
 */
g_hash_table_iter_steal(&iter);
/* a not-yet-finished op that names us as originator but was unknown to
 * us (it was not found in stonith_remote_op_list above)
 */
if ((op->state != st_failed) &&
(op->state != st_done) &&
safe_str_eq(op->originator, stonith_our_uname)) {
crm_warn("received pending action we are supposed to be the "
"owner but it's not in our records -> fail it");
op->state = st_failed;
op->completed = time(NULL);
/* use -EHOSTUNREACH to not introduce a new return-code that might
trigger unexpected results at other places and to prevent
remote_op_done from setting the delegate if not present
*/
stonith_bcast_result_to_peers(op, -EHOSTUNREACH);
}
g_hash_table_insert(stonith_remote_op_list, op->id, op);
/* we could trim the history here but if we bail
* out after trim we might miss more recent entries
* of those that might still be in the list
* if we don't bail out trimming once is more
* efficient and memory overhead is minimal as
* we are just moving pointers from one hash to
* another
*/
}
/* trim once, after everything has been merged (see comment above) */
stonith_fence_history_trim();
/* notify clients only if at least one entry was actually taken over */
if (updated) {
do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
}
g_hash_table_destroy(history); /* remove what is left */
}
/*!
* \internal
* \brief Handle fence-history messages (either from the API or coming in as
* broadcasts)
*
* \param[in] msg Request message
* \param[in] output In case of a request from the API used to craft
* a reply from
* \param[in] remote_peer
* \param[in] options call-options from the request
*
* \return Always success, as there is actually nothing that can really go wrong
*/
int
stonith_fence_history(xmlNode *msg, xmlNode **output,
const char *remote_peer, int options)
{
/* rc is never modified below, so 0 is returned on every path */
int rc = 0;
const char *target = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE);
xmlNode *out_history = NULL;
/* Determine the (optional) fence-target the request is limited to.
 * With st_opt_cs_nodeid the target string is converted to a number and,
 * if a peer with that id is known, replaced by that peer's uname.
 */
if (dev) {
target = crm_element_value(dev, F_STONITH_TARGET);
if (target && (options & st_opt_cs_nodeid)) {
int nodeid = crm_atoi(target, NULL);
crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
if (node) {
target = node->uname;
}
}
}
if (options & st_opt_cleanup) {
crm_trace("Cleaning up operations on %s in %p", target,
stonith_remote_op_list);
/* a call-id in the message selects the broadcast variant of the
 * cleanup, otherwise the local history is cleaned
 */
stonith_fence_history_cleanup(target,
crm_element_value(msg, F_STONITH_CALLID) != NULL);
} else if (options & st_opt_broadcast) {
+ /* there is no clear sign atm for when a history sync
+ is done so send a notification for anything
+ that smells like history-sync
+ */
+ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY_SYNCED, 0, NULL);
if (crm_element_value(msg, F_STONITH_CALLID)) {
/* this is coming from the stonith-API
*
* craft a broadcast with node's history
* so that every node can merge and broadcast
* what it has on top
*/
out_history = stonith_local_history_diff(NULL, TRUE, NULL);
crm_trace("Broadcasting history to peers");
stonith_send_broadcast_history(out_history,
st_opt_broadcast | st_opt_discard_reply,
NULL);
} else if (remote_peer &&
!safe_str_eq(remote_peer, stonith_our_uname)) {
xmlNode *history =
get_xpath_object("//" F_STONITH_HISTORY_LIST, msg, LOG_TRACE);
GHashTable *received_history =
history?stonith_xml_history_to_list(history):NULL;
/* either a broadcast created directly upon stonith-API request
* or a diff as response to such a thing
*
* in both cases it may have a history or not
* if we have differential data
* merge in what we've received and stop
* otherwise broadcast what we have on top
* marking as differential and merge in afterwards
*/
if (!history ||
!crm_is_true(crm_element_value(history,
F_STONITH_DIFFERENTIAL))) {
out_history =
stonith_local_history_diff(received_history, TRUE, NULL);
if (out_history) {
crm_trace("Broadcasting history-diff to peers");
crm_xml_add(out_history, F_STONITH_DIFFERENTIAL,
XML_BOOLEAN_TRUE);
stonith_send_broadcast_history(out_history,
st_opt_broadcast | st_opt_discard_reply,
NULL);
} else {
crm_trace("History-diff is empty - skip broadcast");
}
}
/* received_history is handed over here; the merge function destroys
 * the table once it is done with it
 */
stonith_merge_in_history_list(received_history);
} else {
crm_trace("Skipping history-query-broadcast (%s%s)"
" we sent ourselves",
remote_peer?"remote-peer=":"local-ipc",
remote_peer?remote_peer:"");
}
} else {
/* plain history request */
crm_trace("Looking for operations on %s in %p", target,
stonith_remote_op_list);
/* the reply is built without op ids and limited to target if given */
*output = stonith_local_history_diff(NULL, FALSE, target);
}
/* the broadcast helper only attached a copy, so out_history (if any)
 * is still ours to release
 */
free_xml(out_history);
return rc;
}
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 7e9bb07666..7a87f93f7c 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,1518 +1,1521 @@
/*
* Copyright 2009-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> /* U32T ~ PRIu32, X32T ~ PRIx32 */
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include <pacemaker-fenced.h>
/* Name of the local node: stamped onto client requests as
 * F_STONITH_CLIENTNODE and compared against the unames of a resource's
 * allowed nodes
 */
char *stonith_our_uname = NULL;
char *stonith_our_uuid = NULL;
long stonith_watchdog_timeout_ms = 0;
static GMainLoop *mainloop = NULL;
gboolean stand_alone = FALSE;
static gboolean no_cib_connect = FALSE;
/* While set, st_ipc_accept() refuses new client connections */
static gboolean stonith_shutdown_flag = FALSE;
static qb_ipcs_service_t *ipcs = NULL;
/* CIB XML that the topology refresh searches and that the device update
 * feeds into the working set as input
 */
static xmlNode *local_cib = NULL;
/* Working set used by cib_devices_update(), which asserts it is non-NULL */
static pe_working_set_t *fenced_data_set = NULL;
static cib_t *cib_api = NULL;
static void *cib_library = NULL;
/* Forward declarations; the definitions are not part of this excerpt */
static void stonith_shutdown(int nsig);
static void stonith_cleanup(void);
/* Decide whether a new IPC connection is accepted.
 *
 * Returns -EPERM while the daemon is shutting down, -EIO if no client object
 * could be created for the connection, and 0 otherwise.
 */
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    if (stonith_shutdown_flag) {
        crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
        return -EPERM;
    }

    return (crm_client_new(c, uid, gid) != NULL) ? 0 : -EIO;
}
/* Called with a newly created IPC connection; only traces the event */
static void
st_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection created for %p", c);
}
/* Exit code means? */
/* Handle one request received from an IPC client.
 *
 * qbc   connection the data arrived on
 * data  raw message buffer
 * size  size of that buffer
 *
 * Every path below returns 0.
 */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
xmlNode *request = NULL;
crm_client_t *c = crm_client_get(qbc);
const char *op = NULL;
/* no client object is known for this connection */
if (c == NULL) {
crm_info("Invalid client: %p", qbc);
return 0;
}
request = crm_ipcs_recv(c, data, size, &id, &flags);
/* nothing usable was received: answer with a "nack" */
if (request == NULL) {
crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__);
return 0;
}
op = crm_element_value(request, F_CRM_TASK);
/* node-cache removal requests are not processed via stonith_command();
 * they are tagged with the client's identity and relayed as a cluster
 * message instead
 */
if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) {
crm_xml_add(request, F_TYPE, T_STONITH_NG);
crm_xml_add(request, F_STONITH_OPERATION, op);
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
free_xml(request);
return 0;
}
/* first request of an unnamed client: name it "<client name>.<pid>",
 * with "unknown" as the name if the request carries none
 */
if (c->name == NULL) {
const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
if (value == NULL) {
value = "unknown";
}
c->name = crm_strdup_printf("%s.%u", value, c->pid);
}
crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
crm_trace("Flags %" X32T "/%u for command %" U32T " from %s",
flags, call_options, id, crm_client_name(c));
/* synchronous call: remember the request id on the client so that the
 * reply can be matched to it (do_local_reply() reads and clears it)
 */
if (is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
c->request_id = id; /* Reply only to the last one */
}
/* stamp the client's identity onto the request before processing it */
crm_xml_add(request, F_STONITH_CLIENTID, c->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c));
crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
crm_log_xml_trace(request, "Client[inbound]");
stonith_command(c, id, flags, request, NULL);
free_xml(request);
return 0;
}
/* Error code means? */
/* Called when an IPC connection is closed: destroys the client object that
 * belongs to the connection, if there is one. Always returns 0.
 */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
    crm_client_t *client = crm_client_get(c);

    if (client != NULL) {
        crm_trace("Connection %p closed", c);
        crm_client_destroy(client);
    }

    /* 0 means: yes, go ahead and destroy the connection */
    return 0;
}
/* Called when an IPC connection is destroyed: traces the event and runs the
 * same client cleanup as st_ipc_closed()
 */
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p destroyed", c);
st_ipc_closed(c);
}
/* Process a message that arrived from a cluster peer.
 *
 * "poke" operations are ignored; everything else is logged and passed on to
 * stonith_command() together with the sender taken from F_ORIG.
 */
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
    const char *sender = crm_element_value(msg, F_ORIG);
    const char *operation = crm_element_value(msg, F_STONITH_OPERATION);

    if (!crm_str_eq(operation, "poke", TRUE)) {
        crm_log_xml_trace(msg, "Peer[inbound]");
        stonith_command(NULL, 0, 0, msg, sender);
    }
}
#if SUPPORT_COROSYNC
/* CPG delivery callback: unpacks the received message and, if it is of the
 * cluster class, parses it as XML, records the sender in F_ORIG and passes
 * it on to stonith_peer_callback().
 */
static void
stonith_peer_ais_callback(cpg_handle_t handle,
                          const struct cpg_name *groupName,
                          uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    uint32_t kind = 0;
    const char *from = NULL;
    xmlNode *xml = NULL;
    char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);

    if (data == NULL) {
        return;
    }

    if (kind == crm_class_cluster) {
        xml = string2xml(data);
        if (xml != NULL) {
            crm_xml_add(xml, F_ORIG, from);
            /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */
            stonith_peer_callback(xml, NULL);
        } else {
            crm_err("Invalid XML: '%.120s'", data);
        }
    }

    /* xml is still NULL here for other message classes and parse failures */
    free_xml(xml);
    free(data);
}
/* Called when the connection to the cluster layer is gone: logs this at
 * critical level and starts the daemon shutdown
 */
static void
stonith_peer_cs_destroy(gpointer user_data)
{
crm_crit("Lost connection to cluster layer, shutting down");
stonith_shutdown(0);
}
#endif
/* Deliver notify_src to the local client identified by client_id.
 *
 * With sync_reply the message is sent as the response to the request id
 * stored on the client (which is reset afterwards); otherwise it is sent as
 * a server event. from_peer only influences the trace output.
 */
void
do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
{
    crm_client_t *client_obj = NULL;
    const char *origin_note = from_peer ? "(originator of delegated request)" : "";
    int request_id = 0;
    int send_rc = pcmk_ok;

    crm_trace("Sending response");
    client_obj = crm_client_get_by_id(client_id);
    crm_trace("Sending callback to request originator");

    if (client_obj == NULL) {
        crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set.");
        return;
    }

    if (sync_reply) {
        CRM_LOG_ASSERT(client_obj->request_id);
        request_id = client_obj->request_id;
        client_obj->request_id = 0;
        crm_trace("Sending response %d to %s %s",
                  request_id, client_obj->name, origin_note);
    } else {
        crm_trace("Sending an event to %s %s",
                  client_obj->name, origin_note);
    }

    send_rc = crm_ipcs_send(client_obj, request_id, notify_src,
                            sync_reply ? crm_ipc_flags_none : crm_ipc_server_event);
    if (send_rc < pcmk_ok) {
        crm_warn("%sSync reply to %s failed: %s",
                 sync_reply ? "" : "A-",
                 client_obj->name, pcmk_strerror(send_rc));
    }
}
/* Map a notification/operation name to the st_callback_* flag that is
 * checked against a client's options before a notification is sent.
 *
 * Returns st_callback_unknown for names not listed below.
 */
long long
get_stonith_flag(const char *name)
{
if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) {
return st_callback_notify_fence;
} else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) {
return st_callback_device_add;
} else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) {
return st_callback_device_del;
} else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY)) {
return st_callback_notify_history;
+ } else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED)) {
+ return st_callback_notify_history_synced;
+
}
return st_callback_unknown;
}
/* g_hash_table_foreach() callback: sends the notification passed as
 * user_data to one client, provided the client has a channel and its options
 * contain the flag that matches the notification's subtype.
 */
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
    xmlNode *update_msg = user_data;
    crm_client_t *client = value;
    const char *type = NULL;
    int send_rc = 0;

    CRM_CHECK(client != NULL, return);
    CRM_CHECK(update_msg != NULL, return);

    type = crm_element_value(update_msg, F_SUBTYPE);
    CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);

    if (client->ipcs == NULL) {
        crm_trace("Skipping client with NULL channel");
        return;
    }

    /* the client's options don't include this kind of notification */
    if (!(client->options & get_stonith_flag(type))) {
        return;
    }

    send_rc = crm_ipcs_send(client, 0, update_msg,
                            crm_ipc_server_event | crm_ipc_server_error);
    if (send_rc > 0) {
        crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client),
                  client->id);
    } else {
        crm_warn("%s notification of client %s.%.6s failed: %s (%d)",
                 type, crm_client_name(client), client->id, pcmk_strerror(send_rc), send_rc);
    }
}
/* Tell a client which timeout applies to one of its calls.
 *
 * Nothing is sent if timeout is 0, if call_id or client_id is NULL, or if no
 * client with that id is found.
 */
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
    crm_client_t *recipient = NULL;
    xmlNode *update = NULL;

    if ((timeout == 0) || (call_id == NULL) || (client_id == NULL)) {
        return;
    }

    recipient = crm_client_get_by_id(client_id);
    if (recipient == NULL) {
        return;
    }

    update = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(update, F_TYPE, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(update, F_STONITH_CALLID, call_id);
    crm_xml_add_int(update, F_STONITH_TIMEOUT, timeout);

    crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);

    crm_ipcs_send(recipient, 0, update, crm_ipc_server_event);
    free_xml(update);
}
/* Build a "notify" message and offer it to every connected client.
 *
 * options  not used in this function
 * type     notification type; written as both F_SUBTYPE and
 *          F_STONITH_OPERATION
 * result   written as F_STONITH_RC
 * data     optional payload, attached as F_STONITH_CALLDATA when non-NULL
 *
 * Whether a given client actually receives the message is decided per
 * client in stonith_notify_client().
 */
void
do_stonith_notify(int options, const char *type, int result, xmlNode * data)
{
/* TODO: Standardize the contents of data */
xmlNode *update_msg = create_xml_node(NULL, "notify");
/* the failure action is an empty statement, so execution continues even
 * if type is NULL
 */
CRM_CHECK(type != NULL,;);
crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(update_msg, F_SUBTYPE, type);
crm_xml_add(update_msg, F_STONITH_OPERATION, type);
crm_xml_add_int(update_msg, F_STONITH_RC, result);
if (data != NULL) {
add_message_xml(update_msg, F_STONITH_CALLDATA, data);
}
crm_trace("Notifying clients");
g_hash_table_foreach(client_connections, stonith_notify_client, update_msg);
free_xml(update_msg);
crm_trace("Notify complete");
}
/* Send a configuration-change notification of type op.
 *
 * The payload is an element named after op that carries desc as
 * F_STONITH_DEVICE and active as F_STONITH_ACTIVE; options and rc are passed
 * through to do_stonith_notify().
 */
static void
do_stonith_notify_config(int options, const char *op, int rc,
const char *desc, int active)
{
xmlNode *notify_data = create_xml_node(NULL, op);
CRM_CHECK(notify_data != NULL, return);
crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
do_stonith_notify(options, op, rc, notify_data);
/* notify_data is still ours to release after the notification */
free_xml(notify_data);
}
/* Notify clients about a device change; the current number of entries in
 * device_list is reported as the "active" count
 */
void
do_stonith_notify_device(int options, const char *op, int rc, const char *desc)
{
do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list));
}
/* Notify clients about a fencing-level change; the current number of entries
 * in topology is reported as the "active" count
 */
void
do_stonith_notify_level(int options, const char *op, int rc, const char *desc)
{
do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology));
}
/* Remove the fencing level with the given index for node and notify clients
 * about the outcome. A temporary fencing-level element describing the level
 * serves as the removal request.
 */
static void
topology_remove_helper(const char *node, int level)
{
    char *description = NULL;
    int remove_rc = 0;
    xmlNode *level_xml = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);

    crm_xml_add(level_xml, F_STONITH_ORIGIN, __FUNCTION__);
    crm_xml_add_int(level_xml, XML_ATTR_STONITH_INDEX, level);
    crm_xml_add(level_xml, XML_ATTR_STONITH_TARGET, node);

    remove_rc = stonith_level_remove(level_xml, &description);
    do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, remove_rc, description);

    free_xml(level_xml);
    free(description);
}
/* Walk the xpath results and remove the device for every match whose
 * resource class is the stonith class; all other matches are skipped.
 */
static void
remove_cib_device(xmlXPathObjectPtr xpathObj)
{
    const int total = numXpathResults(xpathObj);
    int idx = 0;

    while (idx < total) {
        xmlNode *match = getXpathResult(xpathObj, idx++);
        const char *standard = NULL;

        CRM_LOG_ASSERT(match != NULL);
        if (match != NULL) {
            standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);
        }

        if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
            continue;
        }

        stonith_device_remove(crm_element_value(match, XML_ATTR_ID), TRUE);
    }
}
/* (Re-)register the fencing level described by match and notify clients.
 *
 * With remove set, the level identified by match's key and index is removed
 * first, so that the registration below replaces it.
 */
static void
handle_topology_change(xmlNode *match, bool remove)
{
    char *desc = NULL;
    int register_rc = 0;

    CRM_CHECK(match != NULL, return);
    crm_trace("Updating %s", ID(match));

    if (remove) {
        char *level_key = stonith_level_key(match, -1);
        int level_index = 0;

        crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &level_index);
        topology_remove_helper(level_key, level_index);
        free(level_key);
    }

    register_rc = stonith_level_register(match, &desc);
    do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, register_rc, desc);

    free(desc);
}
/* Remove the fencing levels that the xpath results mark as deleted.
 *
 * Only matches carrying a diff marker are treated as deletions; a deletion
 * is carried out if both a target key and a positive level index can be
 * determined, otherwise an error is logged.
 */
static void
remove_fencing_topology(xmlXPathObjectPtr xpathObj)
{
    int max = numXpathResults(xpathObj), lpc = 0;

    for (lpc = 0; lpc < max; lpc++) {
        xmlNode *match = getXpathResult(xpathObj, lpc);

        CRM_LOG_ASSERT(match != NULL);
        if (match && crm_element_value(match, XML_DIFF_MARKER)) {
            /* Deletion */
            int index = 0;
            char *target = stonith_level_key(match, -1);

            crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
            if (target == NULL) {
                crm_err("Invalid fencing target in element %s", ID(match));

            } else if (index <= 0) {
                crm_err("Invalid level for %s in element %s", target, ID(match));

            } else {
                topology_remove_helper(target, index);
            }

            /* the key is a string we own (handle_topology_change() frees
             * its key the same way), so release it on every path
             */
            free(target);
            /* } else { Deal with modifications during the 'addition' stage */
        }
    }
}
/* Register every fencing level found in the xpath results.
 *
 * handle_topology_change() is called with remove set to TRUE, so for each
 * match the level is removed before it is registered.
 */
static void
register_fencing_topology(xmlXPathObjectPtr xpathObj)
{
int max = numXpathResults(xpathObj), lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
handle_topology_change(match, TRUE);
}
}
/* Fencing
<diff crm_feature_set="3.0.6">
<diff-removed>
<fencing-topology>
<fencing-level id="f-p1.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="removed:top"/>
<fencing-level id="f-p1.2" target="pcmk-1" index="2" devices="power" __crm_diff_marker__="removed:top"/>
<fencing-level devices="disk,network" id="f-p2.1"/>
</fencing-topology>
</diff-removed>
<diff-added>
<fencing-topology>
<fencing-level id="f-p.1" target="pcmk-1" index="1" devices="poison-pill" __crm_diff_marker__="added:top"/>
<fencing-level id="f-p2.1" target="pcmk-2" index="1" devices="disk,something"/>
<fencing-level id="f-p3.1" target="pcmk-2" index="2" devices="power" __crm_diff_marker__="added:top"/>
</fencing-topology>
</diff-added>
</diff>
*/
/* Rebuild the complete fencing topology from local_cib: the topology list is
 * freed and re-created, then every fencing-level element found in the CIB is
 * registered.
 *
 * Declared with (void) so that the definition is a real prototype and calls
 * with arguments are diagnosed at compile time.
 */
static void
fencing_topology_init(void)
{
    xmlXPathObjectPtr xpathObj = NULL;
    const char *xpath = "//" XML_TAG_FENCING_LEVEL;

    crm_trace("Full topology refresh");
    free_topology_list();
    init_topology_list();

    /* Grab everything */
    xpathObj = xpath_search(local_cib, xpath);
    register_fencing_topology(xpathObj);

    freeXpathObject(xpathObj);
}
/* Name under which a resource is known as a device: its clone_name if set,
 * its id otherwise. The argument and the whole expansion are parenthesized so
 * the macro works with any pointer expression and inside larger expressions.
 * Note that x is evaluated more than once.
 */
#define rsc_name(x) ((x)->clone_name ? (x)->clone_name : (x)->id)
/*!
* \internal
* \brief Check whether our uname is in a resource's allowed node list
*
* \param[in] rsc Resource to check
*
* \return Pointer to node object if found, NULL otherwise
*/
static node_t *
our_node_allowed_for(resource_t *rsc)
{
    GHashTableIter iter;
    node_t *candidate = NULL;

    if ((rsc == NULL) || (stonith_our_uname == NULL)) {
        return NULL;
    }

    /* look through the allowed nodes for the one carrying our uname */
    g_hash_table_iter_init(&iter, rsc->allowed_nodes);
    while (g_hash_table_iter_next(&iter, NULL, (void **) &candidate)) {
        if ((candidate != NULL)
            && (strcmp(candidate->details->uname, stonith_our_uname) == 0)) {
            return candidate;
        }
    }

    return NULL;
}
/*!
* \internal
* \brief If a resource or any of its children are STONITH devices, update their
* definitions given a cluster working set.
*
* \param[in] rsc Resource to check
* \param[in] data_set Cluster working set with device information
*/
static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set)
{
node_t *node = NULL;
const char *value = NULL;
const char *rclass = NULL;
node_t *parent = NULL;
/* remove stays TRUE unless the device gets (re-)registered below; it is
 * evaluated at the update_done label
 */
gboolean remove = TRUE;
/* If this is a complex resource, check children rather than this resource itself.
* TODO: Mark each installed device and remove if untouched when this process finishes.
*/
if(rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, data_set);
if(pe_rsc_is_clone(rsc)) {
crm_trace("Only processing one copy of the clone %s", rsc->id);
break;
}
}
return;
}
/* We only care about STONITH resources. */
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)) {
return;
}
/* If this STONITH resource is disabled, just remove it. */
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
if (safe_str_eq(value, RSC_STOPPED)) {
crm_info("Device %s has been disabled", rsc->id);
goto update_done;
}
/* Check whether our node is allowed for this resource (and its parent if in a group) */
node = our_node_allowed_for(rsc);
if (rsc->parent && (rsc->parent->variant == pe_group)) {
parent = our_node_allowed_for(rsc->parent);
}
if(node == NULL) {
/* Our node is disallowed, so remove the device */
GHashTableIter iter;
crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
/* node is only reused here as the iteration variable for tracing the
 * nodes that are allowed
 */
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
crm_trace("Available: %s = %d", node->details->uname, node->weight);
}
goto update_done;
} else if(node->weight < 0 || (parent && parent->weight < 0)) {
/* Our node (or its group) is disallowed by score, so remove the device */
char *score = score2char((node->weight < 0) ? node->weight : parent->weight);
crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
free(score);
goto update_done;
} else {
/* Our node is allowed, so update the device information */
int rc;
xmlNode *data;
GHashTableIter gIter;
stonith_key_value_t *params = NULL;
const char *name = NULL;
const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
const char *rsc_provides = NULL;
crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
get_rsc_attributes(rsc->parameters, rsc, node, data_set);
get_meta_attributes(rsc->meta, rsc, node, data_set);
rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES);
/* copy the resource parameters into a key/value list for the device
 * registration; value is reused as the iteration output here
 */
g_hash_table_iter_init(&gIter, rsc->parameters);
while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
if (!name || !value) {
continue;
}
params = stonith_key_value_add(params, name, value);
crm_trace(" %s=%s", name, value);
}
remove = FALSE;
data = create_device_registration_xml(rsc_name(rsc), st_namespace_any,
agent, params, rsc_provides);
stonith_key_value_freeall(params, 1, 1);
rc = stonith_device_register(data, NULL, TRUE);
CRM_ASSERT(rc == pcmk_ok);
free_xml(data);
}
update_done:
/* only devices that are currently present in device_list get removed */
if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) {
stonith_device_remove(rsc_name(rsc), TRUE);
}
}
/*!
* \internal
* \brief Update all STONITH device definitions based on current CIB
*/
static void
cib_devices_update(void)
{
GListPtr gIter = NULL;
crm_info("Updating devices to version %s.%s.%s",
crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
crm_element_value(local_cib, XML_ATTR_GENERATION),
crm_element_value(local_cib, XML_ATTR_NUMUPDATES));
CRM_ASSERT(fenced_data_set != NULL);
/* point the working set at the current CIB (no copy is made) and at the
 * local node before it is processed
 */
fenced_data_set->input = local_cib;
fenced_data_set->now = crm_time_new(NULL);
fenced_data_set->flags |= pe_flag_quick_location;
fenced_data_set->localhost = stonith_our_uname;
cluster_status(fenced_data_set);
pcmk__schedule_actions(fenced_data_set, NULL, NULL);
/* update (or remove) the device definition of every top-level resource;
 * cib_device_update() descends into children itself
 */
for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, fenced_data_set);
}
fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it
pe_reset_working_set(fenced_data_set);
}
/*!
 * \internal
 * \brief React to a format-2 CIB patchset that may affect fencing devices
 *
 * Each change in the patchset is examined in turn:
 * - changes without an operation or path, "move" operations, and anything
 *   under the status section are ignored;
 * - a "delete" whose path names a primitive removes that device directly,
 *   unless the path is inside an attribute set, which forces a full refresh;
 * - any other change under XML_CIB_TAG_RESOURCES, XML_CIB_TAG_CONSTRAINTS or
 *   XML_CIB_TAG_RSCCONFIG forces a full refresh via cib_devices_update().
 *
 * \param[in] event  Notification type (not used by this function)
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
{
    xmlNode *change = NULL;
    char *reason = NULL;
    bool needs_update = FALSE;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);
        const char *shortpath = NULL;

        /* A change without a path cannot be classified, and passing NULL to
         * strstr()/strdup() below would be undefined behavior
         */
        if ((op == NULL) || (xpath == NULL) ||
            (strcmp(op, "move") == 0) ||
            strstr(xpath, "/"XML_CIB_TAG_STATUS)) {
            continue;

        } else if (safe_str_eq(op, "delete") && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) {
            const char *rsc_id = NULL;
            char *search = NULL;
            char *mutable = NULL;

            if (strstr(xpath, XML_TAG_ATTR_SETS) ||
                strstr(xpath, XML_TAG_META_SETS)) {
                needs_update = TRUE;
                reason = strdup("(meta) attribute deleted from resource");
                break;
            }

            /* Work on a private copy so the id can be NUL-terminated in place */
            mutable = strdup(xpath);
            if (mutable != NULL) {
                rsc_id = strstr(mutable, "primitive[@id=\'");
            }
            if (rsc_id != NULL) {
                rsc_id += strlen("primitive[@id=\'");
                search = strchr(rsc_id, '\'');
            }
            if (search != NULL) {
                *search = 0;
                stonith_device_remove(rsc_id, TRUE);
            } else {
                crm_warn("Ignoring malformed CIB update (resource deletion)");
            }
            free(mutable);

        } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) ||
                   strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) ||
                   strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) {
            /* Log only the last path component as the reason */
            shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
            reason = crm_strdup_printf("%s %s", op, shortpath+1);
            needs_update = TRUE;
            break;
        }
    }

    if(needs_update) {
        crm_info("Updating device list from the cib: %s", reason);
        cib_devices_update();
    } else {
        crm_trace("No updates for device list found in cib");
    }
    free(reason);
}
/*!
 * \internal
 * \brief React to a format-1 CIB diff that may affect fencing devices
 *
 * Three XPath searches are run against the notification: location
 * constraints, removed resources and added resources.  Removed resources are
 * handed to remove_cib_device(); a location constraint or an added resource
 * of class PCMK_RESOURCE_CLASS_STONITH causes a call to cib_devices_update().
 *
 * \param[in] event  Notification type (not used by this function)
 * \param[in] msg    CIB notification containing the diff
 */
static void
update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
{
    xmlXPathObjectPtr results = NULL;
    gboolean needs_update = FALSE;
    const char *reason = "none";
    int count = 0;
    int idx = 0;

    /* Location constraints: safest and simplest to always recompute */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
    count = numXpathResults(results);
    if (count > 0) {
        needs_update = TRUE;
        reason = "new location constraint";

        for (idx = 0; idx < count; idx++) {
            xmlNode *match = getXpathResult(results, idx);

            crm_log_xml_trace(match, "new constraint");
        }
    }
    freeXpathObject(results);

    /* Removed resources */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
    if (numXpathResults(results) > 0) {
        remove_cib_device(results);
    }
    freeXpathObject(results);

    /* Added resources: only fencing-class resources are of interest */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
    count = numXpathResults(results);
    if (count > 0) {
        for (idx = 0; idx < count; idx++) {
            xmlNode *match = getXpathResult(results, idx);
            const char *rsc_id = crm_element_value(match, XML_ATTR_ID);
            const char *standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);

            if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH) == FALSE) {
                crm_trace("Fencing resource %s was added or modified", rsc_id);
                reason = "new resource";
                needs_update = TRUE;
            }
        }
    }
    freeXpathObject(results);

    if (needs_update) {
        crm_info("Updating device list from the cib: %s", reason);
        cib_devices_update();
    }
}
/*!
 * \internal
 * \brief Dispatch a CIB update to the device handler for its patch format
 *
 * The "format" attribute of the patchset selects the handler; a missing
 * attribute leaves the default of 1 in place.
 *
 * \param[in] event  Notification type, passed through to the handler
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_cib_stonith_devices(const char *event, xmlNode * msg)
{
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
    int format = 1;

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if (format == 1) {
        update_cib_stonith_devices_v1(event, msg);

    } else if (format == 2) {
        update_cib_stonith_devices_v2(event, msg);

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
/* Needs to hold node name + attribute name + attribute value + 75 */
#define XPATH_MAX 512
/*!
* \internal
* \brief Check whether a node has a specific attribute name/value
*
* \param[in] node Name of node to check
* \param[in] name Name of an attribute to look for
* \param[in] value The value the named attribute needs to be set to in order to be considered a match
*
* \return TRUE if the locally cached CIB has the specified node attribute
*/
gboolean
node_has_attr(const char *node, const char *name, const char *value)
{
char xpath[XPATH_MAX];
xmlNode *match;
int n;
CRM_CHECK(local_cib != NULL, return FALSE);
/* Search for the node's attributes in the CIB. While the schema allows
* multiple sets of instance attributes, and allows instance attributes to
* use id-ref to reference values elsewhere, that is intended for resources,
* so we ignore that here.
*/
n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES
"/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS
"/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']",
node, name, value);
match = get_xpath_object(xpath, local_cib, LOG_TRACE);
CRM_CHECK(n < XPATH_MAX, return FALSE);
return (match != NULL);
}
/*!
 * \internal
 * \brief Update the fencing topology in response to a CIB change
 *
 * For format-1 diffs, removed fencing levels are unregistered and added ones
 * registered.  For format-2 patchsets, each change is handled individually
 * where possible; changes that cannot be mapped to a single level cause the
 * whole topology to be rebuilt with fencing_topology_init().
 *
 * \param[in] event  Notification type (not used by this function)
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_fencing_topology(const char *event, xmlNode * msg)
{
    int format = 1;
    const char *xpath = NULL;
    xmlXPathObjectPtr xpathObj = NULL;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if(format == 1) {
        /* Process deletions (only) */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);
        remove_fencing_topology(xpathObj);
        freeXpathObject(xpathObj);

        /* Process additions and changes */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);
        register_fencing_topology(xpathObj);
        freeXpathObject(xpathObj);

    } else if(format == 2) {
        xmlNode *change = NULL;
        int add[] = { 0, 0, 0 };
        int del[] = { 0, 0, 0 };

        xml_patch_versions(patchset, add, del);

        for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
            const char *op = crm_element_value(change, XML_DIFF_OP);
            const char *path = crm_element_value(change, XML_DIFF_PATH);

            /* Without an operation or a path there is nothing to classify,
             * and strstr() must not be handed a NULL path
             */
            if ((op == NULL) || (path == NULL)) {
                continue;

            } else if(strstr(path, "/" XML_TAG_FENCING_LEVEL) != NULL) {
                /* Change to a specific entry */
                crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], path);

                if(strcmp(op, "move") == 0) {
                    continue;

                } else if(strcmp(op, "create") == 0) {
                    handle_topology_change(change->children, FALSE);

                } else if(strcmp(op, "modify") == 0) {
                    xmlNode *match = first_named_child(change, XML_DIFF_RESULT);

                    if(match) {
                        handle_topology_change(match->children, TRUE);
                    }

                } else if(strcmp(op, "delete") == 0) {
                    /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
                    crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s",
                             op, add[0], add[1], add[2], path);
                    fencing_topology_init();
                    return;
                }

            } else if (strstr(path, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) {
                /* Change to the topology in general */
                crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s",
                         op, add[0], add[1], add[2], path);
                fencing_topology_init();
                return;

            } else if (strstr(path, "/" XML_CIB_TAG_CONFIGURATION)) {
                /* Changes to the whole config section, possibly including the topology as a child */
                if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) {
                    crm_trace("Nothing for us in %s operation %d.%d.%d for %s.",
                              op, add[0], add[1], add[2], path);

                } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
                    crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.",
                             op, add[0], add[1], add[2], path);
                    fencing_topology_init();
                    return;
                }

            } else {
                crm_trace("Nothing for us in %s operation %d.%d.%d for %s",
                          op, add[0], add[1], add[2], path);
            }
        }

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
static bool have_cib_devices = FALSE;
/*!
 * \internal
 * \brief CIB diff-notification callback that keeps the local CIB copy current
 *
 * Registered by setup_cib() for T_CIB_DIFF_NOTIFY.  Applies the patchset in
 * \p msg to local_cib (re-querying the full CIB when the patch cannot be
 * applied), refreshes the peer caches, re-reads the stonith-enabled and
 * stonith-watchdog-timeout settings, and finally updates the fencing topology
 * and device list.
 *
 * \param[in] event  Notification type (used in log messages and passed on)
 * \param[in] msg    CIB notification containing the patchset
 */
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
int rc = pcmk_ok;
xmlNode *stonith_enabled_xml = NULL;
xmlNode *stonith_watchdog_xml = NULL;
const char *stonith_enabled_s = NULL;
/* Remembers across calls whether devices/topology reflect an enabled state;
 * FALSE forces a full rebuild on the next call where stonith is enabled
 */
static gboolean stonith_enabled_saved = TRUE;
/* have_cib_devices is only set by init_cib_cache_cb() */
if(!have_cib_devices) {
crm_trace("Skipping updates until we get a full dump");
return;
} else if(msg == NULL) {
crm_trace("Missing %s update", event);
return;
}
/* Maintain a local copy of the CIB so that we have full access
* to device definitions, location constraints, and node attributes
*/
if (local_cib != NULL) {
/* NOTE: this rc deliberately shadows the outer one for the patch step */
int rc = pcmk_ok;
xmlNode *patchset = NULL;
/* Ignore notifications for CIB operations that did not succeed */
crm_element_value_int(msg, F_CIB_RC, &rc);
if (rc != pcmk_ok) {
return;
}
patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_log_patchset(LOG_TRACE, "Config update", patchset);
rc = xml_apply_patchset(local_cib, patchset, TRUE);
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
/* Keep the current copy as is */
break;
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
/* Drop the copy so the block below re-queries the full CIB */
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
break;
default:
/* Any other failure is handled the same way, but logged as a warning */
crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
}
}
if (local_cib == NULL) {
crm_trace("Re-requesting the full cib");
rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
if(rc != pcmk_ok) {
crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
return;
}
CRM_ASSERT(local_cib != NULL);
stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
}
crm_peer_caches_refresh(local_cib);
stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE);
if (stonith_enabled_xml) {
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
}
/* An absent stonith-enabled setting is treated the same as a true one */
if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) {
long timeout_ms = 0;
const char *value = NULL;
stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
if (stonith_watchdog_xml) {
value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
}
if(value) {
timeout_ms = crm_get_msec(value);
}
/* A negative value is replaced by crm_auto_watchdog_timeout() */
if (timeout_ms < 0) {
timeout_ms = crm_auto_watchdog_timeout();
}
if(timeout_ms != stonith_watchdog_timeout_ms) {
crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
stonith_watchdog_timeout_ms = timeout_ms;
}
} else {
stonith_watchdog_timeout_ms = 0;
}
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
crm_trace("Ignoring cib updates while stonith is disabled");
stonith_enabled_saved = FALSE;
return;
} else if (stonith_enabled_saved == FALSE) {
/* Either stonith was just re-enabled or the CIB was re-queried above:
 * rebuild everything instead of applying the incremental change
 */
crm_info("Updating stonith device and topology lists now that stonith is enabled");
stonith_enabled_saved = TRUE;
fencing_topology_init();
cib_devices_update();
} else {
/* Normal case: apply only what this notification changed */
update_fencing_topology(event, msg);
update_cib_stonith_devices(event, msg);
}
}
/*!
 * \internal
 * \brief Callback for the initial full CIB query issued by setup_cib()
 *
 * Stores a copy of the query result as the local CIB, marks the device cache
 * as initialized, and builds the topology and device lists from it.
 *
 * \param[in] msg        Not used by this function
 * \param[in] call_id    Not used by this function
 * \param[in] rc         Not used by this function
 * \param[in] output     Query result; copied into local_cib
 * \param[in] user_data  Not used by this function
 */
static void
init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    /* From now on update_cib_cache_cb() will process notifications */
    have_cib_devices = TRUE;
    crm_info("Updating device list from the cib: init");

    local_cib = copy_xml(output);
    crm_peer_caches_refresh(local_cib);

    fencing_topology_init();
    cib_devices_update();
}
/*!
 * \internal
 * \brief Begin shutting down the fencer
 *
 * Installed as the SIGTERM handler by main() and also called when the CIB
 * connection is lost.  If the main loop is running it is asked to quit;
 * otherwise cleanup is done here and the process exits.
 *
 * \param[in] nsig  Signal number (not used by this function)
 */
static void
stonith_shutdown(int nsig)
{
    stonith_shutdown_flag = TRUE;
    crm_info("Terminating with %d clients",
             crm_hash_table_size(client_connections));

    if ((mainloop == NULL) || !g_main_loop_is_running(mainloop)) {
        /* Nothing will return control to main(), so finish up here */
        stonith_cleanup();
        crm_exit(CRM_EX_OK);

    } else {
        g_main_loop_quit(mainloop);
    }
}
/*!
 * \internal
 * \brief Handle loss of the connection to the CIB manager
 *
 * During a shutdown already in progress this only logs; otherwise the CIB
 * connection is signed off and a shutdown is started.
 *
 * \param[in] user_data  Not used by this function
 */
static void
cib_connection_destroy(gpointer user_data)
{
    if (stonith_shutdown_flag) {
        crm_info("Connection to the CIB manager closed");
        return;
    }

    crm_crit("Lost connection to the CIB manager, shutting down");

    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }
    stonith_shutdown(0);
}
/*!
 * \internal
 * \brief Release the fencer's connections and cached state before exit
 *
 * Signs off from the CIB, destroys the IPC server, and frees the peer and
 * client caches, the operation/topology/device lists, the metadata cache,
 * the local node name and the local CIB copy.
 */
static void
stonith_cleanup(void)
{
    /* External connections first */
    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }
    if (ipcs != NULL) {
        qb_ipcs_destroy(ipcs);
    }

    /* Peer and client caches */
    crm_peer_destroy();
    crm_client_cleanup();

    /* Fencer-specific tables */
    free_stonith_remote_op_list();
    free_topology_list();
    free_device_list();
    free_metadata_cache();

    /* Reset the globals so nothing is left pointing at freed memory */
    free(stonith_our_uname);
    stonith_our_uname = NULL;

    free_xml(local_cib);
    local_cib = NULL;
}
/* *INDENT-OFF* */
/* Command-line options accepted by main().  The last field of each entry is
 * the character that main() switches on when the option is seen; the
 * all-zero entry terminates the table.
 * NOTE(review): the second field looks like a "takes an argument" flag (only
 * "logfile", whose handler reads optarg, sets it) -- confirm against the
 * struct crm_option definition.
 */
static struct crm_option long_options[] = {
{"stand-alone", 0, 0, 's'},
{"stand-alone-w-cpg", 0, 0, 'c'},
{"logfile", 1, 0, 'l'},
{"verbose", 0, 0, 'V'},
{"version", 0, 0, '$'},
{"help", 0, 0, '?'},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
/*!
 * \internal
 * \brief Connect to the CIB manager and subscribe to configuration changes
 *
 * Creates the CIB connection object via a dynamically looked-up cib_new(),
 * signs on (retrying while the manager reports -ENOTCONN), registers
 * update_cib_cache_cb() for diff notifications, and requests the full CIB,
 * whose result is delivered to init_cib_cache_cb().
 */
static void
setup_cib(void)
{
    static cib_t *(*cib_new_fn) (void) = NULL;
    int rc = pcmk_ok;
    int attempt = 0;

    if (cib_new_fn == NULL) {
        cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE);
    }
    if (cib_new_fn != NULL) {
        cib_api = (*cib_new_fn) ();
    }
    if (cib_api == NULL) {
        crm_err("No connection to the CIB manager");
        return;
    }

    /* Up to five sign-on attempts, sleeping 0, 1, 2, ... seconds before each */
    for (;;) {
        sleep(attempt);
        rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command);
        if (rc != -ENOTCONN) {
            break;
        }
        if (++attempt >= 5) {
            break;
        }
    }

    if (rc != pcmk_ok) {
        crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc);
        return;
    }

    if (cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)
        != pcmk_ok) {
        crm_err("Could not set CIB notification callback");
        return;
    }

    /* Asynchronous full query; the reply arrives in init_cib_cache_cb() */
    rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
    cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
                                     init_cib_cache_cb);
    cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
    crm_info("Watching for stonith topology changes");
}
/* IPC server event handlers; main() passes this table to
 * stonith_ipc_server_init() when it creates the server.
 */
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = st_ipc_created,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
 * \internal
 * \brief Callback for peer status changes
 *
 * Broadcasts a "poke" message to the cluster for every change other than a
 * process-status change, unless the peer is a remote node.
 *
 * \param[in] type  What changed
 * \param[in] node  What peer had the change
 * \param[in] data  Previous value of what changed
 */
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    xmlNode *poke = NULL;

    if ((type == crm_status_processes) || is_set(node->flags, crm_remote_node)) {
        return;
    }

    /*
     * This is a hack until we can send to a nodeid and/or we fix node name lookups
     * These messages are ignored in stonith_peer_callback()
     */
    poke = create_xml_node(NULL, "stonith_command");
    crm_xml_add(poke, F_XML_TAGNAME, "stonith_command");
    crm_xml_add(poke, F_TYPE, T_STONITH_NG);
    crm_xml_add(poke, F_STONITH_OPERATION, "poke");

    crm_debug("Broadcasting our uname because of node %u", node->id);
    send_cluster_message(NULL, crm_msg_stonith_ng, poke, FALSE);

    free_xml(poke);
}
/*!
 * \brief Entry point of the Pacemaker fencer daemon
 *
 * Parses the command line, prints the fencer's resource-agent style metadata
 * when invoked as "metadata", and otherwise refuses to start if another
 * instance already answers on the IPC end-point, connects to the cluster
 * (and to the CIB unless disabled), optionally registers the watchdog
 * pseudo-device, starts the IPC server and runs the main loop.
 *
 * \param[in] argc  Number of command-line arguments
 * \param[in] argv  Command-line arguments
 *
 * \return CRM_EX_OK after printing metadata; all other paths leave through
 *         crm_exit() or crm_help()
 */
int
main(int argc, char **argv)
{
    int flag;
    int lpc = 0;
    int argerr = 0;
    int option_index = 0;

    /* Zero-initialize: only some members are assigned below (and only for
     * corosync builds) before the struct is passed to crm_cluster_connect()
     */
    crm_cluster_t cluster = { 0, };
    const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" };
    crm_ipc_t *old_instance = NULL;

    crm_log_preinit(NULL, argc, argv);
    crm_set_options(NULL, "mode [options]", long_options,
                    "Daemon for executing fencing devices in a Pacemaker cluster.\n");

    while (1) {
        flag = crm_get_option(argc, argv, &option_index);
        if (flag == -1) {
            break;
        }

        switch (flag) {
            case 'V':
                crm_bump_log_level(argc, argv);
                break;
            case 'l':
                crm_add_logfile(optarg);
                break;
            case 's':
                stand_alone = TRUE;
                break;
            case 'c':
                stand_alone = FALSE;
                no_cib_connect = TRUE;
                break;
            case '$':
            case '?':
                crm_help(flag, CRM_EX_OK);
                break;
            default:
                ++argerr;
                break;
        }
    }

    /* "metadata" as the sole argument: describe the instance attributes that
     * apply to every fencing resource, then exit
     */
    if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
        printf("<?xml version=\"1.0\"?><!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n");
        printf("<resource-agent name=\"pacemaker-fenced\">\n");
        printf(" <version>1.0</version>\n");
        printf(" <longdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources"
               " and used by Pacemaker's fence daemon, formerly known as stonithd</longdesc>\n");
        printf(" <shortdesc lang=\"en\">Instance attributes available for all \"stonith\"-class resources</shortdesc>\n");
        printf(" <parameters>\n");

#if 0
        // priority is not implemented yet
        printf(" <parameter name=\"priority\" unique=\"0\">\n");
        printf(" <shortdesc lang=\"en\">Devices that are not in a topology "
               "are tried in order of highest to lowest integer priority</shortdesc>\n");
        printf(" <content type=\"integer\" default=\"0\"/>\n");
        printf(" </parameter>\n");
#endif

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTARG);
        printf
            (" <shortdesc lang=\"en\">Advanced use only: An alternate parameter to supply instead of 'port'</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
             "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
             "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
             " </longdesc>\n");
        printf(" <content type=\"string\" default=\"port\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTMAP);
        printf
            (" <shortdesc lang=\"en\">A mapping of host names to ports numbers for devices that do not support host names.</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2</longdesc>\n");
        printf(" <content type=\"string\" default=\"\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTLIST);
        printf
            (" <shortdesc lang=\"en\">A list of machines controlled by this device (Optional unless %s=static-list).</shortdesc>\n",
             STONITH_ATTR_HOSTCHECK);
        printf(" <content type=\"string\" default=\"\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_HOSTCHECK);
        printf
            (" <shortdesc lang=\"en\">How to determine which machines are controlled by the device.</shortdesc>\n");
        printf(" <longdesc lang=\"en\">Allowed values: dynamic-list "
               "(query the device via the 'list' command), static-list "
               "(check the " STONITH_ATTR_HOSTLIST " attribute), status "
               "(query the device via the 'status' command), none (assume "
               "every device can fence every machine)</longdesc>\n");
        printf(" <content type=\"string\" default=\"dynamic-list\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_DELAY_MAX);
        printf
            (" <shortdesc lang=\"en\">Enable a random delay for stonith actions and specify the maximum of random delay.</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">This prevents double fencing when using slow devices such as sbd.\n"
             "Use this to enable a random delay for stonith actions.\n"
             "The overall delay is derived from this random delay value adding a static delay so that the sum is kept below the maximum delay.</longdesc>\n");
        printf(" <content type=\"time\" default=\"0s\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_DELAY_BASE);
        printf
            (" <shortdesc lang=\"en\">Enable a base delay for stonith actions and specify base delay value.</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">This prevents double fencing when different delays are configured on the nodes.\n"
             "Use this to enable a static delay for stonith actions.\n"
             "The overall delay is derived from a random delay value adding this static delay so that the sum is kept below the maximum delay.</longdesc>\n");
        printf(" <content type=\"time\" default=\"0s\"/>\n");
        printf(" </parameter>\n");

        printf(" <parameter name=\"%s\" unique=\"0\">\n", STONITH_ATTR_ACTION_LIMIT);
        printf
            (" <shortdesc lang=\"en\">The maximum number of actions can be performed in parallel on this device</shortdesc>\n");
        printf
            (" <longdesc lang=\"en\">Cluster property concurrent-fencing=true needs to be configured first.\n"
             "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.</longdesc>\n");
        printf(" <content type=\"integer\" default=\"1\"/>\n");
        printf(" </parameter>\n");

        /* Per-action overrides: command name, timeout and retry count */
        for (lpc = 0; lpc < DIMOF(actions); lpc++) {
            printf(" <parameter name=\"pcmk_%s_action\" unique=\"0\">\n", actions[lpc]);
            printf
                (" <shortdesc lang=\"en\">Advanced use only: An alternate command to run instead of '%s'</shortdesc>\n",
                 actions[lpc]);
            printf
                (" <longdesc lang=\"en\">Some devices do not support the standard commands or may provide additional ones.\n"
                 "Use this to specify an alternate, device-specific, command that implements the '%s' action.</longdesc>\n",
                 actions[lpc]);
            printf(" <content type=\"string\" default=\"%s\"/>\n", actions[lpc]);
            printf(" </parameter>\n");

            printf(" <parameter name=\"pcmk_%s_timeout\" unique=\"0\">\n", actions[lpc]);
            printf
                (" <shortdesc lang=\"en\">Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout</shortdesc>\n",
                 actions[lpc]);
            printf
                (" <longdesc lang=\"en\">Some devices need much more/less time to complete than normal.\n"
                 "Use this to specify an alternate, device-specific, timeout for '%s' actions.</longdesc>\n",
                 actions[lpc]);
            printf(" <content type=\"time\" default=\"60s\"/>\n");
            printf(" </parameter>\n");

            printf(" <parameter name=\"pcmk_%s_retries\" unique=\"0\">\n", actions[lpc]);
            printf
                (" <shortdesc lang=\"en\">Advanced use only: The maximum number of times to retry the '%s' command within the timeout period</shortdesc>\n",
                 actions[lpc]);
            printf(" <longdesc lang=\"en\">Some devices do not support multiple connections."
                   " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
                   " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
                   "</longdesc>\n", actions[lpc]);
            printf(" <content type=\"integer\" default=\"2\"/>\n");
            printf(" </parameter>\n");
        }

        printf(" </parameters>\n");
        printf("</resource-agent>\n");
        return CRM_EX_OK;
    }

    if (optind != argc) {
        ++argerr;
    }

    if (argerr) {
        crm_help('?', CRM_EX_USAGE);
    }

    crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
    crm_notice("Starting Pacemaker fencer");

    /* Refuse to start if another instance already answers on our end-point */
    old_instance = crm_ipc_new("stonith-ng", 0);
    if (crm_ipc_connect(old_instance)) {
        /* IPC end-point already up */
        crm_ipc_close(old_instance);
        crm_ipc_destroy(old_instance);
        crm_err("pacemaker-fenced is already active, aborting startup");
        crm_exit(CRM_EX_OK);
    } else {
        /* not up or not authentic, we'll proceed either way */
        crm_ipc_destroy(old_instance);
        old_instance = NULL;
    }

    mainloop_add_signal(SIGTERM, stonith_shutdown);

    crm_peer_init();

    fenced_data_set = pe_new_working_set();
    CRM_ASSERT(fenced_data_set != NULL);

    if (stand_alone == FALSE) {

        if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
            cluster.destroy = stonith_peer_cs_destroy;
            cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback;
            cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
        }

        crm_set_status_callback(&st_peer_update_callback);

        if (crm_cluster_connect(&cluster) == FALSE) {
            crm_crit("Cannot sign in to the cluster... terminating");
            crm_exit(CRM_EX_FATAL);
        }
        stonith_our_uname = cluster.uname;
        stonith_our_uuid = cluster.uuid;

        if (no_cib_connect == FALSE) {
            setup_cib();
        }

    } else {
        stonith_our_uname = strdup("localhost");
    }

    init_device_list();
    init_topology_list();

    if(stonith_watchdog_timeout_ms > 0) {
        int rc;
        xmlNode *xml;
        stonith_key_value_t *params = NULL;

        /* The watchdog pseudo-device is registered for the local node only */
        params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);

        xml = create_device_registration_xml("watchdog", st_namespace_internal,
                                             STONITH_WATCHDOG_AGENT, params,
                                             NULL);
        stonith_key_value_freeall(params, 1, 1);
        rc = stonith_device_register(xml, NULL, FALSE);
        free_xml(xml);
        if (rc != pcmk_ok) {
            crm_crit("Cannot register watchdog pseudo fence agent");
            crm_exit(CRM_EX_FATAL);
        }
    }

    stonith_ipc_server_init(&ipcs, &ipc_callbacks);

    /* Create the mainloop and run it... */
    mainloop = g_main_loop_new(NULL, FALSE);
    crm_notice("Pacemaker fencer successfully started and accepting connections");
    g_main_loop_run(mainloop);

    stonith_cleanup();
    pe_free_working_set(fenced_data_set);
    crm_exit(CRM_EX_OK);
}
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index a8531a69cd..583cb47a14 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -1,262 +1,263 @@
/*
* Copyright 2009-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm/common/mainloop.h>
/*!
* \internal
* \brief Check to see if target was fenced in the last few seconds.
* \param tolerance, The number of seconds to look back in time
* \param target, The node to search for
* \param action, The action we want to match.
*
* \retval FALSE, no match
* \retval TRUE, fencing operation took place in the last 'tolerance' number of seconds.
*/
gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
/* Values for the flags member of stonith_device_t; each enumerator is a
 * distinct single bit.
 * NOTE(review): the names suggest these record which actions a device's agent
 * supports -- confirm against the code that sets them.
 */
enum st_device_flags
{
st_device_supports_list = 0x0001,
st_device_supports_status = 0x0002,
st_device_supports_reboot = 0x0004,
};
/* Per-device record used by the fencer.
 * NOTE(review): presumably one instance exists per entry in device_list --
 * confirm against the registration code.
 */
typedef struct stonith_device_s {
char *id;
char *agent;
char *namespace;
/*! list of actions that must execute on the target node. Used for unfencing */
char *on_target_actions;
GListPtr targets;
/* NOTE(review): presumably the time the targets list was last refreshed -- confirm */
time_t targets_age;
gboolean has_attr_map;
/* should nodeid parameter for victim be included in agent arguments */
gboolean include_nodeid;
/* whether the cluster should automatically unfence nodes with the device */
gboolean automatic_unfencing;
guint priority;
/* Holds enum st_device_flags values */
enum st_device_flags flags;
GHashTable *params;
GHashTable *aliases;
GList *pending_ops;
crm_trigger_t *work;
xmlNode *agent_metadata;
/*! A verified device is one that has contacted the
* agent successfully to perform a monitor operation */
gboolean verified;
/* NOTE(review): presumably record whether the device was registered from the
 * CIB and/or through the API (stonith_device_register() takes a from_cib
 * argument) -- confirm */
gboolean cib_registered;
gboolean api_registered;
} stonith_device_t;
/* These values are used to index certain arrays by "phase". Usually an
* operation has only one "phase", so phase is always zero. However, some
* reboots are remapped to "off" then "on", in which case "reboot" will be
* phase 0, "off" will be phase 1 and "on" will be phase 2.
*
* st_phase_max is one past the last phase value, so it can serve as the
* length of an array indexed by phase.
*/
enum st_remap_phase {
st_phase_requested = 0,
st_phase_off = 1,
st_phase_on = 2,
st_phase_max = 3
};
/* State tracked for one remote fencing operation: what is being fenced and
 * how, who requested it, the timers that bound it, and the devices and
 * topology level currently being worked through.
 */
typedef struct remote_fencing_op_s {
/* The unique id associated with this operation */
char *id;
/*! The node this operation will fence */
char *target;
/*! The fencing action to perform on the target. (reboot, on, off) */
char *action;
/*! When was the fencing action recorded (seconds since epoch) */
time_t created;
/*! Marks if the final notifications have been sent to local stonith clients. */
gboolean notify_sent;
/*! The number of query replies received */
guint replies;
/*! The number of query replies expected */
guint replies_expected;
/*! Does this node own control of this operation */
gboolean owner;
/*! After the query is complete, this is the high-level timer that expires the entire operation */
guint op_timer_total;
/*! This timer expires the current fencing request. Many fencing
* requests may exist in a single operation */
guint op_timer_one;
/*! This timer expires the query request sent out to determine
* which nodes contain which devices, and who those devices can fence */
guint query_timer;
/*! This is the default timeout to use for each fencing device if no
* custom timeout is received in the query. */
gint base_timeout;
/*! This is the calculated total timeout an operation can take before
* expiring. This is calculated by adding together all the timeout
* values associated with the devices this fencing operation may call */
gint total_timeout;
/*! Delegate is the node being asked to perform a fencing action
* on behalf of the node that owns the remote operation. Some operations
* will involve multiple delegates. This value represents the final delegate
* that is used. */
char *delegate;
/*! The point at which the remote operation completed */
time_t completed;
/*! The stonith_call_options associated with this remote operation */
long long call_options;
/*! The current state of the remote operation. This indicates
* what stage the op is in, query, exec, done, duplicate, failed. */
enum op_state state;
/*! The node that owns the remote operation */
char *originator;
/*! The local client id that initiated the fencing request */
char *client_id;
/*! The client's call_id that initiated the fencing request */
int client_callid;
/*! The name of client that initiated the fencing request */
char *client_name;
/*! List of the received query results for all the nodes in the cpg group */
GListPtr query_results;
/*! The original request that initiated the remote stonith operation */
xmlNode *request;
/*! The current topology level being executed */
guint level;
/*! The current operation phase being executed */
enum st_remap_phase phase;
/*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
GListPtr automatic_list;
/*! List of all devices at the currently executing topology level */
GListPtr devices_list;
/*! Current entry in the topology device list */
GListPtr devices;
/*! List of duplicate operations attached to this operation. Once this operation
* completes, the duplicate operations will be closed out as well. */
GListPtr duplicates;
} remote_fencing_op_t;
/*!
* \internal
* \brief Broadcast the result of an operation to the peers.
* \param op, Operation whose result should be broadcast
* \param rc, Result of the operation
*/
void stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc);
enum st_callback_flags {
- st_callback_unknown = 0x0000,
- st_callback_notify_fence = 0x0001,
- st_callback_device_add = 0x0004,
- st_callback_device_del = 0x0010,
- st_callback_notify_history = 0x0020
+ st_callback_unknown = 0x0000,
+ st_callback_notify_fence = 0x0001,
+ st_callback_device_add = 0x0004,
+ st_callback_device_del = 0x0010,
+ st_callback_notify_history = 0x0020,
+ st_callback_notify_history_synced = 0x0040
};
/*
* Complex fencing requirements are specified via fencing topologies.
* A topology consists of levels; each level is a list of fencing devices.
* Topologies are stored in a hash table by node name. When a node needs to be
* fenced, if it has an entry in the topology table, the levels are tried
* sequentially, and the devices in each level are tried sequentially.
* Fencing is considered successful as soon as any level succeeds;
* a level is considered successful if all its devices succeed.
* Essentially, all devices at a given level are "and-ed" and the
* levels are "or-ed".
*
* This structure is used for the topology table entries.
* Topology levels start from 1, so levels[0] is unused and always NULL.
*/
typedef struct stonith_topology_s {
/* NOTE(review): presumably a value as returned by stonith_level_kind() -- confirm */
int kind;
/*! Node name regex or attribute name=value for which topology applies */
char *target;
/* NOTE(review): the next three look like the parsed parts of target (a
 * pattern, or an attribute name plus value) -- confirm against the code that
 * fills them in */
char *target_value;
char *target_pattern;
char *target_attribute;
/*! Names of fencing devices at each topology level */
GListPtr levels[ST_LEVEL_MAX];
} stonith_topology_t;
/* Creation and release of the device list, topology list, remote operation
* list and metadata cache */
void init_device_list(void);
void free_device_list(void);
void init_topology_list(void);
void free_topology_list(void);
void free_stonith_remote_op_list(void);
void init_stonith_remote_op_hash_table(GHashTable **table);
void free_metadata_cache(void);
long long get_stonith_flag(const char *name);
/* Entry point for an incoming request */
void stonith_command(crm_client_t * client, uint32_t id, uint32_t flags,
xmlNode * op_request, const char *remote_peer);
/* Fencing device registration and removal */
int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib);
int stonith_device_remove(const char *id, gboolean from_cib);
/* Fencing topology levels (table entries are stonith_topology_t) */
char *stonith_level_key(xmlNode * msg, int mode);
int stonith_level_kind(xmlNode * msg);
int stonith_level_register(xmlNode * msg, char **desc);
int stonith_level_remove(xmlNode * msg, char **desc);
stonith_topology_t *find_topology_for_host(const char *host);
/* Replies and notifications */
void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
gboolean from_peer);
xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data,
int rc);
void
do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
void do_stonith_notify(int options, const char *type, int result, xmlNode * data);
void do_stonith_notify_device(int options, const char *op, int rc, const char *desc);
void do_stonith_notify_level(int options, const char *op, int rc, const char *desc);
/* Remote fencing operations (remote_fencing_op_t) */
remote_fencing_op_t *initiate_remote_stonith_op(crm_client_t * client, xmlNode * request,
gboolean manual_ack);
int process_remote_stonith_exec(xmlNode * msg);
int process_remote_stonith_query(xmlNode * msg);
void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
/* Fencing history */
int stonith_fence_history(xmlNode *msg, xmlNode **output,
const char *remote_peer, int options);
void stonith_fence_history_trim(void);
bool fencing_peer_active(crm_node_t *peer);
int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
/* Miscellaneous helpers */
gboolean string_in_list(GListPtr list, const char *item);
gboolean node_has_attr(const char *node, const char *name, const char *value);
/* Global state shared between the fencer's source files; these are
* declarations only, the objects are defined elsewhere */
extern char *stonith_our_uname;
extern gboolean stand_alone;
extern GHashTable *device_list;
extern GHashTable *topology;
extern long stonith_watchdog_timeout_ms;
extern GHashTable *stonith_remote_op_list;
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
index b6407324df..418a03c85e 100644
--- a/include/crm/stonith-ng.h
+++ b/include/crm/stonith-ng.h
@@ -1,554 +1,555 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Fencing aka. STONITH
* \ingroup fencing
*/
#ifndef STONITH_NG__H
# define STONITH_NG__H
# include <dlfcn.h>
# include <errno.h>
# include <stdbool.h> // bool
# include <stdint.h> // uint32_t
# include <time.h> // time_t
# define T_STONITH_NOTIFY_DISCONNECT "st_notify_disconnect"
# define T_STONITH_NOTIFY_FENCE "st_notify_fence"
# define T_STONITH_NOTIFY_HISTORY "st_notify_history"
+# define T_STONITH_NOTIFY_HISTORY_SYNCED "st_notify_history_synced"
/* *INDENT-OFF* */
/*! Values stored in the \c state member of struct stonith_s */
enum stonith_state {
    stonith_connected_command = 0,
    stonith_connected_query   = 1,
    stonith_disconnected      = 2,
};
/*!
 * \brief Option flags accepted by fencer API calls
 *
 * Every non-zero enumerator is a single bit, so the values can be OR-ed
 * together into the bitmask passed as the "options" argument of the API
 * operations.
 */
enum stonith_call_options {
    st_opt_none                = 0x00000000,
    st_opt_verbose             = 0x00000001,
    st_opt_allow_suicide       = 0x00000002,

    st_opt_manual_ack          = 0x00000008,
    st_opt_discard_reply       = 0x00000010,
/*  st_opt_all_replies         = 0x00000020, */
    st_opt_topology            = 0x00000040,
    st_opt_scope_local         = 0x00000100,
    st_opt_cs_nodeid           = 0x00000200,
    st_opt_sync_call           = 0x00001000,
    /*! Allow the timeout period for a callback to be adjusted
     *  based on the time the server reports the operation will take. */
    st_opt_timeout_updates     = 0x00002000,
    /*! Only report back if operation is a success in callback */
    st_opt_report_only_success = 0x00004000,
    /* used wherever appropriate - e.g. cleanup of history */
    st_opt_cleanup             = 0x00080000,
    /* used wherever appropriate - e.g. send out a history query to all nodes */
    st_opt_broadcast           = 0x00100000,
};
/*! The numeric values are significant: keep the order and values as they are */
enum op_state {
    st_query     = 0,
    st_exec      = 1,
    st_done      = 2,
    st_duplicate = 3,
    st_failed    = 4,
};
// Fence agent interface standards that are supported
enum stonith_namespace {
    st_namespace_invalid  = 0,
    st_namespace_any      = 1,
    st_namespace_internal = 2,  // Implemented internally by Pacemaker

    /* The two projects below are no longer active, but Pacemaker still
     * supports the fence agent interfaces they created, which remain in use.
     */
    st_namespace_rhcs     = 3,  // Red Hat Cluster Suite compatible
    st_namespace_lha      = 4,  // Linux-HA compatible
};
enum stonith_namespace stonith_text2namespace(const char *namespace_s);
const char *stonith_namespace2text(enum stonith_namespace st_namespace);
enum stonith_namespace stonith_get_namespace(const char *agent,
const char *namespace_s);
/*!
* \brief Node of a singly linked list of key/value string pairs
*
* Nodes are chained through \c next. This header declares
* stonith_key_value_add() and stonith_key_value_freeall() for working with
* such lists.
*/
typedef struct stonith_key_value_s {
char *key;
char *value;
struct stonith_key_value_s *next;
} stonith_key_value_t;
/*!
* \brief Entry in the list of fencing operations produced by the history()
*        API operation
*
* Entries are chained through \c next. This header declares
* stonith_history_free() for such lists.
*/
typedef struct stonith_history_s {
char *target;
char *action;
char *origin;
char *delegate;
char *client;
/* presumably an enum op_state value -- TODO confirm */
int state;
/* presumably the time at which the operation completed -- TODO confirm */
time_t completed;
struct stonith_history_s *next;
} stonith_history_t;
typedef struct stonith_s stonith_t;
/*!
* \brief Event handed to a notification handler installed with the
*        register_notification() API operation
*
* NOTE(review): most members are undocumented in this header; their names
* suggest they mirror the fields of the fencer's notification message, but
* confirm against the code that fills in this structure.
*/
typedef struct stonith_event_s
{
char *id;
char *type;
char *message;
char *operation;
int result;
char *origin;
char *target;
char *action;
char *executioner;
char *device;
/*! The name of the client that initiated the action. */
char *client_origin;
} stonith_event_t;
/*!
* \brief Data handed to a callback installed with the register_callback()
*        API operation
*/
typedef struct stonith_callback_data_s
{
/* presumably the result code of the asynchronous call -- TODO confirm */
int rc;
/* presumably the call ID the callback was registered for -- TODO confirm */
int call_id;
/* presumably the userdata pointer given at registration -- TODO confirm */
void *userdata;
} stonith_callback_data_t;
/*!
* \brief Table of function pointers through which a fencer API object is used
*
* A stonith_t refers to one of these tables through its \c cmds member, and
* every operation takes a stonith_t pointer as its first argument.
*/
typedef struct stonith_api_operations_s
{
/*!
* \brief Destroy the stonith api structure.
*/
int (*free) (stonith_t *st);
/*!
* \brief Connect to the local stonith daemon.
*
* \note NOTE(review): \p stonith_fd presumably receives the connection's
*       file descriptor when non-NULL -- confirm against the implementation.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*connect) (stonith_t *st, const char *name, int *stonith_fd);
/*!
* \brief Disconnect from the local stonith daemon.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*disconnect)(stonith_t *st);
/*!
* \brief Remove a registered stonith device with the local stonith daemon.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*remove_device)(
stonith_t *st, int options, const char *name);
/*!
* \brief Register a stonith device with the local stonith daemon.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*register_device)(
stonith_t *st, int options, const char *id,
const char *provider, const char *agent, stonith_key_value_t *params);
/*!
* \brief Remove a fencing level for a specific node.
*
* \note This feature is not available when stonith is in standalone mode.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*remove_level)(
stonith_t *st, int options, const char *node, int level);
/*!
* \brief Register a fencing level containing the fencing devices to be used
* at that level for a specific node.
*
* \note This feature is not available when stonith is in standalone mode.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*register_level)(
stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
/*!
* \brief Get the metadata documentation for a resource.
*
* \note Value is returned in output. Output must be freed when set.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*metadata)(stonith_t *st, int options,
const char *device, const char *provider, char **output, int timeout);
/*!
* \brief Retrieve a list of installed stonith agents
*
* \note if provider is not provided, all known agents will be returned
* \note list must be freed using stonith_key_value_freeall()
* \note call_options parameter is not used, it is reserved for future use.
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_agents)(stonith_t *stonith, int call_options, const char *provider,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Retrieve string listing hosts and port assignments from a local stonith device.
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*list)(stonith_t *st, int options, const char *id, char **list_output, int timeout);
/*!
* \brief Check to see if a local stonith device is reachable
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*monitor)(stonith_t *st, int options, const char *id, int timeout);
/*!
* \brief Check to see if a local stonith device's port is reachable
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*status)(stonith_t *st, int options, const char *id, const char *port, int timeout);
/*!
* \brief Retrieve a list of registered stonith devices.
*
* \note If node is provided, only devices that can fence the node id
* will be returned.
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*query)(stonith_t *st, int options, const char *node,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Issue a fencing action against a node.
*
* \note Possible actions are 'on', 'off', and 'reboot'.
*
* \param[in] st        Stonith connection
* \param[in] options   Call options
* \param[in] node      The target node to fence
* \param[in] action    The fencing action to take
* \param[in] timeout   The default per device timeout to use with each device
*                      capable of fencing the target.
* \param[in] tolerance NOTE(review): not documented in this header; confirm
*                      its meaning and units against the fencer implementation
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*fence)(stonith_t *st, int options, const char *node, const char *action,
int timeout, int tolerance);
/*!
* \brief Manually confirm that a node is down.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*confirm)(stonith_t *st, int options, const char *node);
/*!
* \brief Retrieve a list of fencing operations that have occurred for a specific node.
*
* \note History is not available in standalone mode.
* \note The list is returned through \p output as stonith_history_t entries;
*       see stonith_history_free(), declared in this header.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*history)(stonith_t *st, int options, const char *node, stonith_history_t **output, int timeout);
/*!
* \brief Install a handler that is called with a stonith_event_t for the
*        named event
*
* \note NOTE(review): \p event is presumably one of the T_STONITH_NOTIFY_*
*       strings defined in this header -- confirm against the implementation.
*/
int (*register_notification)(
stonith_t *st, const char *event,
void (*notify)(stonith_t *st, stonith_event_t *e));
/*!
* \brief Remove the notification handler for the named event
*/
int (*remove_notification)(stonith_t *st, const char *event);
/*!
* \brief Register a callback to receive the result of an asynchronous call
*
* \param[in] call_id The call ID to register callback for
* \param[in] timeout Default time to wait until callback expires
* \param[in] options Bitmask of \c stonith_call_options (respects
* \c st_opt_timeout_updates and
* \c st_opt_report_only_success)
* \param[in] userdata Pointer that will be given to callback
* \param[in] callback_name Unique name to identify callback
* \param[in] callback The callback function to register
*
* \return \c TRUE on success, \c FALSE if call_id is negative, -errno otherwise
*
* \todo This function should return \c pcmk_ok on success, and \c call_id
* when negative, but that would break backward compatibility.
*/
int (*register_callback)(stonith_t *st,
int call_id,
int timeout,
int options,
void *userdata,
const char *callback_name,
void (*callback)(stonith_t *st, stonith_callback_data_t *data));
/*!
* \brief Remove a registered callback for a given call id.
*/
int (*remove_callback)(stonith_t *st, int call_id, bool all_callbacks);
/*!
* \brief Remove fencing level for specific node, node regex or attribute
*
* \param[in] st Fencer connection to use
* \param[in] options Bitmask of stonith_call_options to pass to the fencer
* \param[in] node If not NULL, target level by this node name
* \param[in] pattern If not NULL, target by node name using this regex
* \param[in] attr If not NULL, target by this node attribute
* \param[in] value If not NULL, target by this node attribute value
* \param[in] level Index number of level to remove
*
* \return 0 on success, negative error code otherwise
*
* \note This feature is not available when stonith is in standalone mode.
* The caller should set only one of node, pattern or attr/value.
*/
int (*remove_level_full)(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value, int level);
/*!
* \brief Register fencing level for specific node, node regex or attribute
*
* \param[in] st Fencer connection to use
* \param[in] options Bitmask of stonith_call_options to pass to fencer
* \param[in] node If not NULL, target level by this node name
* \param[in] pattern If not NULL, target by node name using this regex
* \param[in] attr If not NULL, target by this node attribute
* \param[in] value If not NULL, target by this node attribute value
* \param[in] level Index number of level to add
* \param[in] device_list Devices to use in level
*
* \return 0 on success, negative error code otherwise
*
* \note This feature is not available when stonith is in standalone mode.
* The caller should set only one of node, pattern or attr/value.
*/
int (*register_level_full)(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value,
int level, stonith_key_value_t *device_list);
/*!
* \brief Validate an arbitrary stonith device configuration
*
* \param[in] st Stonithd connection to use
* \param[in] call_options Bitmask of stonith_call_options to use with fencer
* \param[in] rsc_id ID used to replace CIB secrets in params
* \param[in] namespace_s Namespace of fence agent to validate (optional)
* \param[in] agent Fence agent to validate
* \param[in] params Configuration parameters to pass to fence agent
* \param[in] timeout Fail if no response within this many seconds
* \param[out] output If non-NULL, where to store any agent output
* \param[out] error_output If non-NULL, where to store agent error output
*
* \return pcmk_ok if validation succeeds, -errno otherwise
*
* \note If pcmk_ok is returned, the caller is responsible for freeing
* the output (if requested).
*/
int (*validate)(stonith_t *st, int call_options, const char *rsc_id,
const char *namespace_s, const char *agent,
stonith_key_value_t *params, int timeout, char **output,
char **error_output);
} stonith_api_operations_t;
/*!
* \brief Fencer API object (typedef'd as stonith_t)
*
* This header declares stonith_api_new() and stonith_api_delete() for
* creating and destroying these objects.
*/
struct stonith_s
{
enum stonith_state state;
int call_id;
int call_timeout;
/* opaque pointer; its contents are not described in this header */
void *st_private;
/* table of the API operations; each takes the stonith_t as first argument */
stonith_api_operations_t *cmds;
};
/* *INDENT-ON* */
/* Core functions */
stonith_t *stonith_api_new(void);
void stonith_api_delete(stonith_t * st);
void stonith_dump_pending_callbacks(stonith_t * st);
// deprecated (use stonith_get_namespace() instead)
const char *get_stonith_provider(const char *agent, const char *provider);
bool stonith_dispatch(stonith_t * st);
stonith_key_value_t *stonith_key_value_add(stonith_key_value_t * kvp, const char *key,
const char *value);
void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values);
void stonith_history_free(stonith_history_t *history);
// Convenience functions
int stonith_api_connect_retry(stonith_t *st, const char *name,
int max_attempts);
/* Basic helpers that allows nodes to be fenced and the history to be
* queried without mainloop or the caller understanding the full API
*
* At least one of nodeid and uname are required
*/
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off);
time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress);
/*
* Helpers for using the above functions without install-time dependencies
*
* Usage:
* #include <crm/stonith-ng.h>
*
* To turn a node off by corosync nodeid:
* stonith_api_kick_helper(nodeid, 120, 1);
*
* To check the last fence date/time (also by nodeid):
* last = stonith_api_time_helper(nodeid, 0);
*
* To check if fencing is in progress:
* if(stonith_api_time_helper(nodeid, 1) > 0) { ... }
*
* eg.
#include <stdio.h>
#include <time.h>
#include <crm/stonith-ng.h>
int
main(int argc, char ** argv)
{
int rc = 0;
int nodeid = 102;
time_t when = stonith_api_time_helper(nodeid, 0);
printf("%d last fenced at %s\n", nodeid, ctime(&when));
rc = stonith_api_kick_helper(nodeid, 120, 1);
printf("%d fence result: %d\n", nodeid, rc);
when = stonith_api_time_helper(nodeid, 0);
printf("%d last fenced at %s\n", nodeid, ctime(&when));
return 0;
}
*/
# define STONITH_LIBRARY "libstonithd.so.26"
/*
 * Pointer types for stonith_api_kick() and stonith_api_time(), used by the
 * helper functions in this header to call those functions after looking them
 * up with dlsym().
 *
 * The parameter lists match the prototypes of the two functions exactly
 * (uint32_t nodeid), because calling a function through a pointer to an
 * incompatible function type is undefined behaviour.
 */
typedef int (*st_api_kick_fn) (uint32_t nodeid, const char *uname, int timeout, bool off);
typedef time_t (*st_api_time_fn) (uint32_t nodeid, const char *uname, bool in_progress);
static inline int
stonith_api_kick_helper(uint32_t nodeid, int timeout, bool off)
{
static void *st_library = NULL;
static st_api_kick_fn st_kick_fn;
if (st_library == NULL) {
st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
}
if (st_library && st_kick_fn == NULL) {
st_kick_fn = (st_api_kick_fn) dlsym(st_library, "stonith_api_kick");
}
if (st_kick_fn == NULL) {
#ifdef ELIBACC
return -ELIBACC;
#else
return -ENOSYS;
#endif
}
return (*st_kick_fn) (nodeid, NULL, timeout, off);
}
static inline time_t
stonith_api_time_helper(uint32_t nodeid, bool in_progress)
{
static void *st_library = NULL;
static st_api_time_fn st_time_fn;
if (st_library == NULL) {
st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
}
if (st_library && st_time_fn == NULL) {
st_time_fn = (st_api_time_fn) dlsym(st_library, "stonith_api_time");
}
if (st_time_fn == NULL) {
return 0;
}
return (*st_time_fn) (nodeid, NULL, in_progress);
}
/**
* Does the given agent describe a stonith resource that can exist?
*
* \param[in] agent What is the name of the agent?
* \param[in] timeout Timeout to use when querying. If 0 is given,
* use a default of 120.
*
* \return A boolean
*/
bool stonith_agent_exists(const char *agent, int timeout);
/*!
* \brief Turn stonith action into a more readable string.
*
* \param action Stonith action
*/
const char *stonith_action_str(const char *action);
#ifdef __cplusplus
}
#endif
#endif
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Nov 23, 4:47 PM (17 h, 2 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018939
Default Alt Text
(96 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment