Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4639014
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
236 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 7dca748e75..4beef51050 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1,3712 +1,3712 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-fenced.h>
GHashTable *device_list = NULL;
GHashTable *topology = NULL;
static GList *cmd_list = NULL;
static GHashTable *fenced_handlers = NULL;
struct device_search_s {
/* target of fence action */
char *host;
/* requested fence action */
char *action;
/* timeout to use if a device is queried dynamically for possible targets */
int per_device_timeout;
/* number of registered fencing devices at time of request */
int replies_needed;
/* number of device replies received so far */
int replies_received;
/* whether the target is eligible to perform requested action (or off) */
bool allow_suicide;
/* private data to pass to search callback function */
void *user_data;
/* function to call when all replies have been received */
void (*callback) (GList * devices, void *user_data);
/* devices capable of performing requested action (or off if remapping) */
GList *capable;
/* Whether to perform searches that support the action */
uint32_t support_action_only;
};
static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(int pid, const pcmk__action_result_t *result,
void *user_data);
static void search_devices_record_result(struct device_search_s *search, const char *device,
gboolean can_fence);
static int get_agent_metadata(const char *agent, xmlNode **metadata);
static void read_action_metadata(stonith_device_t *device);
static enum fenced_target_by unpack_level_kind(const xmlNode *level);
typedef struct async_command_s {
int id;
int pid;
int fd_stdout;
int options;
int default_timeout; /* seconds */
int timeout; /* seconds */
int start_delay; // seconds (-1 means disable static/random fencing delays)
int delay_id;
char *op;
char *origin;
char *client;
char *client_name;
char *remote_op_id;
char *target;
uint32_t target_nodeid;
char *action;
char *device;
GList *device_list;
GList *next_device_iter; // device_list entry for next device to execute
void *internal_user_data;
void (*done_cb) (int pid, const pcmk__action_result_t *result,
void *user_data);
guint timer_sigterm;
guint timer_sigkill;
/*! If the operation timed out, this is the last signal
* we sent to the process to get it to terminate */
int last_timeout_signo;
stonith_device_t *active_on;
stonith_device_t *activating_on;
} async_command_t;
static xmlNode *construct_async_reply(const async_command_t *cmd,
const pcmk__action_result_t *result);
static gboolean
is_action_required(const char *action, const stonith_device_t *device)
{
return (device != NULL) && device->automatic_unfencing
&& pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none);
}
static int
get_action_delay_max(const stonith_device_t *device, const char *action)
{
const char *value = NULL;
guint delay_max = 0U;
if (!pcmk__is_fencing_action(action)) {
return 0;
}
value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
if (value) {
pcmk_parse_interval_spec(value, &delay_max);
delay_max /= 1000;
}
return (int) delay_max;
}
static int
get_action_delay_base(const stonith_device_t *device, const char *action,
const char *target)
{
char *hash_value = NULL;
guint delay_base = 0U;
if (!pcmk__is_fencing_action(action)) {
return 0;
}
hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);
if (hash_value) {
char *value = strdup(hash_value);
char *valptr = value;
CRM_ASSERT(value != NULL);
if (target != NULL) {
for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) {
char *mapval = strchr(val, ':');
if (mapval == NULL || mapval[1] == 0) {
crm_err("pcmk_delay_base: empty value in mapping", val);
continue;
}
if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) {
value = mapval + 1;
crm_debug("pcmk_delay_base mapped to %s for %s",
value, target);
break;
}
}
}
if (strchr(value, ':') == 0) {
pcmk_parse_interval_spec(value, &delay_base);
delay_base /= 1000;
}
free(valptr);
}
return (int) delay_base;
}
/*!
* \internal
* \brief Override STONITH timeout with pcmk_*_timeout if available
*
* \param[in] device STONITH device to use
* \param[in] action STONITH action name
* \param[in] default_timeout Timeout to use if device does not have
* a pcmk_*_timeout parameter for action
*
* \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
* \note For consistency, it would be nice if reboot/off/on timeouts could be
* set the same way as start/stop/monitor timeouts, i.e. with an
* <operation> entry in the fencing resource configuration. However that
* is insufficient because fencing devices may be registered directly via
* the fencer's register_device() API instead of going through the CIB
* (e.g. stonith_admin uses it for its -R option, and the executor uses it
* to ensure a device is registered when a command is issued). As device
* properties, pcmk_*_timeout parameters can be grabbed by the fencer when
* the device is registered, whether by CIB change or API call.
*/
static int
get_action_timeout(const stonith_device_t *device, const char *action,
int default_timeout)
{
if (action && device && device->params) {
char buffer[64] = { 0, };
const char *value = NULL;
/* If "reboot" was requested but the device does not support it,
* we will remap to "off", so check timeout for "off" instead
*/
if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)
&& !pcmk_is_set(device->flags, st_device_supports_reboot)) {
crm_trace("%s doesn't support reboot, using timeout for off instead",
device->id);
action = PCMK_ACTION_OFF;
}
/* If the device config specified an action-specific timeout, use it */
snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
value = g_hash_table_lookup(device->params, buffer);
if (value) {
return atoi(value);
}
}
return default_timeout;
}
/*!
* \internal
* \brief Get the currently executing device for a fencing operation
*
* \param[in] cmd Fencing operation to check
*
* \return Currently executing device for \p cmd if any, otherwise NULL
*/
static stonith_device_t *
cmd_device(const async_command_t *cmd)
{
if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) {
return NULL;
}
return g_hash_table_lookup(device_list, cmd->device);
}
/*!
* \internal
* \brief Return the configured reboot action for a given device
*
* \param[in] device_id Device ID
*
* \return Configured reboot action for \p device_id
*/
const char *
fenced_device_reboot_action(const char *device_id)
{
const char *action = NULL;
if ((device_list != NULL) && (device_id != NULL)) {
stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
if ((device != NULL) && (device->params != NULL)) {
action = g_hash_table_lookup(device->params, "pcmk_reboot_action");
}
}
return pcmk__s(action, PCMK_ACTION_REBOOT);
}
/*!
* \internal
* \brief Check whether a given device supports the "on" action
*
* \param[in] device_id Device ID
*
* \return true if \p device_id supports "on", otherwise false
*/
bool
fenced_device_supports_on(const char *device_id)
{
if ((device_list != NULL) && (device_id != NULL)) {
stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
if (device != NULL) {
return pcmk_is_set(device->flags, st_device_supports_on);
}
}
return false;
}
static void
free_async_command(async_command_t * cmd)
{
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
cmd_list = g_list_remove(cmd_list, cmd);
g_list_free_full(cmd->device_list, free);
free(cmd->device);
free(cmd->action);
free(cmd->target);
free(cmd->remote_op_id);
free(cmd->client);
free(cmd->client_name);
free(cmd->origin);
free(cmd->op);
free(cmd);
}
/*!
* \internal
* \brief Create a new asynchronous fencing operation from request XML
*
* \param[in] msg Fencing request XML (from IPC or CPG)
*
* \return Newly allocated fencing operation on success, otherwise NULL
*
* \note This asserts on memory errors, so a NULL return indicates an
* unparseable message.
*/
static async_command_t *
create_async_command(xmlNode *msg)
{
xmlNode *op = NULL;
async_command_t *cmd = NULL;
if (msg == NULL) {
return NULL;
}
op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR);
if (op == NULL) {
return NULL;
}
cmd = calloc(1, sizeof(async_command_t));
CRM_ASSERT(cmd != NULL);
// All messages must include these
cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION);
cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP);
cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID);
if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
free_async_command(cmd);
return NULL;
}
crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id));
crm_element_value_int(msg, PCMK__XA_ST_CALLOPT, &(cmd->options));
crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay));
crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout));
cmd->timeout = cmd->default_timeout;
cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC);
cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP);
cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME);
cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET);
cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID);
cmd->done_cb = st_child_done;
// Track in global command list
cmd_list = g_list_append(cmd_list, cmd);
return cmd;
}
static int
get_action_limit(stonith_device_t * device)
{
const char *value = NULL;
int action_limit = 1;
value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT);
if ((value == NULL)
|| (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
|| (action_limit == 0)) {
action_limit = 1;
}
return action_limit;
}
static int
get_active_cmds(stonith_device_t * device)
{
int counter = 0;
GList *gIter = NULL;
GList *gIterNext = NULL;
CRM_CHECK(device != NULL, return 0);
for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
async_command_t *cmd = gIter->data;
gIterNext = gIter->next;
if (cmd->active_on == device) {
counter++;
}
}
return counter;
}
static void
fork_cb(int pid, void *user_data)
{
async_command_t *cmd = (async_command_t *) user_data;
stonith_device_t * device =
/* in case of a retry we've done the move from
activating_on to active_on already
*/
cmd->activating_on?cmd->activating_on:cmd->active_on;
CRM_ASSERT(device);
crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
cmd->action, pid,
((cmd->target == NULL)? "" : " targeting "),
pcmk__s(cmd->target, ""), device->id, cmd->timeout);
cmd->active_on = device;
cmd->activating_on = NULL;
}
static int
get_agent_metadata_cb(gpointer data) {
stonith_device_t *device = data;
guint period_ms;
switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
case pcmk_rc_ok:
if (device->agent_metadata) {
read_action_metadata(device);
stonith__device_parameter_flags(&(device->flags), device->id,
device->agent_metadata);
}
return G_SOURCE_REMOVE;
case EAGAIN:
period_ms = pcmk__mainloop_timer_get_period(device->timer);
if (period_ms < 160 * 1000) {
mainloop_timer_set_period(device->timer, 2 * period_ms);
}
return G_SOURCE_CONTINUE;
default:
return G_SOURCE_REMOVE;
}
}
/*!
* \internal
* \brief Call a command's action callback for an internal (not library) result
*
* \param[in,out] cmd Command to report result for
* \param[in] execution_status Execution status to use for result
* \param[in] exit_status Exit status to use for result
* \param[in] exit_reason Exit reason to use for result
*/
static void
report_internal_result(async_command_t *cmd, int exit_status,
int execution_status, const char *exit_reason)
{
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
pcmk__set_result(&result, exit_status, execution_status, exit_reason);
cmd->done_cb(0, &result, cmd);
pcmk__reset_result(&result);
}
static gboolean
stonith_device_execute(stonith_device_t * device)
{
int exec_rc = 0;
const char *action_str = NULL;
const char *host_arg = NULL;
async_command_t *cmd = NULL;
stonith_action_t *action = NULL;
int active_cmds = 0;
int action_limit = 0;
GList *gIter = NULL;
GList *gIterNext = NULL;
CRM_CHECK(device != NULL, return FALSE);
active_cmds = get_active_cmds(device);
action_limit = get_action_limit(device);
if (action_limit > -1 && active_cmds >= action_limit) {
crm_trace("%s is over its action limit of %d (%u active action%s)",
device->id, action_limit, active_cmds,
pcmk__plural_s(active_cmds));
return TRUE;
}
for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
async_command_t *pending_op = gIter->data;
gIterNext = gIter->next;
if (pending_op && pending_op->delay_id) {
crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
"waiting for start delay of %ds",
pending_op->action,
((pending_op->target == NULL)? "" : " targeting "),
pcmk__s(pending_op->target, ""),
device->id, pending_op->start_delay);
continue;
}
device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
g_list_free_1(gIter);
cmd = pending_op;
break;
}
if (cmd == NULL) {
crm_trace("No actions using %s are needed", device->id);
return TRUE;
}
if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
if (pcmk__is_fencing_action(cmd->action)) {
if (node_does_watchdog_fencing(stonith_our_uname)) {
pcmk__panic(__func__);
goto done;
}
} else {
crm_info("Faking success for %s watchdog operation", cmd->action);
report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
goto done;
}
}
#if SUPPORT_CIBSECRETS
exec_rc = pcmk__substitute_secrets(device->id, device->params);
if (exec_rc != pcmk_rc_ok) {
if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) {
crm_info("Proceeding with stop operation for %s "
"despite being unable to load CIB secrets (%s)",
device->id, pcmk_rc_str(exec_rc));
} else {
crm_err("Considering %s unconfigured "
"because unable to load CIB secrets: %s",
device->id, pcmk_rc_str(exec_rc));
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
"Failed to get CIB secrets");
goto done;
}
}
#endif
action_str = cmd->action;
if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none)
&& !pcmk_is_set(device->flags, st_device_supports_reboot)) {
crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
"because agent '%s' does not support reboot",
((cmd->target == NULL)? "" : " targeting "),
pcmk__s(cmd->target, ""), device->id, device->agent);
action_str = PCMK_ACTION_OFF;
}
if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
host_arg = "port";
} else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
host_arg = "plug";
}
action = stonith__action_create(device->agent, action_str, cmd->target,
cmd->target_nodeid, cmd->timeout,
device->params, device->aliases, host_arg);
/* for async exec, exec_rc is negative for early error exit
otherwise handling of success/errors is done via callbacks */
cmd->activating_on = device;
exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
fork_cb);
if (exec_rc < 0) {
cmd->activating_on = NULL;
cmd->done_cb(0, stonith__action_result(action), cmd);
stonith__destroy_action(action);
}
done:
/* Device might get triggered to work by multiple fencing commands
* simultaneously. Trigger the device again to make sure any
* remaining concurrent commands get executed. */
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
return TRUE;
}
static gboolean
stonith_device_dispatch(gpointer user_data)
{
return stonith_device_execute(user_data);
}
static gboolean
start_delay_helper(gpointer data)
{
async_command_t *cmd = data;
stonith_device_t *device = cmd_device(cmd);
cmd->delay_id = 0;
if (device) {
mainloop_set_trigger(device->work);
}
return FALSE;
}
static void
schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
{
int delay_max = 0;
int delay_base = 0;
int requested_delay = cmd->start_delay;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(device != NULL, return);
if (cmd->device) {
free(cmd->device);
}
if (device->include_nodeid && (cmd->target != NULL)) {
crm_node_t *node = pcmk__get_node(0, cmd->target, NULL,
pcmk__node_search_cluster);
cmd->target_nodeid = node->id;
}
cmd->device = strdup(device->id);
cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
if (cmd->remote_op_id) {
crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
"with op id %.8s and timeout %ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
} else {
crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->client, cmd->timeout);
}
device->pending_ops = g_list_append(device->pending_ops, cmd);
mainloop_set_trigger(device->work);
// Value -1 means disable any static/random fencing delays
if (requested_delay < 0) {
return;
}
delay_max = get_action_delay_max(device, cmd->action);
delay_base = get_action_delay_base(device, cmd->action, cmd->target);
if (delay_max == 0) {
delay_max = delay_base;
}
if (delay_max < delay_base) {
crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
"(limiting to maximum delay)",
delay_base, delay_max, cmd->action, device->id);
delay_base = delay_max;
}
if (delay_max > 0) {
// coverity[dontcall] It doesn't matter here if rand() is predictable
cmd->start_delay +=
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+ delay_base;
}
if (cmd->start_delay > 0) {
crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
" timeout=%ds requested_delay=%ds base=%ds max=%ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->start_delay, cmd->timeout,
requested_delay, delay_base, delay_max);
cmd->delay_id =
g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
}
}
static void
free_device(gpointer data)
{
GList *gIter = NULL;
stonith_device_t *device = data;
g_hash_table_destroy(device->params);
g_hash_table_destroy(device->aliases);
for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
async_command_t *cmd = gIter->data;
crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Device was removed before action could be executed");
}
g_list_free(device->pending_ops);
g_list_free_full(device->targets, free);
if (device->timer) {
mainloop_timer_stop(device->timer);
mainloop_timer_del(device->timer);
}
mainloop_destroy_trigger(device->work);
free_xml(device->agent_metadata);
free(device->namespace);
if (device->on_target_actions != NULL) {
g_string_free(device->on_target_actions, TRUE);
}
free(device->agent);
free(device->id);
free(device);
}
void free_device_list(void)
{
if (device_list != NULL) {
g_hash_table_destroy(device_list);
device_list = NULL;
}
}
void
init_device_list(void)
{
if (device_list == NULL) {
device_list = pcmk__strkey_table(NULL, free_device);
}
}
static GHashTable *
build_port_aliases(const char *hostmap, GList ** targets)
{
char *name = NULL;
int last = 0, lpc = 0, max = 0, added = 0;
GHashTable *aliases = pcmk__strikey_table(free, free);
if (hostmap == NULL) {
return aliases;
}
max = strlen(hostmap);
for (; lpc <= max; lpc++) {
switch (hostmap[lpc]) {
/* Skip escaped chars */
case '\\':
lpc++;
break;
/* Assignment chars */
case '=':
case ':':
if (lpc > last) {
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, hostmap + last, lpc - last);
}
last = lpc + 1;
break;
/* Delimeter chars */
/* case ',': Potentially used to specify multiple ports */
case 0:
case ';':
case ' ':
case '\t':
if (name) {
char *value = NULL;
int k = 0;
value = calloc(1, 1 + lpc - last);
memcpy(value, hostmap + last, lpc - last);
for (int i = 0; value[i] != '\0'; i++) {
if (value[i] != '\\') {
value[k++] = value[i];
}
}
value[k] = '\0';
crm_debug("Adding alias '%s'='%s'", name, value);
g_hash_table_replace(aliases, name, value);
if (targets) {
*targets = g_list_append(*targets, strdup(value));
}
value = NULL;
name = NULL;
added++;
} else if (lpc > last) {
crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
}
last = lpc + 1;
break;
}
if (hostmap[lpc] == 0) {
break;
}
}
if (added == 0) {
crm_info("No host mappings detected in '%s'", hostmap);
}
free(name);
return aliases;
}
GHashTable *metadata_cache = NULL;
void
free_metadata_cache(void) {
if (metadata_cache != NULL) {
g_hash_table_destroy(metadata_cache);
metadata_cache = NULL;
}
}
static void
init_metadata_cache(void) {
if (metadata_cache == NULL) {
metadata_cache = pcmk__strkey_table(free, free);
}
}
int
get_agent_metadata(const char *agent, xmlNode ** metadata)
{
char *buffer = NULL;
if (metadata == NULL) {
return EINVAL;
}
*metadata = NULL;
if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
return pcmk_rc_ok;
}
init_metadata_cache();
buffer = g_hash_table_lookup(metadata_cache, agent);
if (buffer == NULL) {
stonith_t *st = stonith_api_new();
int rc;
if (st == NULL) {
crm_warn("Could not get agent meta-data: "
"API memory allocation failed");
return EAGAIN;
}
rc = st->cmds->metadata(st, st_opt_sync_call, agent,
NULL, &buffer, 10);
stonith_api_delete(st);
if (rc || !buffer) {
crm_err("Could not retrieve metadata for fencing agent %s", agent);
return EAGAIN;
}
g_hash_table_replace(metadata_cache, strdup(agent), buffer);
}
*metadata = string2xml(buffer);
return pcmk_rc_ok;
}
static gboolean
is_nodeid_required(xmlNode * xml)
{
xmlXPathObjectPtr xpath = NULL;
if (stand_alone) {
return FALSE;
}
if (!xml) {
return FALSE;
}
xpath = xpath_search(xml,
"//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']");
if (numXpathResults(xpath) <= 0) {
freeXpathObject(xpath);
return FALSE;
}
freeXpathObject(xpath);
return TRUE;
}
static void
read_action_metadata(stonith_device_t *device)
{
xmlXPathObjectPtr xpath = NULL;
int max = 0;
int lpc = 0;
if (device->agent_metadata == NULL) {
return;
}
xpath = xpath_search(device->agent_metadata, "//action");
max = numXpathResults(xpath);
if (max <= 0) {
freeXpathObject(xpath);
return;
}
for (lpc = 0; lpc < max; lpc++) {
const char *action = NULL;
xmlNode *match = getXpathResult(xpath, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match == NULL) { continue; };
action = crm_element_value(match, PCMK_XA_NAME);
if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_list);
} else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_status);
} else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_reboot);
} else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
/* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it
* joins.
*
* @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for
* PCMK_XA_AUTOMATIC.
*/
if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC)
|| pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) {
device->automatic_unfencing = TRUE;
}
stonith__set_device_flags(device->flags, device->id,
st_device_supports_on);
}
if ((action != NULL)
&& pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) {
pcmk__add_word(&(device->on_target_actions), 64, action);
}
}
freeXpathObject(xpath);
}
/*!
* \internal
* \brief Set a pcmk_*_action parameter if not already set
*
* \param[in,out] params Device parameters
* \param[in] action Name of action
* \param[in] value Value to use if action is not already set
*/
static void
map_action(GHashTable *params, const char *action, const char *value)
{
char *key = crm_strdup_printf("pcmk_%s_action", action);
if (g_hash_table_lookup(params, key)) {
crm_warn("Ignoring %s='%s', see %s instead",
STONITH_ATTR_ACTION_OP, value, key);
free(key);
} else {
crm_warn("Mapping %s='%s' to %s='%s'",
STONITH_ATTR_ACTION_OP, value, key, value);
g_hash_table_insert(params, key, strdup(value));
}
}
/*!
* \internal
* \brief Create device parameter table from XML
*
* \param[in] name Device name (used for logging only)
* \param[in] dev XML containing device parameters
*/
static GHashTable *
xml2device_params(const char *name, const xmlNode *dev)
{
GHashTable *params = xml2list(dev);
const char *value;
/* Action should never be specified in the device configuration,
* but we support it for users who are familiar with other software
* that worked that way.
*/
value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
if (value != NULL) {
crm_warn("%s has '%s' parameter, which should never be specified in configuration",
name, STONITH_ATTR_ACTION_OP);
if (*value == '\0') {
crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) {
crm_warn("Ignoring %s='reboot' (see " PCMK_OPT_STONITH_ACTION
" cluster property instead)",
STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, PCMK_ACTION_OFF) == 0) {
map_action(params, PCMK_ACTION_REBOOT, value);
} else {
map_action(params, PCMK_ACTION_OFF, value);
map_action(params, PCMK_ACTION_REBOOT, value);
}
g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
}
return params;
}
static const char *
target_list_type(stonith_device_t * dev)
{
const char *check_type = NULL;
check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK);
if (check_type == NULL) {
if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
check_type = "static-list";
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
check_type = "static-list";
} else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
check_type = "dynamic-list";
} else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
check_type = "status";
} else {
check_type = PCMK_VALUE_NONE;
}
}
return check_type;
}
static stonith_device_t *
build_device_from_xml(xmlNode *dev)
{
const char *value;
stonith_device_t *device = NULL;
char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT);
CRM_CHECK(agent != NULL, return device);
device = calloc(1, sizeof(stonith_device_t));
CRM_CHECK(device != NULL, {free(agent); return device;});
device->id = crm_element_value_copy(dev, PCMK_XA_ID);
device->agent = agent;
device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE);
device->params = xml2device_params(device->id, dev);
value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
if (value) {
device->targets = stonith__parse_targets(value);
}
value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
device->aliases = build_port_aliases(value, &(device->targets));
value = target_list_type(device);
if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) {
/* Other than "static-list", dev-> targets is unnecessary. */
g_list_free_full(device->targets, free);
device->targets = NULL;
}
switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
case pcmk_rc_ok:
if (device->agent_metadata) {
read_action_metadata(device);
stonith__device_parameter_flags(&(device->flags), device->id,
device->agent_metadata);
}
break;
case EAGAIN:
if (device->timer == NULL) {
device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
TRUE, get_agent_metadata_cb, device);
}
if (!mainloop_timer_running(device->timer)) {
mainloop_timer_start(device->timer);
}
break;
default:
break;
}
value = g_hash_table_lookup(device->params, "nodeid");
if (!value) {
device->include_nodeid = is_nodeid_required(device->agent_metadata);
}
value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES);
if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) {
device->automatic_unfencing = TRUE;
}
if (is_action_required(PCMK_ACTION_ON, device)) {
crm_info("Fencing device '%s' requires unfencing", device->id);
}
if (device->on_target_actions != NULL) {
crm_info("Fencing device '%s' requires actions (%s) to be executed "
"on target", device->id,
(const char *) device->on_target_actions->str);
}
device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
/* TODO: Hook up priority */
return device;
}
static void
schedule_internal_command(const char *origin,
stonith_device_t * device,
const char *action,
const char *target,
int timeout,
void *internal_user_data,
void (*done_cb) (int pid,
const pcmk__action_result_t *result,
void *user_data))
{
async_command_t *cmd = NULL;
cmd = calloc(1, sizeof(async_command_t));
cmd->id = -1;
cmd->default_timeout = timeout ? timeout : 60;
cmd->timeout = cmd->default_timeout;
cmd->action = strdup(action);
pcmk__str_update(&cmd->target, target);
cmd->device = strdup(device->id);
cmd->origin = strdup(origin);
cmd->client = strdup(crm_system_name);
cmd->client_name = strdup(crm_system_name);
cmd->internal_user_data = internal_user_data;
cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
schedule_stonith_command(cmd, device);
}
// Fence agent status commands use custom exit status codes
enum fence_status_code {
fence_status_invalid = -1,
fence_status_active = 0,
fence_status_unknown = 1,
fence_status_inactive = 2,
};
static void
status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd_device(cmd);
gboolean can = FALSE;
free_async_command(cmd);
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (result->execution_status != PCMK_EXEC_DONE) {
crm_warn("Assuming %s cannot fence %s "
"because status could not be executed: %s%s%s%s",
dev->id, search->host,
pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
search_devices_record_result(search, dev->id, FALSE);
return;
}
switch (result->exit_status) {
case fence_status_unknown:
crm_trace("%s reported it cannot fence %s", dev->id, search->host);
break;
case fence_status_active:
case fence_status_inactive:
crm_trace("%s reported it can fence %s", dev->id, search->host);
can = TRUE;
break;
default:
crm_warn("Assuming %s cannot fence %s "
"(status returned unknown code %d)",
dev->id, search->host, result->exit_status);
break;
}
search_devices_record_result(search, dev->id, can);
}
static void
dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd_device(cmd);
gboolean can_fence = FALSE;
free_async_command(cmd);
/* Host/alias must be in the list output to be eligible to be fenced
*
* Will cause problems if down'd nodes aren't listed or (for virtual nodes)
* if the guest is still listed despite being moved to another machine
*/
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (pcmk__result_ok(result)) {
crm_info("Refreshing target list for %s", dev->id);
g_list_free_full(dev->targets, free);
dev->targets = stonith__parse_targets(result->action_stdout);
dev->targets_age = time(NULL);
} else if (dev->targets != NULL) {
if (result->execution_status == PCMK_EXEC_DONE) {
crm_info("Reusing most recent target list for %s "
"because list returned error code %d",
dev->id, result->exit_status);
} else {
crm_info("Reusing most recent target list for %s "
"because list could not be executed: %s%s%s%s",
dev->id, pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
}
} else { // We have never successfully executed list
if (result->execution_status == PCMK_EXEC_DONE) {
crm_warn("Assuming %s cannot fence %s "
"because list returned error code %d",
dev->id, search->host, result->exit_status);
} else {
crm_warn("Assuming %s cannot fence %s "
"because list could not be executed: %s%s%s%s",
dev->id, search->host,
pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
}
/* Fall back to pcmk_host_check="status" if the user didn't explicitly
* specify "dynamic-list".
*/
if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
crm_notice("Switching to pcmk_host_check='status' for %s", dev->id);
pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, "status");
}
}
if (dev->targets) {
const char *alias = g_hash_table_lookup(dev->aliases, search->host);
if (!alias) {
alias = search->host;
}
if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
can_fence = TRUE;
}
}
search_devices_record_result(search, dev->id, can_fence);
}
/*!
* \internal
* \brief Returns true if any key in first is not in second or second has a different value for key
*/
static int
device_params_diff(GHashTable *first, GHashTable *second) {
char *key = NULL;
char *value = NULL;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, first);
while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
if(strstr(key, "CRM_meta") == key) {
continue;
} else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) {
continue;
} else {
char *other_value = g_hash_table_lookup(second, key);
if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
crm_trace("Different value for %s: %s != %s", key, other_value, value);
return 1;
}
}
}
return 0;
}
/*!
* \internal
* \brief Checks to see if an identical device already exists in the device_list
*/
static stonith_device_t *
device_has_duplicate(const stonith_device_t *device)
{
stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
if (!dup) {
crm_trace("No match for %s", device->id);
return NULL;
} else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
crm_trace("Different agent: %s != %s", dup->agent, device->agent);
return NULL;
}
/* Use calculate_operation_digest() here? */
if (device_params_diff(device->params, dup->params) ||
device_params_diff(dup->params, device->params)) {
return NULL;
}
crm_trace("Match");
return dup;
}
int
stonith_device_register(xmlNode *dev, gboolean from_cib)
{
stonith_device_t *dup = NULL;
stonith_device_t *device = build_device_from_xml(dev);
guint ndevices = 0;
int rv = pcmk_ok;
CRM_CHECK(device != NULL, return -ENOMEM);
/* do we have a watchdog-device? */
if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
if (stonith_watchdog_timeout_ms <= 0) {
crm_err("Ignoring watchdog fence device without "
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set.");
rv = -ENODEV;
/* fall through to cleanup & return */
} else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
crm_err("Ignoring watchdog fence device with unknown "
"agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
device->agent?device->agent:"");
rv = -ENODEV;
/* fall through to cleanup & return */
} else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
pcmk__str_none)) {
crm_err("Ignoring watchdog fence device "
"named %s !='"STONITH_WATCHDOG_ID"'.",
device->id?device->id:"");
rv = -ENODEV;
/* fall through to cleanup & return */
} else {
if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
pcmk__str_none)) {
/* this either has an empty list or the targets
configured for watchdog-fencing
*/
g_list_free_full(stonith_watchdog_targets, free);
stonith_watchdog_targets = device->targets;
device->targets = NULL;
}
if (node_does_watchdog_fencing(stonith_our_uname)) {
g_list_free_full(device->targets, free);
device->targets = stonith__parse_targets(stonith_our_uname);
pcmk__insert_dup(device->params,
PCMK_STONITH_HOST_LIST, stonith_our_uname);
/* proceed as with any other stonith-device */
break;
}
crm_debug("Skip registration of watchdog fence device on node not in host-list.");
/* cleanup and fall through to more cleanup and return */
device->targets = NULL;
stonith_device_remove(device->id, from_cib);
}
free_device(device);
return rv;
} while (0);
dup = device_has_duplicate(device);
if (dup) {
ndevices = g_hash_table_size(device_list);
crm_debug("Device '%s' already in device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
free_device(device);
device = dup;
dup = g_hash_table_lookup(device_list, device->id);
dup->dirty = FALSE;
} else {
stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
if (from_cib && old && old->api_registered) {
/* If the cib is writing over an entry that is shared with a stonith client,
* copy any pending ops that currently exist on the old entry to the new one.
* Otherwise the pending ops will be reported as failures
*/
crm_info("Overwriting existing entry for %s from CIB", device->id);
device->pending_ops = old->pending_ops;
device->api_registered = TRUE;
old->pending_ops = NULL;
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
}
g_hash_table_replace(device_list, device->id, device);
ndevices = g_hash_table_size(device_list);
crm_notice("Added '%s' to device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
}
if (from_cib) {
device->cib_registered = TRUE;
} else {
device->api_registered = TRUE;
}
return pcmk_ok;
}
void
stonith_device_remove(const char *id, bool from_cib)
{
stonith_device_t *device = g_hash_table_lookup(device_list, id);
guint ndevices = 0;
if (!device) {
ndevices = g_hash_table_size(device_list);
crm_info("Device '%s' not found (%d active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
return;
}
if (from_cib) {
device->cib_registered = FALSE;
} else {
device->verified = FALSE;
device->api_registered = FALSE;
}
if (!device->cib_registered && !device->api_registered) {
g_hash_table_remove(device_list, id);
ndevices = g_hash_table_size(device_list);
crm_info("Removed '%s' from device list (%d active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
} else {
crm_trace("Not removing '%s' from device list (%d active) because "
"still registered via:%s%s",
id, g_hash_table_size(device_list),
(device->cib_registered? " cib" : ""),
(device->api_registered? " api" : ""));
}
}
/*!
* \internal
* \brief Return the number of stonith levels registered for a node
*
* \param[in] tp Node's topology table entry
*
* \return Number of non-NULL levels in topology entry
* \note This function is used only for log messages.
*/
static int
count_active_levels(const stonith_topology_t *tp)
{
int lpc = 0;
int count = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
count++;
}
}
return count;
}
static void
free_topology_entry(gpointer data)
{
stonith_topology_t *tp = data;
int lpc = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
g_list_free_full(tp->levels[lpc], free);
}
}
free(tp->target);
free(tp->target_value);
free(tp->target_pattern);
free(tp->target_attribute);
free(tp);
}
void
free_topology_list(void)
{
if (topology != NULL) {
g_hash_table_destroy(topology);
topology = NULL;
}
}
void
init_topology_list(void)
{
if (topology == NULL) {
topology = pcmk__strkey_table(NULL, free_topology_entry);
}
}
char *
stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
{
if (mode == fenced_target_by_unknown) {
mode = unpack_level_kind(level);
}
switch (mode) {
case fenced_target_by_name:
return crm_element_value_copy(level, PCMK_XA_TARGET);
case fenced_target_by_pattern:
return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN);
case fenced_target_by_attribute:
return crm_strdup_printf("%s=%s",
crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE),
crm_element_value(level, PCMK_XA_TARGET_VALUE));
default:
return crm_strdup_printf("unknown-%s", pcmk__xe_id(level));
}
}
/*!
* \internal
* \brief Parse target identification from topology level XML
*
* \param[in] level Topology level XML to parse
*
* \return How to identify target of \p level
*/
static enum fenced_target_by
unpack_level_kind(const xmlNode *level)
{
if (crm_element_value(level, PCMK_XA_TARGET) != NULL) {
return fenced_target_by_name;
}
if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) {
return fenced_target_by_pattern;
}
if (!stand_alone /* if standalone, there's no attribute manager */
&& (crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL)
&& (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) {
return fenced_target_by_attribute;
}
return fenced_target_by_unknown;
}
static stonith_key_value_t *
parse_device_list(const char *devices)
{
int lpc = 0;
int max = 0;
int last = 0;
stonith_key_value_t *output = NULL;
if (devices == NULL) {
return output;
}
max = strlen(devices);
for (lpc = 0; lpc <= max; lpc++) {
if (devices[lpc] == ',' || devices[lpc] == 0) {
char *line = strndup(devices + last, lpc - last);
output = stonith_key_value_add(output, NULL, line);
free(line);
last = lpc + 1;
}
}
return output;
}
/*!
* \internal
* \brief Unpack essential information from topology request XML
*
* \param[in] xml Request XML to search
* \param[out] mode If not NULL, where to store level kind
* \param[out] target If not NULL, where to store representation of target
* \param[out] id If not NULL, where to store level number
* \param[out] desc If not NULL, where to store log-friendly level description
*
* \return Topology level XML from within \p xml, or NULL if not found
* \note The caller is responsible for freeing \p *target and \p *desc if set.
*/
static xmlNode *
unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
int *id, char **desc)
{
enum fenced_target_by local_mode = fenced_target_by_unknown;
char *local_target = NULL;
int local_id = 0;
/* The level element can be the top element or lower. If top level, don't
* search by xpath, because it might give multiple hits if the XML is the
* CIB.
*/
if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) {
xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING);
}
if (xml == NULL) {
if (desc != NULL) {
*desc = crm_strdup_printf("missing");
}
} else {
local_mode = unpack_level_kind(xml);
local_target = stonith_level_key(xml, local_mode);
crm_element_value_int(xml, PCMK_XA_INDEX, &local_id);
if (desc != NULL) {
*desc = crm_strdup_printf("%s[%d]", local_target, local_id);
}
}
if (mode != NULL) {
*mode = local_mode;
}
if (id != NULL) {
*id = local_id;
}
if (target != NULL) {
*target = local_target;
} else {
free(local_target);
}
return xml;
}
/*!
* \internal
* \brief Register a fencing topology level for a target
*
* Given an XML request specifying the target name, level index, and device IDs
* for the level, this will create an entry for the target in the global topology
* table if one does not already exist, then append the specified device IDs to
* the entry's device list for the specified level.
*
* \param[in] msg XML request for STONITH level registration
* \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
* \param[out] result Where to set result of registration
*/
void
fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
{
int id = 0;
xmlNode *level;
enum fenced_target_by mode;
char *target;
stonith_topology_t *tp;
stonith_key_value_t *dIter = NULL;
stonith_key_value_t *devices = NULL;
CRM_CHECK((msg != NULL) && (result != NULL), return);
level = unpack_level_request(msg, &mode, &target, &id, desc);
if (level == NULL) {
fenced_set_protocol_error(result);
return;
}
// Ensure an ID was given (even the client API adds an ID)
if (pcmk__str_empty(pcmk__xe_id(level))) {
crm_warn("Ignoring registration for topology level without ID");
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Topology level is invalid without ID");
return;
}
// Ensure a valid target was specified
if (mode == fenced_target_by_unknown) {
crm_warn("Ignoring registration for topology level '%s' "
"without valid target", pcmk__xe_id(level));
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid target for topology level '%s'",
pcmk__xe_id(level));
return;
}
// Ensure level ID is in allowed range
if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
crm_warn("Ignoring topology registration for %s with invalid level %d",
target, id);
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid level number '%s' for topology level '%s'",
pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
""),
pcmk__xe_id(level));
return;
}
/* Find or create topology table entry */
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
tp = calloc(1, sizeof(stonith_topology_t));
if (tp == NULL) {
pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
strerror(ENOMEM));
free(target);
return;
}
tp->kind = mode;
tp->target = target;
tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE);
tp->target_pattern = crm_element_value_copy(level,
PCMK_XA_TARGET_PATTERN);
tp->target_attribute = crm_element_value_copy(level,
PCMK_XA_TARGET_ATTRIBUTE);
g_hash_table_replace(topology, tp->target, tp);
crm_trace("Added %s (%d) to the topology (%d active entries)",
target, (int) mode, g_hash_table_size(topology));
} else {
free(target);
}
if (tp->levels[id] != NULL) {
crm_info("Adding to the existing %s[%d] topology entry",
tp->target, id);
}
devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES));
for (dIter = devices; dIter; dIter = dIter->next) {
const char *device = dIter->value;
crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
}
stonith_key_value_freeall(devices, 1, 1);
{
int nlevels = count_active_levels(tp);
crm_info("Target %s has %d active fencing level%s",
tp->target, nlevels, pcmk__plural_s(nlevels));
}
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
/*!
* \internal
* \brief Unregister a fencing topology level for a target
*
* Given an XML request specifying the target name and level index (or 0 for all
* levels), this will remove any corresponding entry for the target from the
* global topology table.
*
* \param[in] msg XML request for STONITH level registration
* \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
* \param[out] result Where to set result of unregistration
*/
void
fenced_unregister_level(xmlNode *msg, char **desc,
pcmk__action_result_t *result)
{
int id = -1;
stonith_topology_t *tp;
char *target;
xmlNode *level = NULL;
CRM_CHECK(result != NULL, return);
level = unpack_level_request(msg, NULL, &target, &id, desc);
if (level == NULL) {
fenced_set_protocol_error(result);
return;
}
// Ensure level ID is in allowed range
if ((id < 0) || (id >= ST_LEVEL_MAX)) {
crm_warn("Ignoring topology unregistration for %s with invalid level %d",
target, id);
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid level number '%s' for topology level %s",
pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
"<null>"),
// Client API doesn't add ID to unregistration XML
pcmk__s(pcmk__xe_id(level), ""));
return;
}
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
guint nentries = g_hash_table_size(topology);
crm_info("No fencing topology found for %s (%d active %s)",
target, nentries,
pcmk__plural_alt(nentries, "entry", "entries"));
} else if (id == 0 && g_hash_table_remove(topology, target)) {
guint nentries = g_hash_table_size(topology);
crm_info("Removed all fencing topology entries related to %s "
"(%d active %s remaining)", target, nentries,
pcmk__plural_alt(nentries, "entry", "entries"));
} else if (tp->levels[id] != NULL) {
guint nlevels;
g_list_free_full(tp->levels[id], free);
tp->levels[id] = NULL;
nlevels = count_active_levels(tp);
crm_info("Removed level %d from fencing topology for %s "
"(%d active level%s remaining)",
id, target, nlevels, pcmk__plural_s(nlevels));
}
free(target);
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
static char *
list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
{
int max = g_list_length(list);
size_t delim_len = delim?strlen(delim):0;
size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
char *rv;
GList *gIter;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
const char *value = (const char *) gIter->data;
alloc_size += strlen(value);
}
rv = calloc(alloc_size, sizeof(char));
if (rv) {
char *pos = rv;
const char *lead_delim = "";
for (gIter = list; gIter != NULL; gIter = gIter->next) {
const char *value = (const char *) gIter->data;
pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
lead_delim = delim;
}
if (max && terminate_with_delim) {
sprintf(pos, "%s", delim);
}
}
return rv;
}
/*!
* \internal
* \brief Execute a fence agent action directly (and asynchronously)
*
* Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
* directly on a specified device. Only list, monitor, and status actions are
* expected to use this call, though it should work with any agent command.
*
* \param[in] msg Request XML specifying action
* \param[out] result Where to store result of action
*
* \note If the action is monitor, the device must be registered via the API
* (CIB registration is not sufficient), because monitor should not be
* possible unless the device is "started" (API registered).
*/
static void
execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
{
xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR);
xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg,
LOG_ERR);
const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION);
async_command_t *cmd = NULL;
stonith_device_t *device = NULL;
if ((id == NULL) || (action == NULL)) {
crm_info("Malformed API action request: device %s, action %s",
(id? id : "not specified"),
(action? action : "not specified"));
fenced_set_protocol_error(result);
return;
}
if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
// Watchdog agent actions are implemented internally
if (stonith_watchdog_timeout_ms <= 0) {
pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Watchdog fence device not configured");
return;
} else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_result_output(result,
list_to_string(stonith_watchdog_targets,
"\n", TRUE),
NULL);
return;
} else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) {
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return;
}
}
device = g_hash_table_lookup(device_list, id);
if (device == NULL) {
crm_info("Ignoring API '%s' action request because device %s not found",
action, id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"'%s' not found", id);
return;
} else if (!device->api_registered
&& (strcmp(action, PCMK_ACTION_MONITOR) == 0)) {
// Monitors may run only on "started" (API-registered) devices
crm_info("Ignoring API '%s' action request because device %s not active",
action, id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"'%s' not active", id);
return;
}
cmd = create_async_command(msg);
if (cmd == NULL) {
crm_log_xml_warn(msg, "invalid");
fenced_set_protocol_error(result);
return;
}
schedule_stonith_command(cmd, device);
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
static void
search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
{
search->replies_received++;
if (can_fence && device) {
if (search->support_action_only != st_device_supports_none) {
stonith_device_t *dev = g_hash_table_lookup(device_list, device);
if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) {
return;
}
}
search->capable = g_list_append(search->capable, strdup(device));
}
if (search->replies_needed == search->replies_received) {
guint ndevices = g_list_length(search->capable);
crm_debug("Search found %d device%s that can perform '%s' targeting %s",
ndevices, pcmk__plural_s(ndevices),
(search->action? search->action : "unknown action"),
(search->host? search->host : "any node"));
search->callback(search->capable, search->user_data);
free(search->host);
free(search->action);
free(search);
}
}
/*!
* \internal
* \brief Check whether the local host is allowed to execute a fencing action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Hostname of fence target
* \param[in] allow_suicide Whether self-fencing is allowed for this operation
*
* \return TRUE if local host is allowed to execute action, FALSE otherwise
*/
static gboolean
localhost_is_eligible(const stonith_device_t *device, const char *action,
const char *target, gboolean allow_suicide)
{
gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname,
pcmk__str_casei);
if ((device != NULL) && (action != NULL)
&& (device->on_target_actions != NULL)
&& (strstr((const char*) device->on_target_actions->str,
action) != NULL)) {
if (!localhost_is_target) {
crm_trace("Operation '%s' using %s can only be executed for local "
"host, not %s", action, device->id, target);
return FALSE;
}
} else if (localhost_is_target && !allow_suicide) {
crm_trace("'%s' operation does not support self-fencing", action);
return FALSE;
}
return TRUE;
}
/*!
* \internal
* \brief Check if local node is allowed to execute (possibly remapped) action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Node name of fence target
* \param[in] allow_self Whether self-fencing is allowed for this operation
*
* \return true if local node is allowed to execute \p action or any actions it
* might be remapped to, otherwise false
*/
static bool
localhost_is_eligible_with_remap(const stonith_device_t *device,
const char *action, const char *target,
gboolean allow_self)
{
// Check exact action
if (localhost_is_eligible(device, action, target, allow_self)) {
return true;
}
// Check potential remaps
if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* "reboot" might get remapped to "off" then "on", so even if reboot is
* disallowed, return true if either of those is allowed. We'll report
* the disallowed actions with the results. We never allow self-fencing
* for remapped "on" actions because the target is off at that point.
*/
if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self)
|| localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) {
return true;
}
}
return false;
}
static void
can_fence_host_with_device(stonith_device_t *dev,
struct device_search_s *search)
{
gboolean can = FALSE;
const char *check_type = "Internal bug";
const char *target = NULL;
const char *alias = NULL;
const char *dev_id = "Unspecified device";
const char *action = (search == NULL)? NULL : search->action;
CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results);
if (dev->id != NULL) {
dev_id = dev->id;
}
target = search->host;
if (target == NULL) {
can = TRUE;
check_type = "No target";
goto search_report_results;
}
/* Answer immediately if the device does not support the action
* or the local node is not allowed to perform it
*/
if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)
&& !pcmk_is_set(dev->flags, st_device_supports_on)) {
check_type = "Agent does not support 'on'";
goto search_report_results;
} else if (!localhost_is_eligible_with_remap(dev, action, target,
search->allow_suicide)) {
check_type = "This node is not allowed to execute action";
goto search_report_results;
}
// Check eligibility as specified by pcmk_host_check
check_type = target_list_type(dev);
alias = g_hash_table_lookup(dev->aliases, target);
if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) {
can = TRUE;
} else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) {
if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
can = TRUE;
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
&& g_hash_table_lookup(dev->aliases, target)) {
can = TRUE;
}
} else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) {
time_t now = time(NULL);
if (dev->targets == NULL || dev->targets_age + 60 < now) {
int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST,
search->per_device_timeout);
if (device_timeout > search->per_device_timeout) {
crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s "
"is larger than " PCMK_OPT_STONITH_TIMEOUT
" (%ds), timeout may occur",
device_timeout, dev_id, search->per_device_timeout);
}
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev_id, target, action);
schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL,
search->per_device_timeout, search, dynamic_list_search_cb);
/* we'll respond to this search request async in the cb */
return;
}
if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
pcmk__str_casei)) {
can = TRUE;
}
} else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) {
int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);
if (device_timeout > search->per_device_timeout) {
crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is "
"larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), "
"timeout may occur",
device_timeout, dev_id, search->per_device_timeout);
}
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev_id, target, action);
schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target,
search->per_device_timeout, search, status_search_cb);
/* we'll respond to this search request async in the cb */
return;
} else {
crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
}
search_report_results:
crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"),
pcmk__s(target, "unspecified target"),
(alias == NULL)? "" : " (as '", pcmk__s(alias, ""),
(alias == NULL)? "" : "')", check_type);
search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
}
static void
search_devices(gpointer key, gpointer value, gpointer user_data)
{
stonith_device_t *dev = value;
struct device_search_s *search = user_data;
can_fence_host_with_device(dev, search);
}
#define DEFAULT_QUERY_TIMEOUT 20
static void
get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
void (*callback) (GList * devices, void *user_data), uint32_t support_action_only)
{
struct device_search_s *search;
guint ndevices = g_hash_table_size(device_list);
if (ndevices == 0) {
callback(NULL, user_data);
return;
}
search = calloc(1, sizeof(struct device_search_s));
if (!search) {
crm_crit("Cannot search for capable fence devices: %s",
strerror(ENOMEM));
callback(NULL, user_data);
return;
}
pcmk__str_update(&search->host, host);
pcmk__str_update(&search->action, action);
search->per_device_timeout = timeout;
search->allow_suicide = suicide;
search->callback = callback;
search->user_data = user_data;
search->support_action_only = support_action_only;
/* We are guaranteed this many replies, even if a device is
* unregistered while the search is in progress.
*/
search->replies_needed = ndevices;
crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
ndevices, pcmk__plural_s(ndevices),
(search->action? search->action : "unknown action"),
(search->host? search->host : "any node"));
g_hash_table_foreach(device_list, search_devices, search);
}
struct st_query_data {
xmlNode *reply;
char *remote_peer;
char *client_id;
char *target;
char *action;
int call_options;
};
/*!
* \internal
* \brief Add action-specific attributes to query reply XML
*
* \param[in,out] xml XML to add attributes to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
*/
static void
add_action_specific_attributes(xmlNode *xml, const char *action,
const stonith_device_t *device,
const char *target)
{
int action_specific_timeout;
int delay_max;
int delay_base;
CRM_CHECK(xml && action && device, return);
// PCMK__XA_ST_REQUIRED is currently used only for unfencing
if (is_action_required(action, device)) {
crm_trace("Action '%s' is required using %s", action, device->id);
crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1);
}
// pcmk_<action>_timeout if configured
action_specific_timeout = get_action_timeout(device, action, 0);
if (action_specific_timeout) {
crm_trace("Action '%s' has timeout %dms using %s",
action, action_specific_timeout, device->id);
crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
action_specific_timeout);
}
delay_max = get_action_delay_max(device, action);
if (delay_max > 0) {
crm_trace("Action '%s' has maximum random delay %ds using %s",
action, delay_max, device->id);
crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max);
}
delay_base = get_action_delay_base(device, action, target);
if (delay_base > 0) {
crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base);
}
if ((delay_max > 0) && (delay_base == 0)) {
crm_trace("Action '%s' has maximum random delay %ds using %s",
action, delay_max, device->id);
} else if ((delay_max == 0) && (delay_base > 0)) {
crm_trace("Action '%s' has a static delay of %ds using %s",
action, delay_base, device->id);
} else if ((delay_max > 0) && (delay_base > 0)) {
crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen "
"maximum delay of %ds using %s",
action, delay_base, delay_max, device->id);
}
}
/*!
* \internal
* \brief Add "disallowed" attribute to query reply XML if appropriate
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device,
const char *target, gboolean allow_suicide)
{
if (!localhost_is_eligible(device, action, target, allow_suicide)) {
crm_trace("Action '%s' using %s is disallowed for local host",
action, device->id);
pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true);
}
}
/*!
* \internal
* \brief Add child element with action-specific values to query reply XML
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_action_reply(xmlNode *xml, const char *action,
const stonith_device_t *device, const char *target,
gboolean allow_suicide)
{
xmlNode *child = create_xml_node(xml, PCMK__XE_ST_DEVICE_ACTION);
crm_xml_add(child, PCMK_XA_ID, action);
add_action_specific_attributes(child, action, device, target);
add_disallowed(child, action, device, target, allow_suicide);
}
/*!
* \internal
* \brief Send a reply to a CPG peer or IPC client
*
* \param[in] reply XML reply to send
* \param[in] call_options Send synchronously if st_opt_sync_call is set
* \param[in] remote_peer If not NULL, name of peer node to send CPG reply
* \param[in,out] client If not NULL, client to send IPC reply
*/
static void
stonith_send_reply(const xmlNode *reply, int call_options,
const char *remote_peer, pcmk__client_t *client)
{
CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
return);
if (remote_peer == NULL) {
do_local_reply(reply, client, call_options);
} else {
send_cluster_message(pcmk__get_node(0, remote_peer, NULL,
pcmk__node_search_cluster),
crm_msg_stonith_ng, reply, FALSE);
}
}
static void
stonith_query_capable_device_cb(GList * devices, void *user_data)
{
struct st_query_data *query = user_data;
int available_devices = 0;
xmlNode *dev = NULL;
xmlNode *list = NULL;
GList *lpc = NULL;
pcmk__client_t *client = NULL;
if (query->client_id != NULL) {
client = pcmk__find_client_by_id(query->client_id);
if ((client == NULL) && (query->remote_peer == NULL)) {
crm_trace("Skipping reply to %s: no longer a client",
query->client_id);
goto done;
}
}
/* Pack the results into XML */
list = create_xml_node(NULL, __func__);
crm_xml_add(list, PCMK__XA_ST_TARGET, query->target);
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
const char *action = query->action;
if (!device) {
/* It is possible the device got unregistered while
* determining who can fence the target */
continue;
}
available_devices++;
dev = create_xml_node(list, PCMK__XE_ST_DEVICE_ID);
crm_xml_add(dev, PCMK_XA_ID, device->id);
crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace);
crm_xml_add(dev, PCMK_XA_AGENT, device->agent);
// Has had successful monitor, list, or status on this node
crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified);
crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags);
/* If the originating fencer wants to reboot the node, and we have a
* capable device that doesn't support "reboot", remap to "off" instead.
*/
if (!pcmk_is_set(device->flags, st_device_supports_reboot)
&& pcmk__str_eq(query->action, PCMK_ACTION_REBOOT,
pcmk__str_none)) {
crm_trace("%s doesn't support reboot, using values for off instead",
device->id);
action = PCMK_ACTION_OFF;
}
/* Add action-specific values if available */
add_action_specific_attributes(dev, action, device, query->target);
if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* A "reboot" *might* get remapped to "off" then "on", so after
* sending the "reboot"-specific values in the main element, we add
* sub-elements for "off" and "on" values.
*
* We short-circuited earlier if "reboot", "off" and "on" are all
* disallowed for the local host. However if only one or two are
* disallowed, we send back the results and mark which ones are
* disallowed. If "reboot" is disallowed, this might cause problems
* with older fencer versions, which won't check for it. Older
* versions will ignore "off" and "on", so they are not a problem.
*/
add_disallowed(dev, action, device, query->target,
pcmk_is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
pcmk_is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE);
}
/* A query without a target wants device parameters */
if (query->target == NULL) {
xmlNode *attrs = create_xml_node(dev, PCMK__XE_ATTRIBUTES);
g_hash_table_foreach(device->params, hash2field, attrs);
}
}
crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices);
if (query->target) {
crm_debug("Found %d matching device%s for target '%s'",
available_devices, pcmk__plural_s(available_devices),
query->target);
} else {
crm_debug("%d device%s installed",
available_devices, pcmk__plural_s(available_devices));
}
if (list != NULL) {
crm_log_xml_trace(list, "Add query results");
add_message_xml(query->reply, PCMK__XA_ST_CALLDATA, list);
}
stonith_send_reply(query->reply, query->call_options, query->remote_peer,
client);
done:
free_xml(query->reply);
free(query->remote_peer);
free(query->client_id);
free(query->target);
free(query->action);
free(query);
free_xml(list);
g_list_free_full(devices, free);
}
/*!
* \internal
* \brief Log the result of an asynchronous command
*
* \param[in] cmd Command the result is for
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] next Alternate device that will be tried if command failed
* \param[in] op_merged Whether this command was merged with an earlier one
*/
static void
log_async_result(const async_command_t *cmd,
const pcmk__action_result_t *result,
int pid, const char *next, bool op_merged)
{
int log_level = LOG_ERR;
int output_log_level = LOG_NEVER;
guint devices_remaining = g_list_length(cmd->next_device_iter);
GString *msg = g_string_sized_new(80); // Reasonable starting size
// Choose log levels appropriately if we have a result
if (pcmk__result_ok(result)) {
log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
pcmk__str_none)) {
output_log_level = LOG_DEBUG;
}
next = NULL;
} else {
log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
pcmk__str_none)) {
output_log_level = LOG_WARNING;
}
}
// Build the log message piece by piece
pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
if (pid != 0) {
g_string_append_printf(msg, "[%d] ", pid);
}
if (cmd->target != NULL) {
pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
}
if (cmd->device != NULL) {
pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
}
// Add exit status or execution status as appropriate
if (result->execution_status == PCMK_EXEC_DONE) {
g_string_append_printf(msg, "returned %d", result->exit_status);
} else {
pcmk__g_strcat(msg, "could not be executed: ",
pcmk_exec_status_str(result->execution_status), NULL);
}
// Add exit reason and next device if appropriate
if (result->exit_reason != NULL) {
pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
}
if (next != NULL) {
pcmk__g_strcat(msg, ", retrying with ", next, NULL);
}
if (devices_remaining > 0) {
g_string_append_printf(msg, " (%u device%s remaining)",
(unsigned int) devices_remaining,
pcmk__plural_s(devices_remaining));
}
g_string_append_printf(msg, " " CRM_XS " %scall %d from %s",
(op_merged? "merged " : ""), cmd->id,
cmd->client_name);
// Log the result
do_crm_log(log_level, "%s", msg->str);
g_string_free(msg, TRUE);
// Log the output (which may have multiple lines), if appropriate
if (output_log_level != LOG_NEVER) {
char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
crm_log_output(output_log_level, prefix, result->action_stdout);
free(prefix);
}
}
/*!
* \internal
* \brief Reply to requester after asynchronous command completion
*
* \param[in] cmd Command that completed
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] merged If true, command was merged with another, not executed
*/
static void
send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
int pid, bool merged)
{
xmlNode *reply = NULL;
pcmk__client_t *client = NULL;
CRM_CHECK((cmd != NULL) && (result != NULL), return);
log_async_result(cmd, result, pid, NULL, merged);
if (cmd->client != NULL) {
client = pcmk__find_client_by_id(cmd->client);
if ((client == NULL) && (cmd->origin == NULL)) {
crm_trace("Skipping reply to %s: no longer a client", cmd->client);
return;
}
}
reply = construct_async_reply(cmd, result);
if (merged) {
pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true);
}
if (!stand_alone && pcmk__is_fencing_action(cmd->action)
&& pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
/* The target was also the originator, so broadcast the result on its
* behalf (since it will be unable to).
*/
crm_trace("Broadcast '%s' result for %s (target was also originator)",
cmd->action, cmd->target);
crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
} else {
// Reply only to the originator
stonith_send_reply(reply, cmd->options, cmd->origin, client);
}
crm_log_xml_trace(reply, "Reply");
free_xml(reply);
if (stand_alone) {
/* Do notification with a clean data object */
- xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
+ xmlNode *notify_data = create_xml_node(NULL, PCMK__XE_ST_NOTIFY_FENCE);
stonith__xe_set_result(notify_data, result);
crm_xml_add(notify_data, PCMK__XA_ST_TARGET, cmd->target);
crm_xml_add(notify_data, PCMK__XA_ST_OP, cmd->op);
crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, "localhost");
crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, cmd->device);
crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, cmd->client);
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, result,
notify_data);
fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
}
}
static void
cancel_stonith_command(async_command_t * cmd)
{
stonith_device_t *device = cmd_device(cmd);
if (device) {
crm_trace("Cancel scheduled '%s' action using %s",
cmd->action, device->id);
device->pending_ops = g_list_remove(device->pending_ops, cmd);
}
}
/*!
* \internal
* \brief Cancel and reply to any duplicates of a just-completed operation
*
* Check whether any fencing operations are scheduled to do the same thing as
* one that just succeeded. If so, rather than performing the same operation
* twice, return the result of this operation for all matching pending commands.
*
* \param[in,out] cmd Fencing operation that just succeeded
* \param[in] result Result of \p cmd
* \param[in] pid If nonzero, process ID of agent invocation (for logs)
*
* \note Duplicate merging will do the right thing for either type of remapped
* reboot. If the executing fencer remapped an unsupported reboot to off,
* then cmd->action will be "reboot" and will be merged with any other
* reboot requests. If the originating fencer remapped a topology reboot
* to off then on, we will get here once with cmd->action "off" and once
* with "on", and they will be merged separately with similar requests.
*/
static void
reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
int pid)
{
GList *next = NULL;
for (GList *iter = cmd_list; iter != NULL; iter = next) {
async_command_t *cmd_other = iter->data;
next = iter->next; // We might delete this entry, so grab next now
if (cmd == cmd_other) {
continue;
}
/* A pending operation matches if:
* 1. The client connections are different.
* 2. The target is the same.
* 3. The fencing action is the same.
* 4. The device scheduled to execute the action is the same.
*/
if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
!pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
!pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
!pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
continue;
}
crm_notice("Merging fencing action '%s'%s%s originating from "
"client %s with identical fencing request from client %s",
cmd_other->action,
(cmd_other->target == NULL)? "" : " targeting ",
pcmk__s(cmd_other->target, ""), cmd_other->client_name,
cmd->client_name);
// Stop tracking the duplicate, send its result, and cancel it
cmd_list = g_list_remove_link(cmd_list, iter);
send_async_reply(cmd_other, result, pid, true);
cancel_stonith_command(cmd_other);
free_async_command(cmd_other);
g_list_free_1(iter);
}
}
/*!
* \internal
* \brief Return the next required device (if any) for an operation
*
* \param[in,out] cmd Fencing operation that just succeeded
*
* \return Next device required for action if any, otherwise NULL
*/
static stonith_device_t *
next_required_device(async_command_t *cmd)
{
for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) {
stonith_device_t *next_device = g_hash_table_lookup(device_list,
iter->data);
if (is_action_required(cmd->action, next_device)) {
/* This is only called for successful actions, so it's OK to skip
* non-required devices.
*/
cmd->next_device_iter = iter->next;
return next_device;
}
}
return NULL;
}
static void
st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
stonith_device_t *next_device = NULL;
CRM_CHECK(cmd != NULL, return);
device = cmd_device(cmd);
cmd->active_on = NULL;
/* The device is ready to do something else now */
if (device) {
if (!device->verified && pcmk__result_ok(result)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST,
PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS,
NULL)) {
device->verified = TRUE;
}
mainloop_set_trigger(device->work);
}
if (pcmk__result_ok(result)) {
next_device = next_required_device(cmd);
} else if ((cmd->next_device_iter != NULL)
&& !is_action_required(cmd->action, device)) {
/* if this device didn't work out, see if there are any others we can try.
* if the failed device was 'required', we can't pick another device. */
next_device = g_hash_table_lookup(device_list,
cmd->next_device_iter->data);
cmd->next_device_iter = cmd->next_device_iter->next;
}
if (next_device == NULL) {
send_async_reply(cmd, result, pid, false);
if (pcmk__result_ok(result)) {
reply_to_duplicates(cmd, result, pid);
}
free_async_command(cmd);
} else { // This operation requires more fencing
log_async_result(cmd, result, pid, next_device->id, false);
schedule_stonith_command(cmd, next_device);
}
}
static gint
sort_device_priority(gconstpointer a, gconstpointer b)
{
const stonith_device_t *dev_a = a;
const stonith_device_t *dev_b = b;
if (dev_a->priority > dev_b->priority) {
return -1;
} else if (dev_a->priority < dev_b->priority) {
return 1;
}
return 0;
}
static void
stonith_fence_get_devices_cb(GList * devices, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
guint ndevices = g_list_length(devices);
crm_info("Found %d matching device%s for target '%s'",
ndevices, pcmk__plural_s(ndevices), cmd->target);
if (devices != NULL) {
/* Order based on priority */
devices = g_list_sort(devices, sort_device_priority);
device = g_hash_table_lookup(device_list, devices->data);
}
if (device == NULL) { // No device found
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"No device configured for target '%s'",
cmd->target);
send_async_reply(cmd, &result, 0, false);
pcmk__reset_result(&result);
free_async_command(cmd);
g_list_free_full(devices, free);
} else { // Device found, schedule it for fencing
cmd->device_list = devices;
cmd->next_device_iter = devices->next;
schedule_stonith_command(cmd, device);
}
}
/*!
* \internal
* \brief Execute a fence action via the local node
*
* \param[in] msg Fencing request
* \param[out] result Where to store result of fence action
*/
static void
fence_locally(xmlNode *msg, pcmk__action_result_t *result)
{
const char *device_id = NULL;
stonith_device_t *device = NULL;
async_command_t *cmd = NULL;
xmlNode *dev = NULL;
CRM_CHECK((msg != NULL) && (result != NULL), return);
dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR);
cmd = create_async_command(msg);
if (cmd == NULL) {
crm_log_xml_warn(msg, "invalid");
fenced_set_protocol_error(result);
return;
}
device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (device_id != NULL) {
device = g_hash_table_lookup(device_list, device_id);
if (device == NULL) {
crm_err("Requested device '%s' is not available", device_id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Requested device '%s' not found", device_id);
return;
}
schedule_stonith_command(cmd, device);
} else {
const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET);
if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
int nodeid = 0;
crm_node_t *node = NULL;
pcmk__scan_min_int(host, &nodeid, 0);
node = pcmk__search_node_caches(nodeid, NULL,
pcmk__node_search_any
|pcmk__node_search_known);
if (node != NULL) {
host = node->uname;
}
}
/* If we get to here, then self-fencing is implicitly allowed */
get_capable_devices(host, cmd->action, cmd->default_timeout,
TRUE, cmd, stonith_fence_get_devices_cb,
fenced_support_flag(cmd->action));
}
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
/*!
* \internal
* \brief Build an XML reply for a fencing operation
*
* \param[in] request Request that reply is for
* \param[in] data If not NULL, add to reply as call data
* \param[in] result Full result of fencing operation
*
* \return Newly created XML reply
* \note The caller is responsible for freeing the result.
* \note This has some overlap with construct_async_reply(), but that copies
* values from an async_command_t, whereas this one copies them from the
* request.
*/
xmlNode *
fenced_construct_reply(const xmlNode *request, xmlNode *data,
const pcmk__action_result_t *result)
{
xmlNode *reply = NULL;
reply = create_xml_node(NULL, PCMK__XE_ST_REPLY);
crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
stonith__xe_set_result(reply, result);
if (request == NULL) {
/* Most likely, this is the result of a stonith operation that was
* initiated before we came up. Unfortunately that means we lack enough
* information to provide clients with a full result.
*
* @TODO Maybe synchronize this information at start-up?
*/
crm_warn("Missing request information for client notifications for "
"operation with result '%s' (initiated before we came up?)",
pcmk_exec_status_str(result->execution_status));
} else {
const char *name = NULL;
const char *value = NULL;
// Attributes to copy from request to reply
const char *names[] = {
PCMK__XA_ST_OP,
PCMK__XA_ST_CALLID,
PCMK__XA_ST_CLIENTID,
PCMK__XA_ST_CLIENTNAME,
PCMK__XA_ST_REMOTE_OP,
PCMK__XA_ST_CALLOPT,
};
for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
name = names[lpc];
value = crm_element_value(request, name);
crm_xml_add(reply, name, value);
}
if (data != NULL) {
add_message_xml(reply, PCMK__XA_ST_CALLDATA, data);
}
}
return reply;
}
/*!
* \internal
* \brief Build an XML reply to an asynchronous fencing command
*
* \param[in] cmd Fencing command that reply is for
* \param[in] result Command result
*/
static xmlNode *
construct_async_reply(const async_command_t *cmd,
const pcmk__action_result_t *result)
{
xmlNode *reply = create_xml_node(NULL, PCMK__XE_ST_REPLY);
crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op);
crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device);
crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client);
crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name);
crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target);
crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op);
crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin);
crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id);
crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options);
stonith__xe_set_result(reply, result);
return reply;
}
bool fencing_peer_active(crm_node_t *peer)
{
if (peer == NULL) {
return FALSE;
} else if (peer->uname == NULL) {
return FALSE;
} else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
return TRUE;
}
return FALSE;
}
void
set_fencing_completed(remote_fencing_op_t *op)
{
struct timespec tv;
qb_util_timespec_from_epoch_get(&tv);
op->completed = tv.tv_sec;
op->completed_nsec = tv.tv_nsec;
}
/*!
* \internal
* \brief Look for alternate node needed if local node shouldn't fence target
*
* \param[in] target Node that must be fenced
*
* \return Name of an alternate node that should fence \p target if any,
* or NULL otherwise
*/
static const char *
check_alternate_host(const char *target)
{
if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
GHashTableIter gIter;
crm_node_t *entry = NULL;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
if (fencing_peer_active(entry)
&& !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) {
crm_notice("Forwarding self-fencing request to %s",
entry->uname);
return entry->uname;
}
}
crm_warn("Will handle own fencing because no peer can");
}
return NULL;
}
static void
remove_relay_op(xmlNode * request)
{
xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request,
LOG_TRACE);
const char *relay_op_id = NULL;
const char *op_id = NULL;
const char *client_name = NULL;
const char *target = NULL;
remote_fencing_op_t *relay_op = NULL;
if (dev) {
target = crm_element_value(dev, PCMK__XA_ST_TARGET);
}
relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY);
op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);
/* Delete RELAY operation. */
if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
if (relay_op) {
GHashTableIter iter;
remote_fencing_op_t *list_op = NULL;
g_hash_table_iter_init(&iter, stonith_remote_op_list);
/* If the operation to be deleted is registered as a duplicate, delete the registration. */
while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
GList *dup_iter = NULL;
if (list_op != relay_op) {
for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
remote_fencing_op_t *other = dup_iter->data;
if (other == relay_op) {
other->duplicates = g_list_remove(other->duplicates, relay_op);
break;
}
}
}
}
crm_debug("Deleting relay op %s ('%s'%s%s for %s), "
"replaced by op %s ('%s'%s%s for %s)",
relay_op->id, relay_op->action,
(relay_op->target == NULL)? "" : " targeting ",
pcmk__s(relay_op->target, ""),
relay_op->client_name, op_id, relay_op->action,
(target == NULL)? "" : " targeting ", pcmk__s(target, ""),
client_name);
g_hash_table_remove(stonith_remote_op_list, relay_op_id);
}
}
}
/*!
* \internal
* \brief Check whether an API request was sent by a privileged user
*
* API commands related to fencing configuration may be done only by privileged
* IPC users (i.e. root or hacluster), because all other users should go through
* the CIB to have ACLs applied. If no client was given, this is a peer request,
* which is always allowed.
*
* \param[in] c IPC client that sent request (or NULL if sent by CPG peer)
* \param[in] op Requested API operation (for logging only)
*
* \return true if sender is peer or privileged client, otherwise false
*/
static inline bool
is_privileged(const pcmk__client_t *c, const char *op)
{
if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) {
return true;
} else {
crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
pcmk__s(op, ""), pcmk__client_name(c));
return false;
}
}
// CRM_OP_REGISTER
static xmlNode *
handle_register_request(pcmk__request_t *request)
{
xmlNode *reply = create_xml_node(NULL, "reply");
CRM_ASSERT(request->ipc_client != NULL);
crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER);
crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_request_flags(request, pcmk__request_reuse_options);
return reply;
}
// STONITH_OP_EXEC
static xmlNode *
handle_agent_request(pcmk__request_t *request)
{
execute_agent_action(request->xml, &request->result);
if (request->result.execution_status == PCMK_EXEC_PENDING) {
return NULL;
}
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_TIMEOUT_UPDATE
static xmlNode *
handle_update_timeout_request(pcmk__request_t *request)
{
const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID);
const char *client_id = crm_element_value(request->xml,
PCMK__XA_ST_CLIENTID);
int op_timeout = 0;
crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout);
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
// STONITH_OP_QUERY
static xmlNode *
handle_query_request(pcmk__request_t *request)
{
int timeout = 0;
xmlNode *dev = NULL;
const char *action = NULL;
const char *target = NULL;
const char *client_id = crm_element_value(request->xml,
PCMK__XA_ST_CLIENTID);
struct st_query_data *query = NULL;
if (request->peer != NULL) {
// Record it for the future notification
create_remote_stonith_op(client_id, request->xml, TRUE);
}
/* Delete the DC node RELAY operation. */
remove_relay_op(request->xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml,
LOG_NEVER);
if (dev != NULL) {
const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
return NULL; // No query or reply necessary
}
target = crm_element_value(dev, PCMK__XA_ST_TARGET);
action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
}
crm_log_xml_trace(request->xml, "Query");
query = calloc(1, sizeof(struct st_query_data));
CRM_ASSERT(query != NULL);
query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
pcmk__str_update(&query->remote_peer, request->peer);
pcmk__str_update(&query->client_id, client_id);
pcmk__str_update(&query->target, target);
pcmk__str_update(&query->action, action);
query->call_options = request->call_options;
crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout);
get_capable_devices(target, action, timeout,
pcmk_is_set(query->call_options, st_opt_allow_suicide),
query, stonith_query_capable_device_cb, st_device_supports_none);
return NULL;
}
// STONITH_OP_NOTIFY
static xmlNode *
handle_notify_request(pcmk__request_t *request)
{
const char *flag_name = NULL;
CRM_ASSERT(request->ipc_client != NULL);
flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE);
if (flag_name != NULL) {
crm_debug("Enabling %s callbacks for client %s",
flag_name, pcmk__request_origin(request));
pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name));
}
flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE);
if (flag_name != NULL) {
crm_debug("Disabling %s callbacks for client %s",
flag_name, pcmk__request_origin(request));
pcmk__clear_client_flags(request->ipc_client,
get_stonith_flag(flag_name));
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_request_flags(request, pcmk__request_reuse_options);
return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL,
CRM_EX_OK);
}
// STONITH_OP_RELAY
static xmlNode *
handle_relay_request(pcmk__request_t *request)
{
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
LOG_TRACE);
crm_notice("Received forwarded fencing request from "
"%s %s to fence (%s) peer %s",
pcmk__request_origin_type(request),
pcmk__request_origin(request),
crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION),
crm_element_value(dev, PCMK__XA_ST_TARGET));
if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
fenced_set_protocol_error(&request->result);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
return NULL;
}
// STONITH_OP_FENCE
static xmlNode *
handle_fence_request(pcmk__request_t *request)
{
if ((request->peer != NULL) || stand_alone) {
fence_locally(request->xml, &request->result);
} else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
switch (fenced_handle_manual_confirmation(request->ipc_client,
request->xml)) {
case pcmk_rc_ok:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
break;
case EINPROGRESS:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
break;
default:
fenced_set_protocol_error(&request->result);
break;
}
} else {
const char *alternate_host = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
LOG_TRACE);
const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET);
const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (request->ipc_client != NULL) {
int tolerance = 0;
crm_notice("Client %s wants to fence (%s) %s using %s",
pcmk__request_origin(request), action,
target, (device? device : "any device"));
crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance);
if (stonith_check_fence_tolerance(tolerance, target, action)) {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
return fenced_construct_reply(request->xml, NULL,
&request->result);
}
alternate_host = check_alternate_host(target);
} else {
crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
request->peer, action, target,
(device == NULL)? "(any)" : device);
}
if (alternate_host != NULL) {
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
if (request->ipc_client->id == 0) {
client_id = crm_element_value(request->xml,
PCMK__XA_ST_CLIENTID);
} else {
client_id = request->ipc_client->id;
}
/* Create a duplicate fencing operation to relay with the client ID.
* When a query response is received, this operation should be
* deleted to avoid keeping the duplicate around.
*/
op = create_remote_stonith_op(client_id, request->xml, FALSE);
crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY);
crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID,
request->ipc_client->id);
crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id);
send_cluster_message(pcmk__get_node(0, alternate_host, NULL,
pcmk__node_search_cluster),
crm_msg_stonith_ng, request->xml, FALSE);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
} else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
FALSE) == NULL) {
fenced_set_protocol_error(&request->result);
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
}
}
if (request->result.execution_status == PCMK_EXEC_PENDING) {
return NULL;
}
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_FENCE_HISTORY
static xmlNode *
handle_history_request(pcmk__request_t *request)
{
xmlNode *reply = NULL;
xmlNode *data = NULL;
stonith_fence_history(request->xml, &data, request->peer,
request->call_options);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) {
/* When the local node broadcasts its history, it sets
* st_opt_discard_reply and doesn't need a reply.
*/
reply = fenced_construct_reply(request->xml, data, &request->result);
}
free_xml(data);
return reply;
}
// STONITH_OP_DEVICE_ADD
static xmlNode *
handle_device_add_request(pcmk__request_t *request)
{
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml,
LOG_ERR);
if (is_privileged(request->ipc_client, op)) {
int rc = stonith_device_register(dev, FALSE);
pcmk__set_result(&request->result,
((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
stonith__legacy2status(rc),
((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
} else {
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must register device via CIB");
}
fenced_send_config_notification(op, &request->result,
(dev == NULL)? NULL : pcmk__xe_id(dev));
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_DEVICE_DEL
static xmlNode *
handle_device_delete_request(pcmk__request_t *request)
{
xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml,
LOG_ERR);
const char *device_id = crm_element_value(dev, PCMK_XA_ID);
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
if (is_privileged(request->ipc_client, op)) {
stonith_device_remove(device_id, false);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must delete device via CIB");
}
fenced_send_config_notification(op, &request->result, device_id);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_ADD
static xmlNode *
handle_level_add_request(pcmk__request_t *request)
{
char *desc = NULL;
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
if (is_privileged(request->ipc_client, op)) {
fenced_register_level(request->xml, &desc, &request->result);
} else {
unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must add level via CIB");
}
fenced_send_config_notification(op, &request->result, desc);
free(desc);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_DEL
static xmlNode *
handle_level_delete_request(pcmk__request_t *request)
{
char *desc = NULL;
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
if (is_privileged(request->ipc_client, op)) {
fenced_unregister_level(request->xml, &desc, &request->result);
} else {
unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must delete level via CIB");
}
fenced_send_config_notification(op, &request->result, desc);
free(desc);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// CRM_OP_RM_NODE_CACHE
static xmlNode *
handle_cache_request(pcmk__request_t *request)
{
int node_id = 0;
const char *name = NULL;
crm_element_value_int(request->xml, PCMK_XA_ID, &node_id);
name = crm_element_value(request->xml, PCMK_XA_UNAME);
reap_crm_member(node_id, name);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
static xmlNode *
handle_unknown_request(pcmk__request_t *request)
{
crm_err("Unknown IPC request %s from %s %s",
request->op, pcmk__request_origin_type(request),
pcmk__request_origin(request));
pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
"Unknown IPC request type '%s' (bug?)", request->op);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
static void
fenced_register_handlers(void)
{
pcmk__server_command_t handlers[] = {
{ CRM_OP_REGISTER, handle_register_request },
{ STONITH_OP_EXEC, handle_agent_request },
{ STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
{ STONITH_OP_QUERY, handle_query_request },
{ STONITH_OP_NOTIFY, handle_notify_request },
{ STONITH_OP_RELAY, handle_relay_request },
{ STONITH_OP_FENCE, handle_fence_request },
{ STONITH_OP_FENCE_HISTORY, handle_history_request },
{ STONITH_OP_DEVICE_ADD, handle_device_add_request },
{ STONITH_OP_DEVICE_DEL, handle_device_delete_request },
{ STONITH_OP_LEVEL_ADD, handle_level_add_request },
{ STONITH_OP_LEVEL_DEL, handle_level_delete_request },
{ CRM_OP_RM_NODE_CACHE, handle_cache_request },
{ NULL, handle_unknown_request },
};
fenced_handlers = pcmk__register_handlers(handlers);
}
void
fenced_unregister_handlers(void)
{
if (fenced_handlers != NULL) {
g_hash_table_destroy(fenced_handlers);
fenced_handlers = NULL;
}
}
static void
handle_request(pcmk__request_t *request)
{
xmlNode *reply = NULL;
const char *reason = NULL;
if (fenced_handlers == NULL) {
fenced_register_handlers();
}
reply = pcmk__process_request(request, fenced_handlers);
if (reply != NULL) {
if (pcmk_is_set(request->flags, pcmk__request_reuse_options)
&& (request->ipc_client != NULL)) {
/* Certain IPC-only commands must reuse the call options from the
* original request rather than the ones set by stonith_send_reply()
* -> do_local_reply().
*/
pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
request->ipc_flags);
request->ipc_client->request_id = 0;
} else {
stonith_send_reply(reply, request->call_options,
request->peer, request->ipc_client);
}
free_xml(reply);
}
reason = request->result.exit_reason;
crm_debug("Processed %s request from %s %s: %s%s%s%s",
request->op, pcmk__request_origin_type(request),
pcmk__request_origin(request),
pcmk_exec_status_str(request->result.execution_status),
(reason == NULL)? "" : " (",
(reason == NULL)? "" : reason,
(reason == NULL)? "" : ")");
}
static void
handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
{
// Copy, because request might be freed before we want to log this
char *op = crm_element_value_copy(request, PCMK__XA_ST_OP);
if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
process_remote_stonith_query(request);
} else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE,
NULL)) {
fenced_process_fencing_reply(request);
} else {
crm_err("Ignoring unknown %s reply from %s %s",
pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
crm_log_xml_warn(request, "UnknownOp");
free(op);
return;
}
crm_debug("Processed %s reply from %s %s",
op, ((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
free(op);
}
/*!
* \internal
* \brief Handle a message from an IPC client or CPG peer
*
* \param[in,out] client If not NULL, IPC client that sent message
* \param[in] id If from IPC client, IPC message ID
* \param[in] flags Message flags
* \param[in,out] message Message XML
* \param[in] remote_peer If not NULL, CPG peer that sent message
*/
void
stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
xmlNode *message, const char *remote_peer)
{
int call_options = st_opt_none;
bool is_reply = false;
CRM_CHECK(message != NULL, return);
if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) {
is_reply = true;
}
crm_element_value_int(message, PCMK__XA_ST_CALLOPT, &call_options);
crm_debug("Processing %ssynchronous %s %s %u from %s %s",
pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
crm_element_value(message, PCMK__XA_ST_OP),
(is_reply? "reply" : "request"), id,
((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
if (is_reply) {
handle_reply(client, message, remote_peer);
} else {
pcmk__request_t request = {
.ipc_client = client,
.ipc_id = id,
.ipc_flags = flags,
.peer = remote_peer,
.xml = message,
.call_options = call_options,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP);
CRM_CHECK(request.op != NULL, return);
if (pcmk_is_set(request.call_options, st_opt_sync_call)) {
pcmk__set_request_flags(&request, pcmk__request_sync);
}
handle_request(&request);
pcmk__reset_request(&request);
}
}
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index b7f16693e0..7d31ba07a4 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1,2543 +1,2543 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <regex.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/util.h>
#include <pacemaker-fenced.h>
#define TIMEOUT_MULTIPLY_FACTOR 1.2
/* When one fencer queries its peers for devices able to handle a fencing
* request, each peer will reply with a list of such devices available to it.
* Each reply will be parsed into a peer_device_info_t, with each device's
* information kept in a device_properties_t.
*/
typedef struct device_properties_s {
/* Whether access to this device has been verified */
gboolean verified;
/* The remaining members are indexed by the operation's "phase" */
/* Whether this device has been executed in each phase */
gboolean executed[st_phase_max];
/* Whether this device is disallowed from executing in each phase */
gboolean disallowed[st_phase_max];
/* Action-specific timeout for each phase */
int custom_action_timeout[st_phase_max];
/* Action-specific maximum random delay for each phase */
int delay_max[st_phase_max];
/* Action-specific base delay for each phase */
int delay_base[st_phase_max];
/* Group of enum st_device_flags */
uint32_t device_support_flags;
} device_properties_t;
typedef struct {
/* Name of peer that sent this result */
char *host;
/* Only try peers for non-topology based operations once */
gboolean tried;
/* Number of entries in the devices table */
int ndevices;
/* Devices available to this host that are capable of fencing the target */
GHashTable *devices;
} peer_device_info_t;
GHashTable *stonith_remote_op_list = NULL;
extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
static void request_peer_fencing(remote_fencing_op_t *op,
peer_device_info_t *peer);
static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
const peer_device_info_t *chosen_peer);
static gint
sort_strings(gconstpointer a, gconstpointer b)
{
return strcmp(a, b);
}
static void
free_remote_query(gpointer data)
{
if (data != NULL) {
peer_device_info_t *peer = data;
g_hash_table_destroy(peer->devices);
free(peer->host);
free(peer);
}
}
void
free_stonith_remote_op_list(void)
{
if (stonith_remote_op_list != NULL) {
g_hash_table_destroy(stonith_remote_op_list);
stonith_remote_op_list = NULL;
}
}
struct peer_count_data {
const remote_fencing_op_t *op;
gboolean verified_only;
uint32_t support_action_only;
int count;
};
/*!
* \internal
* \brief Increment a counter if a device has not been executed yet
*
* \param[in] key Device ID (ignored)
* \param[in] value Device properties
* \param[in,out] user_data Peer count data
*/
static void
count_peer_device(gpointer key, gpointer value, gpointer user_data)
{
device_properties_t *props = (device_properties_t*)value;
struct peer_count_data *data = user_data;
if (!props->executed[data->op->phase]
&& (!data->verified_only || props->verified)
&& ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) {
++(data->count);
}
}
/*!
* \internal
* \brief Check the number of available devices in a peer's query results
*
* \param[in] op Operation that results are for
* \param[in] peer Peer to count
* \param[in] verified_only Whether to count only verified devices
* \param[in] support_action_only Whether to count only devices that support action
*
* \return Number of devices available to peer that were not already executed
*/
static int
count_peer_devices(const remote_fencing_op_t *op,
const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only)
{
struct peer_count_data data;
data.op = op;
data.verified_only = verified_only;
data.support_action_only = support_on_action_only;
data.count = 0;
if (peer) {
g_hash_table_foreach(peer->devices, count_peer_device, &data);
}
return data.count;
}
/*!
* \internal
* \brief Search for a device in a query result
*
* \param[in] op Operation that result is for
* \param[in] peer Query result for a peer
* \param[in] device Device ID to search for
*
* \return Device properties if found, NULL otherwise
*/
static device_properties_t *
find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
const char *device, uint32_t support_action_only)
{
device_properties_t *props = g_hash_table_lookup(peer->devices, device);
if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) {
return NULL;
}
return (props && !props->executed[op->phase]
&& !props->disallowed[op->phase])? props : NULL;
}
/*!
* \internal
* \brief Find a device in a peer's device list and mark it as executed
*
* \param[in] op Operation that peer result is for
* \param[in,out] peer Peer with results to search
* \param[in] device ID of device to mark as done
* \param[in] verified_devices_only Only consider verified devices
*
* \return TRUE if device was found and marked, FALSE otherwise
*/
static gboolean
grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
const char *device, gboolean verified_devices_only)
{
device_properties_t *props = find_peer_device(op, peer, device,
fenced_support_flag(op->action));
if ((props == NULL) || (verified_devices_only && !props->verified)) {
return FALSE;
}
crm_trace("Removing %s from %s (%d remaining)",
device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
props->executed[op->phase] = TRUE;
return TRUE;
}
static void
clear_remote_op_timers(remote_fencing_op_t * op)
{
if (op->query_timer) {
g_source_remove(op->query_timer);
op->query_timer = 0;
}
if (op->op_timer_total) {
g_source_remove(op->op_timer_total);
op->op_timer_total = 0;
}
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
op->op_timer_one = 0;
}
}
static void
free_remote_op(gpointer data)
{
remote_fencing_op_t *op = data;
crm_log_xml_debug(op->request, "Destroying");
clear_remote_op_timers(op);
free(op->id);
free(op->action);
free(op->delegate);
free(op->target);
free(op->client_id);
free(op->client_name);
free(op->originator);
if (op->query_results) {
g_list_free_full(op->query_results, free_remote_query);
}
if (op->request) {
free_xml(op->request);
op->request = NULL;
}
if (op->devices_list) {
g_list_free_full(op->devices_list, free);
op->devices_list = NULL;
}
g_list_free_full(op->automatic_list, free);
g_list_free(op->duplicates);
pcmk__reset_result(&op->result);
free(op);
}
void
init_stonith_remote_op_hash_table(GHashTable **table)
{
if (*table == NULL) {
*table = pcmk__strkey_table(NULL, free_remote_op);
}
}
/*!
* \internal
* \brief Return an operation's originally requested action (before any remap)
*
* \param[in] op Operation to check
*
* \return Operation's original action
*/
static const char *
op_requested_action(const remote_fencing_op_t *op)
{
return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action);
}
/*!
* \internal
* \brief Remap a "reboot" operation to the "off" phase
*
* \param[in,out] op Operation to remap
*/
static void
op_phase_off(remote_fencing_op_t *op)
{
crm_info("Remapping multiple-device reboot targeting %s to 'off' "
CRM_XS " id=%.8s", op->target, op->id);
op->phase = st_phase_off;
/* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
* memory allocation at each phase.
*/
strcpy(op->action, PCMK_ACTION_OFF);
}
/*!
* \internal
* \brief Advance a remapped reboot operation to the "on" phase
*
* \param[in,out] op Operation to remap
*/
static void
op_phase_on(remote_fencing_op_t *op)
{
GList *iter = NULL;
crm_info("Remapped 'off' targeting %s complete, "
"remapping to 'on' for %s " CRM_XS " id=%.8s",
op->target, op->client_name, op->id);
op->phase = st_phase_on;
strcpy(op->action, PCMK_ACTION_ON);
/* Skip devices with automatic unfencing, because the cluster will handle it
* when the node rejoins.
*/
for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
GList *match = g_list_find_custom(op->devices_list, iter->data,
sort_strings);
if (match) {
op->devices_list = g_list_remove(op->devices_list, match->data);
}
}
g_list_free_full(op->automatic_list, free);
op->automatic_list = NULL;
/* Rewind device list pointer */
op->devices = op->devices_list;
}
/*!
* \internal
* \brief Reset a remapped reboot operation
*
* \param[in,out] op Operation to reset
*/
static void
undo_op_remap(remote_fencing_op_t *op)
{
if (op->phase > 0) {
crm_info("Undoing remap of reboot targeting %s for %s "
CRM_XS " id=%.8s", op->target, op->client_name, op->id);
op->phase = st_phase_requested;
strcpy(op->action, PCMK_ACTION_REBOOT);
}
}
/*!
* \internal
* \brief Create notification data XML for a fencing operation result
*
* \param[in] op Fencer operation that completed
*
* \return Newly created XML to add as notification data
* \note The caller is responsible for freeing the result.
*/
static xmlNode *
fencing_result2xml(const remote_fencing_op_t *op)
{
- xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
+ xmlNode *notify_data = create_xml_node(NULL, PCMK__XE_ST_NOTIFY_FENCE);
crm_xml_add_int(notify_data, PCMK_XA_STATE, op->state);
crm_xml_add(notify_data, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ACTION, op->action);
crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, op->delegate);
crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(notify_data, PCMK__XA_ST_CLIENTID, op->client_id);
crm_xml_add(notify_data, PCMK__XA_ST_CLIENTNAME, op->client_name);
return notify_data;
}
/*!
* \internal
* \brief Broadcast a fence result notification to all CPG peers
*
* \param[in] op Fencer operation that completed
* \param[in] op_merged Whether this operation is a duplicate of another
*/
void
fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
{
static int count = 0;
xmlNode *bcast = create_xml_node(NULL, PCMK__XE_ST_REPLY);
xmlNode *notify_data = fencing_result2xml(op);
count++;
crm_trace("Broadcasting result to peers");
crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
crm_xml_add_int(bcast, PCMK_XA_COUNT, count);
if (op_merged) {
pcmk__xe_set_bool_attr(bcast, PCMK__XA_ST_OP_MERGED, true);
}
stonith__xe_set_result(notify_data, &op->result);
add_message_xml(bcast, PCMK__XA_ST_CALLDATA, notify_data);
send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
free_xml(notify_data);
free_xml(bcast);
return;
}
/*!
* \internal
* \brief Reply to a local request originator and notify all subscribed clients
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data Top-level XML to add notification to
*/
static void
handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
{
xmlNode *notify_data = NULL;
xmlNode *reply = NULL;
pcmk__client_t *client = NULL;
if (op->notify_sent == TRUE) {
/* nothing to do */
return;
}
/* Do notification with a clean data object */
crm_xml_add_int(data, PCMK_XA_STATE, op->state);
crm_xml_add(data, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(data, PCMK__XA_ST_OP, op->action);
reply = fenced_construct_reply(op->request, data, &op->result);
crm_xml_add(reply, PCMK__XA_ST_DELEGATE, op->delegate);
/* Send fencing OP reply to local client that initiated fencing */
client = pcmk__find_client_by_id(op->client_id);
if (client == NULL) {
crm_trace("Skipping reply to %s: no longer a client", op->client_id);
} else {
do_local_reply(reply, client, op->call_options);
}
/* bcast to all local clients that the fencing operation happend */
notify_data = fencing_result2xml(op);
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, &op->result,
notify_data);
free_xml(notify_data);
fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
/* mark this op as having notify's already sent */
op->notify_sent = TRUE;
free_xml(reply);
}
/*!
* \internal
* \brief Finalize all duplicates of a given fencer operation
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data Top-level XML to add notification to
*/
static void
finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
{
for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *other = iter->data;
if (other->state == st_duplicate) {
other->state = op->state;
crm_debug("Performing duplicate notification for %s@%s: %s "
CRM_XS " id=%.8s",
other->client_name, other->originator,
pcmk_exec_status_str(op->result.execution_status),
other->id);
pcmk__copy_result(&op->result, &other->result);
finalize_op(other, data, true);
} else {
// Possible if (for example) it timed out already
crm_err("Skipping duplicate notification for %s@%s "
CRM_XS " state=%s id=%.8s",
other->client_name, other->originator,
stonith_op_state_str(other->state), other->id);
}
}
}
static char *
delegate_from_xml(xmlNode *xml)
{
xmlNode *match = get_xpath_object("//@" PCMK__XA_ST_DELEGATE, xml,
LOG_NEVER);
if (match == NULL) {
return crm_element_value_copy(xml, PCMK__XA_SRC);
} else {
return crm_element_value_copy(match, PCMK__XA_ST_DELEGATE);
}
}
/*!
* \internal
* \brief Finalize a peer fencing operation
*
* Clean up after a fencing operation completes. This function has two code
* paths: the executioner uses it to broadcast the result to CPG peers, and then
* each peer (including the executioner) uses it to process that broadcast and
* notify its IPC clients of the result.
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data If not NULL, XML reply of last delegated operation
* \param[in] dup Whether this operation is a duplicate of another
* (in which case, do not broadcast the result)
*
* \note The operation result should be set before calling this function.
*/
static void
finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
{
int level = LOG_ERR;
const char *subt = NULL;
xmlNode *local_data = NULL;
gboolean op_merged = FALSE;
CRM_CHECK((op != NULL), return);
// This is a no-op if timers have already been cleared
clear_remote_op_timers(op);
if (op->notify_sent) {
// Most likely, this is a timed-out action that eventually completed
crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
"Result arrived too late " CRM_XS " id=%.8s",
op->action, (op->target? " targeting " : ""),
(op->target? op->target : ""),
(op->delegate? op->delegate : "unknown node"),
op->client_name, op->originator,
(op_merged? " (merged)" : ""),
op->id);
return;
}
set_fencing_completed(op);
undo_op_remap(op);
if (data == NULL) {
data = create_xml_node(NULL, "remote-op");
local_data = data;
} else if (op->delegate == NULL) {
switch (op->result.execution_status) {
case PCMK_EXEC_NO_FENCE_DEVICE:
break;
case PCMK_EXEC_INVALID:
if (op->result.exit_status != CRM_EX_EXPIRED) {
op->delegate = delegate_from_xml(data);
}
break;
default:
op->delegate = delegate_from_xml(data);
break;
}
}
if (dup || (crm_element_value(data, PCMK__XA_ST_OP_MERGED) != NULL)) {
op_merged = true;
}
/* Tell everyone the operation is done, we will continue
* with doing the local notifications once we receive
* the broadcast back. */
subt = crm_element_value(data, PCMK__XA_SUBT);
if (!dup && !pcmk__str_eq(subt, PCMK__VALUE_BROADCAST, pcmk__str_none)) {
/* Defer notification until the bcast message arrives */
fenced_broadcast_op_result(op, op_merged);
free_xml(local_data);
return;
}
if (pcmk__result_ok(&op->result) || dup
|| !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
level = LOG_NOTICE;
}
do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
(op->target? op->target : ""),
(op->delegate? op->delegate : "unknown node"),
op->client_name, op->originator,
(op_merged? " (merged)" : ""),
crm_exit_str(op->result.exit_status),
pcmk_exec_status_str(op->result.execution_status),
((op->result.exit_reason == NULL)? "" : ": "),
((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
op->id);
handle_local_reply_and_notify(op, data);
if (!dup) {
finalize_op_duplicates(op, data);
}
/* Free non-essential parts of the record
* Keep the record around so we can query the history
*/
if (op->query_results) {
g_list_free_full(op->query_results, free_remote_query);
op->query_results = NULL;
}
if (op->request) {
free_xml(op->request);
op->request = NULL;
}
free_xml(local_data);
}
/*!
* \internal
* \brief Finalize a watchdog fencer op after the waiting time expires
*
* \param[in,out] userdata Fencer operation that completed
*
* \return G_SOURCE_REMOVE (which tells glib not to restart timer)
*/
static gboolean
remote_op_watchdog_done(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_one = 0;
crm_notice("Self-fencing (%s) by %s for %s assumed complete "
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
op->state = st_done;
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
finalize_op(op, NULL, false);
return G_SOURCE_REMOVE;
}
static gboolean
remote_op_timeout_one(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_one = 0;
crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
" id=%.8s", op->action, op->target, op->client_name, op->id);
pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
"Peer did not return fence result within timeout");
// The requested delay has been applied for the first device
if (op->client_delay > 0) {
op->client_delay = 0;
crm_trace("Try another device for '%s' action targeting %s "
"for client %s without delay " CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
}
// Try another device, if appropriate
request_peer_fencing(op, NULL);
return G_SOURCE_REMOVE;
}
/*!
* \internal
* \brief Finalize a remote fencer operation that timed out
*
* \param[in,out] op Fencer operation that timed out
* \param[in] reason Readable description of what step timed out
*/
static void
finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
{
crm_debug("Action '%s' targeting %s for client %s timed out "
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
if (op->phase == st_phase_on) {
/* A remapped reboot operation timed out in the "on" phase, but the
* "off" phase completed successfully, so quit trying any further
* devices, and return success.
*/
op->state = st_done;
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
op->state = st_failed;
pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
}
finalize_op(op, NULL, false);
}
/*!
* \internal
* \brief Finalize a remote fencer operation that timed out
*
* \param[in,out] userdata Fencer operation that timed out
*
* \return G_SOURCE_REMOVE (which tells glib not to restart timer)
*/
static gboolean
remote_op_timeout(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_total = 0;
if (op->state == st_done) {
crm_debug("Action '%s' targeting %s for client %s already completed "
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
} else {
finalize_timed_out_op(userdata, "Fencing did not complete within a "
"total timeout based on the "
"configured timeout and retries for "
"any devices attempted");
}
return G_SOURCE_REMOVE;
}
static gboolean
remote_op_query_timeout(gpointer data)
{
remote_fencing_op_t *op = data;
op->query_timer = 0;
if (op->state == st_done) {
crm_debug("Operation %.8s targeting %s already completed",
op->id, op->target);
} else if (op->state == st_exec) {
crm_debug("Operation %.8s targeting %s already in progress",
op->id, op->target);
} else if (op->query_results) {
// Query succeeded, so attempt the actual fencing
crm_debug("Query %.8s targeting %s complete (state=%s)",
op->id, op->target, stonith_op_state_str(op->state));
request_peer_fencing(op, NULL);
} else {
crm_debug("Query %.8s targeting %s timed out (state=%s)",
op->id, op->target, stonith_op_state_str(op->state));
finalize_timed_out_op(op, "No capable peers replied to device query "
"within timeout");
}
return G_SOURCE_REMOVE;
}
static gboolean
topology_is_empty(stonith_topology_t *tp)
{
int i;
if (tp == NULL) {
return TRUE;
}
for (i = 0; i < ST_LEVEL_MAX; i++) {
if (tp->levels[i] != NULL) {
return FALSE;
}
}
return TRUE;
}
/*!
* \internal
* \brief Add a device to an operation's automatic unfencing list
*
* \param[in,out] op Operation to modify
* \param[in] device Device ID to add
*/
static void
add_required_device(remote_fencing_op_t *op, const char *device)
{
GList *match = g_list_find_custom(op->automatic_list, device,
sort_strings);
if (!match) {
op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
}
}
/*!
* \internal
* \brief Remove a device from the automatic unfencing list
*
* \param[in,out] op Operation to modify
* \param[in] device Device ID to remove
*/
static void
remove_required_device(remote_fencing_op_t *op, const char *device)
{
GList *match = g_list_find_custom(op->automatic_list, device,
sort_strings);
if (match) {
op->automatic_list = g_list_remove(op->automatic_list, match->data);
}
}
/* deep copy the device list */
static void
set_op_device_list(remote_fencing_op_t * op, GList *devices)
{
GList *lpc = NULL;
if (op->devices_list) {
g_list_free_full(op->devices_list, free);
op->devices_list = NULL;
}
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
}
op->devices = op->devices_list;
}
/*!
* \internal
* \brief Check whether a node matches a topology target
*
* \param[in] tp Topology table entry to check
* \param[in] node Name of node to check
*
* \return TRUE if node matches topology target
*/
static gboolean
topology_matches(const stonith_topology_t *tp, const char *node)
{
regex_t r_patt;
CRM_CHECK(node && tp && tp->target, return FALSE);
switch (tp->kind) {
case fenced_target_by_attribute:
/* This level targets by attribute, so tp->target is a NAME=VALUE pair
* of a permanent attribute applied to targeted nodes. The test below
* relies on the locally cached copy of the CIB, so if fencing needs to
* be done before the initial CIB is received or after a malformed CIB
* is received, then the topology will be unable to be used.
*/
if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
crm_notice("Matched %s with %s by attribute", node, tp->target);
return TRUE;
}
break;
case fenced_target_by_pattern:
/* This level targets node names matching a pattern, so tp->target
* (and tp->target_pattern) is a regular expression.
*/
if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
crm_info("Bad regex '%s' for fencing level", tp->target);
} else {
int status = regexec(&r_patt, node, 0, NULL, 0);
regfree(&r_patt);
if (status == 0) {
crm_notice("Matched %s with %s by name", node, tp->target);
return TRUE;
}
}
break;
case fenced_target_by_name:
crm_trace("Testing %s against %s", node, tp->target);
return pcmk__str_eq(tp->target, node, pcmk__str_casei);
default:
break;
}
crm_trace("No match for %s with %s", node, tp->target);
return FALSE;
}
stonith_topology_t *
find_topology_for_host(const char *host)
{
GHashTableIter tIter;
stonith_topology_t *tp = g_hash_table_lookup(topology, host);
if(tp != NULL) {
crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
return tp;
}
g_hash_table_iter_init(&tIter, topology);
while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
if (topology_matches(tp, host)) {
crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
return tp;
}
}
crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
return NULL;
}
/*!
* \internal
* \brief Set fencing operation's device list to target's next topology level
*
* \param[in,out] op Remote fencing operation to modify
* \param[in] empty_ok If true, an operation without a target (i.e.
* queries) or a target without a topology will get a
* pcmk_rc_ok return value instead of ENODEV
*
* \return Standard Pacemaker return value
*/
static int
advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
{
stonith_topology_t *tp = NULL;
if (op->target) {
tp = find_topology_for_host(op->target);
}
if (topology_is_empty(tp)) {
return empty_ok? pcmk_rc_ok : ENODEV;
}
CRM_ASSERT(tp->levels != NULL);
stonith__set_call_options(op->call_options, op->id, st_opt_topology);
/* This is a new level, so undo any remapping left over from previous */
undo_op_remap(op);
do {
op->level++;
} while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
if (op->level < ST_LEVEL_MAX) {
crm_trace("Attempting fencing level %d targeting %s (%d devices) "
"for client %s@%s (id=%.8s)",
op->level, op->target, g_list_length(tp->levels[op->level]),
op->client_name, op->originator, op->id);
set_op_device_list(op, tp->levels[op->level]);
// The requested delay has been applied for the first fencing level
if ((op->level > 1) && (op->client_delay > 0)) {
op->client_delay = 0;
}
if ((g_list_next(op->devices_list) != NULL)
&& pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* A reboot has been requested for a topology level with multiple
* devices. Instead of rebooting the devices sequentially, we will
* turn them all off, then turn them all on again. (Think about
* switched power outlets for redundant power supplies.)
*/
op_phase_off(op);
}
return pcmk_rc_ok;
}
crm_info("All %sfencing options targeting %s for client %s@%s failed "
CRM_XS " id=%.8s",
(stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
op->target, op->client_name, op->originator, op->id);
return ENODEV;
}
/*!
* \internal
* \brief If fencing operation is a duplicate, merge it into the other one
*
* \param[in,out] op Fencing operation to check
*/
static void
merge_duplicates(remote_fencing_op_t *op)
{
GHashTableIter iter;
remote_fencing_op_t *other = NULL;
time_t now = time(NULL);
g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
const char *other_action = op_requested_action(other);
if (!strcmp(op->id, other->id)) {
continue; // Don't compare against self
}
if (other->state > st_exec) {
crm_trace("%.8s not duplicate of %.8s: not in progress",
op->id, other->id);
continue;
}
if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
op->id, other->id, op->target, other->target);
continue;
}
if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
op->id, other->id, op->action, other_action);
continue;
}
if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: same client %s",
op->id, other->id, op->client_name);
continue;
}
if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: suicide for %s",
op->id, other->id, other->target);
continue;
}
if (!fencing_peer_active(pcmk__get_node(0, other->originator, NULL,
pcmk__node_search_cluster))) {
crm_notice("Failing action '%s' targeting %s originating from "
"client %s@%s: Originator is dead " CRM_XS " id=%.8s",
other->action, other->target, other->client_name,
other->originator, other->id);
crm_trace("%.8s not duplicate of %.8s: originator dead",
op->id, other->id);
other->state = st_failed;
continue;
}
if ((other->total_timeout > 0)
&& (now > (other->total_timeout + other->created))) {
crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)",
op->id, other->id, now, other->created,
other->total_timeout);
continue;
}
/* There is another in-flight request to fence the same host
* Piggyback on that instead. If it fails, so do we.
*/
other->duplicates = g_list_append(other->duplicates, op);
if (other->total_timeout == 0) {
other->total_timeout = op->total_timeout =
TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
crm_trace("Best guess as to timeout used for %.8s: %d",
other->id, other->total_timeout);
}
crm_notice("Merging fencing action '%s' targeting %s originating from "
"client %s with identical request from %s@%s "
CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
op->action, op->target, op->client_name,
other->client_name, other->originator,
op->id, other->id, other->total_timeout);
report_timeout_period(op, other->total_timeout);
op->state = st_duplicate;
}
}
static uint32_t fencing_active_peers(void)
{
uint32_t count = 0;
crm_node_t *entry;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
if(fencing_peer_active(entry)) {
count++;
}
}
return count;
}
/*!
* \internal
* \brief Process a manual confirmation of a pending fence action
*
* \param[in] client IPC client that sent confirmation
* \param[in,out] msg Request XML with manual confirmation
*
* \return Standard Pacemaker return code
*/
int
fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
{
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return EPROTO);
crm_notice("Received manual confirmation that %s has been fenced",
pcmk__s(crm_element_value(dev, PCMK__XA_ST_TARGET),
"unknown target"));
op = initiate_remote_stonith_op(client, msg, TRUE);
if (op == NULL) {
return EPROTO;
}
op->state = st_done;
set_fencing_completed(op);
op->delegate = strdup("a human");
// For the fencer's purposes, the fencing operation is done
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
finalize_op(op, msg, false);
/* For the requester's purposes, the operation is still pending. The
* actual result will be sent asynchronously via the operation's done_cb().
*/
return EINPROGRESS;
}
/*!
* \internal
* \brief Create a new remote stonith operation
*
* \param[in] client ID of local stonith client that initiated the operation
* \param[in] request The request from the client that started the operation
* \param[in] peer TRUE if this operation is owned by another stonith peer
* (an operation owned by one peer is stored on all peers,
* but only the owner executes it; all nodes get the results
* once the owner finishes execution)
*/
void *
create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
{
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request,
LOG_NEVER);
int call_options = 0;
const char *operation = NULL;
init_stonith_remote_op_hash_table(&stonith_remote_op_list);
/* If this operation is owned by another node, check to make
* sure we haven't already created this operation. */
if (peer && dev) {
const char *op_id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
CRM_CHECK(op_id != NULL, return NULL);
op = g_hash_table_lookup(stonith_remote_op_list, op_id);
if (op) {
crm_debug("Reusing existing remote fencing op %.8s for %s",
op_id, ((client == NULL)? "unknown client" : client));
return op;
}
}
op = calloc(1, sizeof(remote_fencing_op_t));
CRM_ASSERT(op != NULL);
crm_element_value_int(request, PCMK__XA_ST_TIMEOUT, &(op->base_timeout));
// Value -1 means disable any static/random fencing delays
crm_element_value_int(request, PCMK__XA_ST_DELAY, &(op->client_delay));
if (peer && dev) {
op->id = crm_element_value_copy(dev, PCMK__XA_ST_REMOTE_OP);
} else {
op->id = crm_generate_uuid();
}
g_hash_table_replace(stonith_remote_op_list, op->id, op);
op->state = st_query;
op->replies_expected = fencing_active_peers();
op->action = crm_element_value_copy(dev, PCMK__XA_ST_DEVICE_ACTION);
/* The node initiating the stonith operation. If an operation is relayed,
* this is the last node the operation lands on. When in standalone mode,
* origin is the ID of the client that originated the operation.
*
* Or may be the name of the function that created the operation.
*/
op->originator = crm_element_value_copy(dev, PCMK__XA_ST_ORIGIN);
// Delegate may not be set
op->delegate = crm_element_value_copy(dev, PCMK__XA_ST_DELEGATE);
op->created = time(NULL);
if (op->originator == NULL) {
/* Local or relayed request */
op->originator = strdup(stonith_our_uname);
}
CRM_LOG_ASSERT(client != NULL);
if (client) {
op->client_id = strdup(client);
}
/* For a RELAY operation, set fenced on the client. */
operation = crm_element_value(request, PCMK__XA_ST_OP);
if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
} else {
op->client_name = crm_element_value_copy(request,
PCMK__XA_ST_CLIENTNAME);
}
op->target = crm_element_value_copy(dev, PCMK__XA_ST_TARGET);
op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options);
op->call_options = call_options;
crm_element_value_int(request, PCMK__XA_ST_CALLID, &(op->client_callid));
crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
"base timeout %d, %u %s expected)",
(peer && dev)? "Recorded" : "Generated", op->id, op->action,
op->target, op->client_name, op->base_timeout,
op->replies_expected,
pcmk__plural_alt(op->replies_expected, "reply", "replies"));
if (op->call_options & st_opt_cs_nodeid) {
int nodeid;
crm_node_t *node;
pcmk__scan_min_int(op->target, &nodeid, 0);
node = pcmk__search_node_caches(nodeid, NULL,
pcmk__node_search_any
|pcmk__node_search_known);
/* Ensure the conversion only happens once */
stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
if (node && node->uname) {
free(op->target);
op->target = strdup(node->uname);
} else {
crm_warn("Could not expand nodeid '%s' into a host name", op->target);
}
}
/* check to see if this is a duplicate operation of another in-flight operation */
merge_duplicates(op);
if (op->state != st_duplicate) {
/* kick history readers */
fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
}
/* safe to trim as long as that doesn't touch pending ops */
stonith_fence_history_trim();
return op;
}
/*!
* \internal
* \brief Create a peer fencing operation from a request, and initiate it
*
* \param[in] client IPC client that made request (NULL to get from request)
* \param[in] request Request XML
* \param[in] manual_ack Whether this is a manual action confirmation
*
* \return Newly created operation on success, otherwise NULL
*/
remote_fencing_op_t *
initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
gboolean manual_ack)
{
int query_timeout = 0;
xmlNode *query = NULL;
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
const char *relay_op_id = NULL;
const char *operation = NULL;
if (client) {
client_id = client->id;
} else {
client_id = crm_element_value(request, PCMK__XA_ST_CLIENTID);
}
CRM_LOG_ASSERT(client_id != NULL);
op = create_remote_stonith_op(client_id, request, FALSE);
op->owner = TRUE;
if (manual_ack) {
return op;
}
CRM_CHECK(op->action, return NULL);
if (advance_topology_level(op, true) != pcmk_rc_ok) {
op->state = st_failed;
}
switch (op->state) {
case st_failed:
// advance_topology_level() exhausted levels
pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"All topology levels failed");
crm_warn("Could not request peer fencing (%s) targeting %s "
CRM_XS " id=%.8s", op->action, op->target, op->id);
finalize_op(op, NULL, false);
return op;
case st_duplicate:
crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
CRM_XS " id=%.8s", op->action, op->target, op->id);
return op;
default:
crm_notice("Requesting peer fencing (%s) targeting %s "
CRM_XS " id=%.8s state=%s base_timeout=%d",
op->action, op->target, op->id,
stonith_op_state_str(op->state), op->base_timeout);
}
query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
NULL, op->call_options);
crm_xml_add(query, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(query, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(query, PCMK__XA_ST_DEVICE_ACTION, op_requested_action(op));
crm_xml_add(query, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(query, PCMK__XA_ST_CLIENTID, op->client_id);
crm_xml_add(query, PCMK__XA_ST_CLIENTNAME, op->client_name);
crm_xml_add_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout);
/* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */
operation = crm_element_value(request, PCMK__XA_ST_OP);
if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
if (relay_op_id) {
crm_xml_add(query, PCMK__XA_ST_REMOTE_OP_RELAY, relay_op_id);
}
}
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
return op;
}
enum find_best_peer_options {
/*! Skip checking the target peer for capable fencing devices */
FIND_PEER_SKIP_TARGET = 0x0001,
/*! Only check the target peer for capable fencing devices */
FIND_PEER_TARGET_ONLY = 0x0002,
/*! Skip peers and devices that are not verified */
FIND_PEER_VERIFIED_ONLY = 0x0004,
};
static peer_device_info_t *
find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
{
GList *iter = NULL;
gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
return NULL;
}
for (iter = op->query_results; iter != NULL; iter = iter->next) {
peer_device_info_t *peer = iter->data;
crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
peer->host, op->target, peer->ndevices,
pcmk__plural_s(peer->ndevices), peer->tried, options);
if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
continue;
}
if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
continue;
}
if (pcmk_is_set(op->call_options, st_opt_topology)) {
if (grab_peer_device(op, peer, device, verified_devices_only)) {
return peer;
}
} else if (!peer->tried
&& count_peer_devices(op, peer, verified_devices_only,
fenced_support_flag(op->action))) {
/* No topology: Use the current best peer */
crm_trace("Simple fencing");
return peer;
}
}
return NULL;
}
static peer_device_info_t *
stonith_choose_peer(remote_fencing_op_t * op)
{
const char *device = NULL;
peer_device_info_t *peer = NULL;
uint32_t active = fencing_active_peers();
do {
if (op->devices) {
device = op->devices->data;
crm_trace("Checking for someone to fence (%s) %s using %s",
op->action, op->target, device);
} else {
crm_trace("Checking for someone to fence (%s) %s",
op->action, op->target);
}
/* Best choice is a peer other than the target with verified access */
peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
if (peer) {
crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
return peer;
}
if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
return NULL;
}
/* If no other peer has verified access, next best is unverified access */
peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
if (peer) {
crm_trace("Found best unverified peer %s", peer->host);
return peer;
}
/* If no other peer can do it, last option is self-fencing
* (which is never allowed for the "on" phase of a remapped reboot)
*/
if (op->phase != st_phase_on) {
peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
if (peer) {
crm_trace("%s will fence itself", peer->host);
return peer;
}
}
/* Try the next fencing level if there is one (unless we're in the "on"
* phase of a remapped "reboot", because we ignore errors in that case)
*/
} while ((op->phase != st_phase_on)
&& pcmk_is_set(op->call_options, st_opt_topology)
&& (advance_topology_level(op, false) == pcmk_rc_ok));
if ((stonith_watchdog_timeout_ms > 0)
&& pcmk__is_fencing_action(op->action)
&& pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
&& node_does_watchdog_fencing(op->target)) {
crm_info("Couldn't contact watchdog-fencing target-node (%s)",
op->target);
/* check_watchdog_fencing_and_wait will log additional info */
} else {
crm_notice("Couldn't find anyone to fence (%s) %s using %s",
op->action, op->target, (device? device : "any device"));
}
return NULL;
}
static int
get_device_timeout(const remote_fencing_op_t *op,
const peer_device_info_t *peer, const char *device,
bool with_delay)
{
device_properties_t *props;
int delay = 0;
if (!peer || !device) {
return op->base_timeout;
}
props = g_hash_table_lookup(peer->devices, device);
if (!props) {
return op->base_timeout;
}
// op->client_delay < 0 means disable any static/random fencing delays
if (with_delay && (op->client_delay >= 0)) {
// delay_base is eventually limited by delay_max
delay = (props->delay_max[op->phase] > 0 ?
props->delay_max[op->phase] : props->delay_base[op->phase]);
}
return (props->custom_action_timeout[op->phase]?
props->custom_action_timeout[op->phase] : op->base_timeout)
+ delay;
}
struct timeout_data {
const remote_fencing_op_t *op;
const peer_device_info_t *peer;
int total_timeout;
};
/*!
* \internal
* \brief Add timeout to a total if device has not been executed yet
*
* \param[in] key GHashTable key (device ID)
* \param[in] value GHashTable value (device properties)
* \param[in,out] user_data Timeout data
*/
static void
add_device_timeout(gpointer key, gpointer value, gpointer user_data)
{
const char *device_id = key;
device_properties_t *props = value;
struct timeout_data *timeout = user_data;
if (!props->executed[timeout->op->phase]
&& !props->disallowed[timeout->op->phase]) {
timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer,
device_id, true);
}
}
static int
get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
{
struct timeout_data timeout;
timeout.op = op;
timeout.peer = peer;
timeout.total_timeout = 0;
g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
}
static int
get_op_total_timeout(const remote_fencing_op_t *op,
const peer_device_info_t *chosen_peer)
{
long long total_timeout = 0;
stonith_topology_t *tp = find_topology_for_host(op->target);
if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
int i;
GList *device_list = NULL;
GList *iter = NULL;
GList *auto_list = NULL;
if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)
&& (op->automatic_list != NULL)) {
auto_list = g_list_copy(op->automatic_list);
}
/* Yep, this looks scary, nested loops all over the place.
* Here is what is going on.
* Loop1: Iterate through fencing levels.
* Loop2: If a fencing level has devices, loop through each device
* Loop3: For each device in a fencing level, see what peer owns it
* and what that peer has reported the timeout is for the device.
*/
for (i = 0; i < ST_LEVEL_MAX; i++) {
if (!tp->levels[i]) {
continue;
}
for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
/* in case of watchdog-device we add the timeout to the budget
regardless of if we got a reply or not
*/
if ((stonith_watchdog_timeout_ms > 0)
&& pcmk__is_fencing_action(op->action)
&& pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
pcmk__str_none)
&& node_does_watchdog_fencing(op->target)) {
total_timeout += stonith_watchdog_timeout_ms / 1000;
continue;
}
for (iter = op->query_results; iter != NULL; iter = iter->next) {
const peer_device_info_t *peer = iter->data;
if (auto_list) {
GList *match = g_list_find_custom(auto_list, device_list->data,
sort_strings);
if (match) {
auto_list = g_list_remove(auto_list, match->data);
}
}
if (find_peer_device(op, peer, device_list->data,
fenced_support_flag(op->action))) {
total_timeout += get_device_timeout(op, peer,
device_list->data,
true);
break;
}
} /* End Loop3: match device with peer that owns device, find device's timeout period */
} /* End Loop2: iterate through devices at a specific level */
} /*End Loop1: iterate through fencing levels */
//Add only exists automatic_list device timeout
if (auto_list) {
for (iter = auto_list; iter != NULL; iter = iter->next) {
GList *iter2 = NULL;
for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) {
peer_device_info_t *peer = iter2->data;
if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
total_timeout += get_device_timeout(op, peer,
iter->data, true);
break;
}
}
}
}
g_list_free(auto_list);
} else if (chosen_peer) {
total_timeout = get_peer_timeout(op, chosen_peer);
} else {
total_timeout = op->base_timeout;
}
if (total_timeout <= 0) {
total_timeout = op->base_timeout;
}
/* Take any requested fencing delay into account to prevent it from eating
* up the total timeout.
*/
if (op->client_delay > 0) {
total_timeout += op->client_delay;
}
return (int) QB_MIN(total_timeout, INT_MAX);
}
static void
report_timeout_period(remote_fencing_op_t * op, int op_timeout)
{
GList *iter = NULL;
xmlNode *update = NULL;
const char *client_node = NULL;
const char *client_id = NULL;
const char *call_id = NULL;
if (op->call_options & st_opt_sync_call) {
/* There is no reason to report the timeout for a synchronous call. It
* is impossible to use the reported timeout to do anything when the client
* is blocking for the response. This update is only important for
* async calls that require a callback to report the results in. */
return;
} else if (!op->request) {
return;
}
crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
client_node = crm_element_value(op->request, PCMK__XA_ST_CLIENTNODE);
call_id = crm_element_value(op->request, PCMK__XA_ST_CALLID);
client_id = crm_element_value(op->request, PCMK__XA_ST_CLIENTID);
if (!client_node || !call_id || !client_id) {
return;
}
if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
// Client is connected to this node, so send update directly to them
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
return;
}
/* The client is connected to another node, relay this update to them */
update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
crm_xml_add(update, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(update, PCMK__XA_ST_CLIENTID, client_id);
crm_xml_add(update, PCMK__XA_ST_CALLID, call_id);
crm_xml_add_int(update, PCMK__XA_ST_TIMEOUT, op_timeout);
send_cluster_message(pcmk__get_node(0, client_node, NULL,
pcmk__node_search_cluster),
crm_msg_stonith_ng, update, FALSE);
free_xml(update);
for (iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *dup = iter->data;
crm_trace("Reporting timeout for duplicate %.8s to client %s",
dup->id, dup->client_name);
report_timeout_period(iter->data, op_timeout);
}
}
/*!
* \internal
* \brief Advance an operation to the next device in its topology
*
* \param[in,out] op Fencer operation to advance
* \param[in] device ID of device that just completed
* \param[in,out] msg If not NULL, XML reply of last delegated operation
*/
static void
advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
xmlNode *msg)
{
/* Advance to the next device at this topology level, if any */
if (op->devices) {
op->devices = op->devices->next;
}
/* Handle automatic unfencing if an "on" action was requested */
if ((op->phase == st_phase_requested)
&& pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) {
/* If the device we just executed was required, it's not anymore */
remove_required_device(op, device);
/* If there are no more devices at this topology level, run through any
* remaining devices with automatic unfencing
*/
if (op->devices == NULL) {
op->devices = op->automatic_list;
}
}
if ((op->devices == NULL) && (op->phase == st_phase_off)) {
/* We're done with this level and with required devices, but we had
* remapped "reboot" to "off", so start over with "on". If any devices
* need to be turned back on, op->devices will be non-NULL after this.
*/
op_phase_on(op);
}
// This function is only called if the previous device succeeded
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (op->devices) {
/* Necessary devices remain, so execute the next one */
crm_trace("Next targeting %s on behalf of %s@%s",
op->target, op->client_name, op->originator);
// The requested delay has been applied for the first device
if (op->client_delay > 0) {
op->client_delay = 0;
}
request_peer_fencing(op, NULL);
} else {
/* We're done with all devices and phases, so finalize operation */
crm_trace("Marking complex fencing op targeting %s as complete",
op->target);
op->state = st_done;
finalize_op(op, msg, false);
}
}
static gboolean
check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
{
if (node_does_watchdog_fencing(op->target)) {
guint timeout_ms = QB_MIN(stonith_watchdog_timeout_ms, UINT_MAX);
crm_notice("Waiting %s for %s to self-fence (%s) for "
"client %s " CRM_XS " id=%.8s",
pcmk__readable_interval(timeout_ms), op->target, op->action,
op->client_name, op->id);
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
}
op->op_timer_one = g_timeout_add(timeout_ms, remote_op_watchdog_done,
op);
return TRUE;
} else {
crm_debug("Skipping fallback to watchdog-fencing as %s is "
"not in host-list", op->target);
}
return FALSE;
}
/*!
* \internal
* \brief Ask a peer to execute a fencing operation
*
* \param[in,out] op Fencing operation to be executed
* \param[in,out] peer If NULL or topology is in use, choose best peer to
* execute the fencing, otherwise use this peer
*/
static void
request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
{
const char *device = NULL;
int timeout;
CRM_CHECK(op != NULL, return);
crm_trace("Action %.8s targeting %s for %s is %s",
op->id, op->target, op->client_name,
stonith_op_state_str(op->state));
if ((op->phase == st_phase_on) && (op->devices != NULL)) {
/* We are in the "on" phase of a remapped topology reboot. If this
* device has pcmk_reboot_action="off", or doesn't support the "on"
* action, skip it.
*
* We can't check device properties at this point because we haven't
* chosen a peer for this stage yet. Instead, we check the local node's
* knowledge about the device. If different versions of the fence agent
* are installed on different nodes, there's a chance this could be
* mistaken, but the worst that could happen is we don't try turning the
* node back on when we should.
*/
device = op->devices->data;
if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF,
pcmk__str_none)) {
crm_info("Not turning %s back on using %s because the device is "
"configured to stay off (pcmk_reboot_action='off')",
op->target, device);
advance_topology_device_in_level(op, device, NULL);
return;
}
if (!fenced_device_supports_on(device)) {
crm_info("Not turning %s back on using %s because the agent "
"doesn't support 'on'", op->target, device);
advance_topology_device_in_level(op, device, NULL);
return;
}
}
timeout = op->base_timeout;
if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
peer = stonith_choose_peer(op);
}
if (!op->op_timer_total) {
op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
report_timeout_period(op, op->total_timeout);
crm_info("Total timeout set to %d for peer's fencing targeting %s for %s"
CRM_XS "id=%.8s",
op->total_timeout, op->target, op->client_name, op->id);
}
if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
/* Ignore the caller's peer preference if topology is in use, because
* that peer might not have access to the required device. With
* topology, stonith_choose_peer() removes the device from further
* consideration, so the timeout must be calculated beforehand.
*
* @TODO Basing the total timeout on the caller's preferred peer (above)
* is less than ideal.
*/
peer = stonith_choose_peer(op);
device = op->devices->data;
/* Fencing timeout sent to peer takes no delay into account.
* The peer will add a dedicated timer for any delay upon
* schedule_stonith_command().
*/
timeout = get_device_timeout(op, peer, device, false);
}
if (peer) {
int timeout_one = 0;
xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
if (op->client_delay > 0) {
/* Take requested fencing delay into account to prevent it from
* eating up the timeout.
*/
timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay;
}
crm_xml_add(remote_op, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(remote_op, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ACTION, op->action);
crm_xml_add(remote_op, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(remote_op, PCMK__XA_ST_CLIENTID, op->client_id);
crm_xml_add(remote_op, PCMK__XA_ST_CLIENTNAME, op->client_name);
crm_xml_add_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout);
crm_xml_add_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options);
crm_xml_add_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay);
if (device) {
timeout_one += TIMEOUT_MULTIPLY_FACTOR *
get_device_timeout(op, peer, device, true);
crm_notice("Requesting that %s perform '%s' action targeting %s "
"using %s " CRM_XS " for client %s (%ds)",
peer->host, op->action, op->target, device,
op->client_name, timeout_one);
crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ID, device);
} else {
timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
crm_notice("Requesting that %s perform '%s' action targeting %s "
CRM_XS " for client %s (%ds, %lds)",
peer->host, op->action, op->target, op->client_name,
timeout_one, stonith_watchdog_timeout_ms);
}
op->state = st_exec;
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
op->op_timer_one = 0;
}
if (!((stonith_watchdog_timeout_ms > 0)
&& (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
|| (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
&& pcmk__is_fencing_action(op->action)))
&& check_watchdog_fencing_and_wait(op))) {
/* Some thoughts about self-fencing cases reaching this point:
- Actually check in check_watchdog_fencing_and_wait
shouldn't fail if STONITH_WATCHDOG_ID is
chosen as fencing-device and it being present implies
watchdog-fencing is enabled anyway
- If watchdog-fencing is disabled either in general or for
a specific target - detected in check_watchdog_fencing_and_wait -
for some other kind of self-fencing we can't expect
a success answer but timeout is fine if the node doesn't
come back in between
- Delicate might be the case where we have watchdog-fencing
enabled for a node but the watchdog-fencing-device isn't
explicitly chosen for suicide. Local pe-execution in sbd
may detect the node as unclean and lead to timely suicide.
Otherwise the selection of PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
at least is questionable.
*/
/* coming here we're not waiting for watchdog timeout -
thus engage timer with timout evaluated before */
op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
}
send_cluster_message(pcmk__get_node(0, peer->host, NULL,
pcmk__node_search_cluster),
crm_msg_stonith_ng, remote_op, FALSE);
peer->tried = TRUE;
free_xml(remote_op);
return;
} else if (op->phase == st_phase_on) {
/* A remapped "on" cannot be executed, but the node was already
* turned off successfully, so ignore the error and continue.
*/
crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
"after successful 'off'", device, op->target);
advance_topology_device_in_level(op, device, NULL);
return;
} else if (op->owner == FALSE) {
crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
op->action, op->target, op->client_name);
} else if (op->query_timer == 0) {
/* We've exhausted all available peers */
crm_info("No remaining peers capable of fencing (%s) %s for client %s "
CRM_XS " state=%s", op->action, op->target, op->client_name,
stonith_op_state_str(op->state));
CRM_CHECK(op->state < st_done, return);
finalize_timed_out_op(op, "All nodes failed, or are unable, to "
"fence target");
} else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
/* if the operation never left the query state,
* but we have all the expected replies, then no devices
* are available to execute the fencing operation. */
if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
if (check_watchdog_fencing_and_wait(op)) {
return;
}
}
if (op->state == st_query) {
crm_info("No peers (out of %d) have devices capable of fencing "
"(%s) %s for client %s " CRM_XS " state=%s",
op->replies, op->action, op->target, op->client_name,
stonith_op_state_str(op->state));
pcmk__reset_result(&op->result);
pcmk__set_result(&op->result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL);
} else {
if (pcmk_is_set(op->call_options, st_opt_topology)) {
pcmk__reset_result(&op->result);
pcmk__set_result(&op->result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL);
}
/* ... else use existing result from previous failed attempt
* (topology is not in use, and no devices remain to be attempted).
* Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
* prevent finalize_op() from setting the correct delegate if
* needed.
*/
crm_info("No peers (out of %d) are capable of fencing (%s) %s "
"for client %s " CRM_XS " state=%s",
op->replies, op->action, op->target, op->client_name,
stonith_op_state_str(op->state));
}
op->state = st_failed;
finalize_op(op, NULL, false);
} else {
crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
"for client %s " CRM_XS " id=%.8s",
op->action, op->target, (device? " using " : ""),
(device? device : ""), op->client_name, op->id);
}
}
/*!
* \internal
* \brief Comparison function for sorting query results
*
* \param[in] a GList item to compare
* \param[in] b GList item to compare
*
* \return Per the glib documentation, "a negative integer if the first value
* comes before the second, 0 if they are equal, or a positive integer
* if the first value comes after the second."
*/
static gint
sort_peers(gconstpointer a, gconstpointer b)
{
const peer_device_info_t *peer_a = a;
const peer_device_info_t *peer_b = b;
return (peer_b->ndevices - peer_a->ndevices);
}
/*!
* \internal
* \brief Determine if all the devices in the topology are found or not
*
* \param[in] op Fencing operation with topology to check
*/
static gboolean
all_topology_devices_found(const remote_fencing_op_t *op)
{
GList *device = NULL;
GList *iter = NULL;
device_properties_t *match = NULL;
stonith_topology_t *tp = NULL;
gboolean skip_target = FALSE;
int i;
tp = find_topology_for_host(op->target);
if (!tp) {
return FALSE;
}
if (pcmk__is_fencing_action(op->action)) {
/* Don't count the devices on the target node if we are killing
* the target node. */
skip_target = TRUE;
}
for (i = 0; i < ST_LEVEL_MAX; i++) {
for (device = tp->levels[i]; device; device = device->next) {
match = NULL;
for (iter = op->query_results; iter && !match; iter = iter->next) {
peer_device_info_t *peer = iter->data;
if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
continue;
}
match = find_peer_device(op, peer, device->data, st_device_supports_none);
}
if (!match) {
return FALSE;
}
}
}
return TRUE;
}
/*!
* \internal
* \brief Parse action-specific device properties from XML
*
* \param[in] xml XML element containing the properties
* \param[in] peer Name of peer that sent XML (for logs)
* \param[in] device Device ID (for logs)
* \param[in] action Action the properties relate to (for logs)
* \param[in,out] op Fencing operation that properties are being parsed for
* \param[in] phase Phase the properties relate to
* \param[in,out] props Device properties to update
*/
static void
parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
const char *action, remote_fencing_op_t *op,
enum st_remap_phase phase, device_properties_t *props)
{
props->custom_action_timeout[phase] = 0;
crm_element_value_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
&props->custom_action_timeout[phase]);
if (props->custom_action_timeout[phase]) {
crm_trace("Peer %s with device %s returned %s action timeout %d",
peer, device, action, props->custom_action_timeout[phase]);
}
props->delay_max[phase] = 0;
crm_element_value_int(xml, PCMK__XA_ST_DELAY_MAX, &props->delay_max[phase]);
if (props->delay_max[phase]) {
crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
peer, device, props->delay_max[phase], action);
}
props->delay_base[phase] = 0;
crm_element_value_int(xml, PCMK__XA_ST_DELAY_BASE,
&props->delay_base[phase]);
if (props->delay_base[phase]) {
crm_trace("Peer %s with device %s returned base delay %d for %s",
peer, device, props->delay_base[phase], action);
}
/* Handle devices with automatic unfencing */
if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
int required = 0;
crm_element_value_int(xml, PCMK__XA_ST_REQUIRED, &required);
if (required) {
crm_trace("Peer %s requires device %s to execute for action %s",
peer, device, action);
add_required_device(op, device);
}
}
/* If a reboot is remapped to off+on, it's possible that a node is allowed
* to perform one action but not another.
*/
if (pcmk__xe_attr_is_true(xml, PCMK__XA_ST_ACTION_DISALLOWED)) {
props->disallowed[phase] = TRUE;
crm_trace("Peer %s is disallowed from executing %s for device %s",
peer, action, device);
}
}
/*!
* \internal
* \brief Parse one device's properties from peer's XML query reply
*
* \param[in] xml XML node containing device properties
* \param[in,out] op Operation that query and reply relate to
* \param[in,out] peer Peer's device information
* \param[in] device ID of device being parsed
*/
static void
add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
peer_device_info_t *peer, const char *device)
{
xmlNode *child;
int verified = 0;
device_properties_t *props = calloc(1, sizeof(device_properties_t));
int flags = st_device_supports_on; /* Old nodes that don't set the flag assume they support the on action */
/* Add a new entry to this peer's devices list */
CRM_ASSERT(props != NULL);
g_hash_table_insert(peer->devices, strdup(device), props);
/* Peers with verified (monitored) access will be preferred */
crm_element_value_int(xml, PCMK__XA_ST_MONITOR_VERIFIED, &verified);
if (verified) {
crm_trace("Peer %s has confirmed a verified device %s",
peer->host, device);
props->verified = TRUE;
}
crm_element_value_int(xml, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, &flags);
props->device_support_flags = flags;
/* Parse action-specific device properties */
parse_action_specific(xml, peer->host, device, op_requested_action(op),
op, st_phase_requested, props);
for (child = pcmk__xml_first_child(xml); child != NULL;
child = pcmk__xml_next(child)) {
/* Replies for "reboot" operations will include the action-specific
* values for "off" and "on" in child elements, just in case the reboot
* winds up getting remapped.
*/
if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_OFF, pcmk__str_none)) {
parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF,
op, st_phase_off, props);
} else if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_ON,
pcmk__str_none)) {
parse_action_specific(child, peer->host, device, PCMK_ACTION_ON,
op, st_phase_on, props);
}
}
}
/*!
* \internal
* \brief Parse a peer's XML query reply and add it to operation's results
*
* \param[in,out] op Operation that query and reply relate to
* \param[in] host Name of peer that sent this reply
* \param[in] ndevices Number of devices expected in reply
* \param[in] xml XML node containing device list
*
* \return Newly allocated result structure with parsed reply
*/
static peer_device_info_t *
add_result(remote_fencing_op_t *op, const char *host, int ndevices,
const xmlNode *xml)
{
peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
xmlNode *child;
// cppcheck seems not to understand the abort logic in CRM_CHECK
// cppcheck-suppress memleak
CRM_CHECK(peer != NULL, return NULL);
peer->host = strdup(host);
peer->devices = pcmk__strkey_table(free, free);
/* Each child element describes one capable device available to the peer */
for (child = pcmk__xml_first_child(xml); child != NULL;
child = pcmk__xml_next(child)) {
const char *device = pcmk__xe_id(child);
if (device) {
add_device_properties(child, op, peer, device);
}
}
peer->ndevices = g_hash_table_size(peer->devices);
CRM_CHECK(ndevices == peer->ndevices,
crm_err("Query claimed to have %d device%s but %d found",
ndevices, pcmk__plural_s(ndevices), peer->ndevices));
op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
return peer;
}
/*!
* \internal
* \brief Handle a peer's reply to our fencing query
*
* Parse a query result from XML and store it in the remote operation
* table, and when enough replies have been received, issue a fencing request.
*
* \param[in] msg XML reply received
*
* \return pcmk_ok on success, -errno on error
*
* \note See initiate_remote_stonith_op() for how the XML query was initially
* formed, and stonith_query() for how the peer formed its XML reply.
*/
int
process_remote_stonith_query(xmlNode *msg)
{
int ndevices = 0;
gboolean host_is_target = FALSE;
gboolean have_all_replies = FALSE;
const char *id = NULL;
const char *host = NULL;
remote_fencing_op_t *op = NULL;
peer_device_info_t *peer = NULL;
uint32_t replies_expected;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
CRM_CHECK(id != NULL, return -EPROTO);
dev = get_xpath_object("//@" PCMK__XA_ST_AVAILABLE_DEVICES, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
crm_element_value_int(dev, PCMK__XA_ST_AVAILABLE_DEVICES, &ndevices);
op = g_hash_table_lookup(stonith_remote_op_list, id);
if (op == NULL) {
crm_debug("Received query reply for unknown or expired operation %s",
id);
return -EOPNOTSUPP;
}
replies_expected = fencing_active_peers();
if (op->replies_expected < replies_expected) {
replies_expected = op->replies_expected;
}
if ((++op->replies >= replies_expected) && (op->state == st_query)) {
have_all_replies = TRUE;
}
host = crm_element_value(msg, PCMK__XA_SRC);
host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);
crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
op->replies, replies_expected, host,
op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
if (ndevices > 0) {
peer = add_result(op, host, ndevices, dev);
}
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (pcmk_is_set(op->call_options, st_opt_topology)) {
/* If we start the fencing before all the topology results are in,
* it is possible fencing levels will be skipped because of the missing
* query results. */
if (op->state == st_query && all_topology_devices_found(op)) {
/* All the query results are in for the topology, start the fencing ops. */
crm_trace("All topology devices found");
request_peer_fencing(op, peer);
} else if (have_all_replies) {
crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
request_peer_fencing(op, NULL);
}
} else if (op->state == st_query) {
int nverified = count_peer_devices(op, peer, TRUE,
fenced_support_flag(op->action));
/* We have a result for a non-topology fencing op that looks promising,
* go ahead and start fencing before query timeout */
if ((peer != NULL) && !host_is_target && nverified) {
/* we have a verified device living on a peer that is not the target */
crm_trace("Found %d verified device%s",
nverified, pcmk__plural_s(nverified));
request_peer_fencing(op, peer);
} else if (have_all_replies) {
crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
request_peer_fencing(op, NULL);
} else {
crm_trace("Waiting for more peer results before launching fencing operation");
}
} else if ((peer != NULL) && (op->state == st_done)) {
crm_info("Discarding query result from %s (%d device%s): "
"Operation is %s", peer->host,
peer->ndevices, pcmk__plural_s(peer->ndevices),
stonith_op_state_str(op->state));
}
return pcmk_ok;
}
/*!
* \internal
* \brief Handle a peer's reply to a fencing request
*
* Parse a fencing reply from XML, and either finalize the operation
* or attempt another device as appropriate.
*
* \param[in] msg XML reply received
*/
void
fenced_process_fencing_reply(xmlNode *msg)
{
const char *id = NULL;
const char *device = NULL;
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
CRM_CHECK(dev != NULL, return);
id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
CRM_CHECK(id != NULL, return);
dev = stonith__find_xe_with_result(msg);
CRM_CHECK(dev != NULL, return);
stonith__xe_get_result(dev, &result);
device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (stonith_remote_op_list) {
op = g_hash_table_lookup(stonith_remote_op_list, id);
}
if ((op == NULL) && pcmk__result_ok(&result)) {
/* Record successful fencing operations */
const char *client_id = crm_element_value(dev, PCMK__XA_ST_CLIENTID);
op = create_remote_stonith_op(client_id, dev, TRUE);
}
if (op == NULL) {
/* Could be for an event that began before we started */
/* TODO: Record the op for later querying */
crm_info("Received peer result of unknown or expired operation %s", id);
pcmk__reset_result(&result);
return;
}
pcmk__reset_result(&op->result);
op->result = result; // The operation takes ownership of the result
if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
crm_err("Received outdated reply for device %s (instead of %s) to "
"fence (%s) %s. Operation already timed out at peer level.",
device, (const char *) op->devices->data, op->action, op->target);
return;
}
if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT),
PCMK__VALUE_BROADCAST, pcmk__str_none)) {
if (pcmk__result_ok(&op->result)) {
op->state = st_done;
} else {
op->state = st_failed;
}
finalize_op(op, msg, false);
return;
} else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
/* If this isn't a remote level broadcast, and we are not the
* originator of the operation, we should not be receiving this msg. */
crm_err("Received non-broadcast fencing result for operation %.8s "
"we do not own (device %s targeting %s)",
op->id, device, op->target);
return;
}
if (pcmk_is_set(op->call_options, st_opt_topology)) {
const char *device = NULL;
const char *reason = op->result.exit_reason;
/* We own the op, and it is complete. broadcast the result to all nodes
* and notify our local clients. */
if (op->state == st_done) {
finalize_op(op, msg, false);
return;
}
device = crm_element_value(msg, PCMK__XA_ST_DEVICE_ID);
if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
/* A remapped "on" failed, but the node was already turned off
* successfully, so ignore the error and continue.
*/
crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
"after successful 'off'",
device, pcmk_exec_status_str(op->result.execution_status),
(reason == NULL)? "" : ": ",
(reason == NULL)? "" : reason,
op->target);
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
"%s%s%s%s",
op->action, op->target,
((device == NULL)? "" : " using "),
((device == NULL)? "" : device),
op->client_name,
op->originator,
pcmk_exec_status_str(op->result.execution_status),
(reason == NULL)? "" : " (",
(reason == NULL)? "" : reason,
(reason == NULL)? "" : ")");
}
if (pcmk__result_ok(&op->result)) {
/* An operation completed successfully. Try another device if
* necessary, otherwise mark the operation as done. */
advance_topology_device_in_level(op, device, msg);
return;
} else {
/* This device failed, time to try another topology level. If no other
* levels are available, mark this operation as failed and report results. */
if (advance_topology_level(op, false) != pcmk_rc_ok) {
op->state = st_failed;
finalize_op(op, msg, false);
return;
}
}
} else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
op->state = st_done;
finalize_op(op, msg, false);
return;
} else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
&& (op->devices == NULL)) {
/* If the operation timed out don't bother retrying other peers. */
op->state = st_failed;
finalize_op(op, msg, false);
return;
} else {
/* fall-through and attempt other fencing action using another peer */
}
/* Retry on failure */
crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
op->target, op->originator, op->client_name,
pcmk_exec_status_str(op->result.execution_status));
request_peer_fencing(op, NULL);
}
gboolean
stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
{
GHashTableIter iter;
time_t now = time(NULL);
remote_fencing_op_t *rop = NULL;
if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
action == NULL) {
return FALSE;
}
g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
if (strcmp(rop->target, target) != 0) {
continue;
} else if (rop->state != st_done) {
continue;
/* We don't have to worry about remapped reboots here
* because if state is done, any remapping has been undone
*/
} else if (strcmp(rop->action, action) != 0) {
continue;
} else if ((rop->completed + tolerance) < now) {
continue;
}
crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
target, action, tolerance, rop->delegate, rop->originator);
return TRUE;
}
return FALSE;
}
diff --git a/include/crm/common/xml_names_internal.h b/include/crm/common/xml_names_internal.h
index 608a8dfc06..655cb711fb 100644
--- a/include/crm/common/xml_names_internal.h
+++ b/include/crm/common/xml_names_internal.h
@@ -1,348 +1,349 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_XML_NAMES_INTERNAL__H
# define PCMK__CRM_COMMON_XML_NAMES_INTERNAL__H
#ifdef __cplusplus
extern "C" {
#endif
/*
* XML element names used only by internal code
*/
#define PCMK__XE_ACK "ack"
#define PCMK__XE_ATTRIBUTES "attributes"
#define PCMK__XE_CIB_CALLBACK "cib-callback"
#define PCMK__XE_CIB_COMMAND "cib_command"
#define PCMK__XE_CIB_REPLY "cib-reply"
#define PCMK__XE_CIB_RESULT "cib_result"
#define PCMK__XE_CIB_TRANSACTION "cib_transaction"
#define PCMK__XE_COPY "copy"
#define PCMK__XE_CRM_EVENT "crm_event"
#define PCMK__XE_CRM_XML "crm_xml"
#define PCMK__XE_DIV "div"
#define PCMK__XE_DOWNED "downed"
#define PCMK__XE_EXIT_NOTIFICATION "exit-notification"
#define PCMK__XE_FAILED "failed"
#define PCMK__XE_FAILED_UPDATE "failed_update"
#define PCMK__XE_GENERATION_TUPLE "generation_tuple"
#define PCMK__XE_LRM "lrm"
#define PCMK__XE_LRM_RESOURCE "lrm_resource"
#define PCMK__XE_LRM_RESOURCES "lrm_resources"
#define PCMK__XE_LRM_RSC_OP "lrm_rsc_op"
#define PCMK__XE_LRMD_ALERT "lrmd_alert"
#define PCMK__XE_LRMD_CALLDATA "lrmd_calldata"
#define PCMK__XE_LRMD_COMMAND "lrmd_command"
#define PCMK__XE_LRMD_IPC_MSG "lrmd_ipc_msg"
#define PCMK__XE_LRMD_IPC_PROXY "lrmd_ipc_proxy"
#define PCMK__XE_LRMD_NOTIFY "lrmd_notify"
#define PCMK__XE_LRMD_REPLY "lrmd_reply"
#define PCMK__XE_LRMD_RSC "lrmd_rsc"
#define PCMK__XE_LRMD_RSC_OP "lrmd_rsc_op"
#define PCMK__XE_MAINTENANCE "maintenance"
#define PCMK__XE_META "meta"
#define PCMK__XE_NACK "nack"
#define PCMK__XE_NODE_STATE "node_state"
#define PCMK__XE_NOTIFY "notify"
#define PCMK__XE_OPTIONS "options"
#define PCMK__XE_PARAM "param"
#define PCMK__XE_PING "ping"
#define PCMK__XE_PING_RESPONSE "ping_response"
#define PCMK__XE_PSEUDO_EVENT "pseudo_event"
#define PCMK__XE_RSC_OP "rsc_op"
#define PCMK__XE_SHUTDOWN "shutdown"
#define PCMK__XE_SPAN "span"
#define PCMK__XE_ST_ASYNC_TIMEOUT_VALUE "st-async-timeout-value"
#define PCMK__XE_ST_DEVICE_ACTION "st_device_action"
#define PCMK__XE_ST_DEVICE_ID "st_device_id"
#define PCMK__XE_ST_HISTORY "st_history"
+#define PCMK__XE_ST_NOTIFY_FENCE "st_notify_fence"
#define PCMK__XE_ST_REPLY "st-reply"
#define PCMK__XE_STONITH_COMMAND "stonith_command"
#define PCMK__XE_TICKET_STATE "ticket_state"
#define PCMK__XE_TRANSIENT_ATTRIBUTES "transient_attributes"
#define PCMK__XE_TRANSITION_GRAPH "transition_graph"
#define PCMK__XE_XPATH_QUERY "xpath-query"
#define PCMK__XE_XPATH_QUERY_PATH "xpath-query-path"
// @COMPAT Deprecated since 1.1.12
#define PCMK__XE_ACL_USER "acl_user"
/* @COMPAT Deprecate somehow. It's undocumented and behaves the same as
* PCMK__XE_CIB in places where it's recognized.
*/
#define PCMK__XE_ALL "all"
// @COMPAT Deprecated since 2.1.8
#define PCMK__XE_CIB_GENERATION "cib_generation"
// @COMPAT Deprecated since 2.1.8
#define PCMK__XE_CIB_UPDATE "cib_update"
// @COMPAT Deprecated since 2.1.7
#define PCMK__XE_DIFF_ADDED "diff-added"
// @COMPAT Deprecated since 2.1.7
#define PCMK__XE_DIFF_REMOVED "diff-removed"
// @COMPAT Deprecated since 1.0.8 (commit 4cb100f)
#define PCMK__XE_LIFETIME "lifetime"
/* @COMPAT Deprecated since 2.0.0; alias for <clone> with PCMK_META_PROMOTABLE
* set to "true"
*/
#define PCMK__XE_PROMOTABLE_LEGACY "master"
// @COMPAT Support for rkt is deprecated since 2.1.8
#define PCMK__XE_RKT "rkt"
// @COMPAT Deprecated since 1.1.12
#define PCMK__XE_ROLE_REF "role_ref"
/*
* XML attribute names used only by internal code
*/
#define PCMK__XA_ATTR_CLEAR_INTERVAL "attr_clear_interval"
#define PCMK__XA_ATTR_CLEAR_OPERATION "attr_clear_operation"
#define PCMK__XA_ATTR_DAMPENING "attr_dampening"
#define PCMK__XA_ATTR_HOST "attr_host"
#define PCMK__XA_ATTR_HOST_ID "attr_host_id"
#define PCMK__XA_ATTR_IS_PRIVATE "attr_is_private"
#define PCMK__XA_ATTR_IS_REMOTE "attr_is_remote"
#define PCMK__XA_ATTR_NAME "attr_name"
#define PCMK__XA_ATTR_REGEX "attr_regex"
#define PCMK__XA_ATTR_RESOURCE "attr_resource"
#define PCMK__XA_ATTR_SECTION "attr_section"
#define PCMK__XA_ATTR_SET "attr_set"
#define PCMK__XA_ATTR_SET_TYPE "attr_set_type"
#define PCMK__XA_ATTR_SYNC_POINT "attr_sync_point"
#define PCMK__XA_ATTR_USER "attr_user"
#define PCMK__XA_ATTR_VALUE "attr_value"
#define PCMK__XA_ATTR_VERSION "attr_version"
#define PCMK__XA_ATTR_WRITER "attr_writer"
#define PCMK__XA_ATTRD_IS_FORCE_WRITE "attrd_is_force_write"
#define PCMK__XA_CALL_ID "call-id"
#define PCMK__XA_CIB_CALLDATA "cib_calldata"
#define PCMK__XA_CIB_CALLID "cib_callid"
#define PCMK__XA_CIB_CALLOPT "cib_callopt"
#define PCMK__XA_CIB_CLIENTID "cib_clientid"
#define PCMK__XA_CIB_CLIENTNAME "cib_clientname"
#define PCMK__XA_CIB_DELEGATED_FROM "cib_delegated_from"
#define PCMK__XA_CIB_HOST "cib_host"
#define PCMK__XA_CIB_ISREPLYTO "cib_isreplyto"
#define PCMK__XA_CIB_NOTIFY_ACTIVATE "cib_notify_activate"
#define PCMK__XA_CIB_NOTIFY_TYPE "cib_notify_type"
#define PCMK__XA_CIB_OP "cib_op"
#define PCMK__XA_CIB_PING_ID "cib_ping_id"
#define PCMK__XA_CIB_RC "cib_rc"
#define PCMK__XA_CIB_SCHEMA_MAX "cib_schema_max"
#define PCMK__XA_CIB_SECTION "cib_section"
#define PCMK__XA_CIB_UPDATE "cib_update"
#define PCMK__XA_CIB_UPDATE_RESULT "cib_update_result"
#define PCMK__XA_CIB_UPGRADE_RC "cib_upgrade_rc"
#define PCMK__XA_CIB_USER "cib_user"
#define PCMK__XA_CLIENT_NAME "client_name"
#define PCMK__XA_CLIENT_UUID "client_uuid"
#define PCMK__XA_CONFIG_ERRORS "config-errors"
#define PCMK__XA_CONFIG_WARNINGS "config-warnings"
#define PCMK__XA_CONFIRM "confirm"
#define PCMK__XA_CONNECTION_HOST "connection_host"
#define PCMK__XA_CONTENT "content"
#define PCMK__XA_CRMD_STATE "crmd_state"
#define PCMK__XA_CRM_HOST_TO "crm_host_to"
#define PCMK__XA_CRM_LIMIT_MAX "crm-limit-max"
#define PCMK__XA_CRM_LIMIT_MODE "crm-limit-mode"
#define PCMK__XA_CRM_SUBSYSTEM "crm_subsystem"
#define PCMK__XA_CRM_SYS_FROM "crm_sys_from"
#define PCMK__XA_CRM_SYS_TO "crm_sys_to"
#define PCMK__XA_CRM_TASK "crm_task"
#define PCMK__XA_CRM_TGRAPH_IN "crm-tgraph-in"
#define PCMK__XA_CRM_USER "crm_user"
#define PCMK__XA_DC_LEAVING "dc-leaving"
#define PCMK__XA_DIGEST "digest"
#define PCMK__XA_ELECTION_AGE_SEC "election-age-sec"
#define PCMK__XA_ELECTION_AGE_NANO_SEC "election-age-nano-sec"
#define PCMK__XA_ELECTION_ID "election-id"
#define PCMK__XA_ELECTION_OWNER "election-owner"
#define PCMK__XA_GRANTED "granted"
#define PCMK__XA_GRAPH_ERRORS "graph-errors"
#define PCMK__XA_GRAPH_WARNINGS "graph-warnings"
#define PCMK__XA_HIDDEN "hidden"
#define PCMK__XA_HTTP_EQUIV "http-equiv"
#define PCMK__XA_IN_CCM "in_ccm"
#define PCMK__XA_JOIN "join"
#define PCMK__XA_JOIN_ID "join_id"
#define PCMK__XA_LINE "line"
#define PCMK__XA_LONG_ID "long-id"
#define PCMK__XA_LRMD_ALERT_ID "lrmd_alert_id"
#define PCMK__XA_LRMD_ALERT_PATH "lrmd_alert_path"
#define PCMK__XA_LRMD_CALLID "lrmd_callid"
#define PCMK__XA_LRMD_CALLOPT "lrmd_callopt"
#define PCMK__XA_LRMD_CLASS "lrmd_class"
#define PCMK__XA_LRMD_CLIENTID "lrmd_clientid"
#define PCMK__XA_LRMD_CLIENTNAME "lrmd_clientname"
#define PCMK__XA_LRMD_EXEC_OP_STATUS "lrmd_exec_op_status"
#define PCMK__XA_LRMD_EXEC_RC "lrmd_exec_rc"
#define PCMK__XA_LRMD_EXEC_TIME "lrmd_exec_time"
#define PCMK__XA_LRMD_IPC_CLIENT "lrmd_ipc_client"
#define PCMK__XA_LRMD_IPC_MSG_FLAGS "lrmd_ipc_msg_flags"
#define PCMK__XA_LRMD_IPC_MSG_ID "lrmd_ipc_msg_id"
#define PCMK__XA_LRMD_IPC_OP "lrmd_ipc_op"
#define PCMK__XA_LRMD_IPC_SERVER "lrmd_ipc_server"
#define PCMK__XA_LRMD_IPC_SESSION "lrmd_ipc_session"
#define PCMK__XA_LRMD_IPC_USER "lrmd_ipc_user"
#define PCMK__XA_LRMD_IS_IPC_PROVIDER "lrmd_is_ipc_provider"
#define PCMK__XA_LRMD_OP "lrmd_op"
#define PCMK__XA_LRMD_ORIGIN "lrmd_origin"
#define PCMK__XA_LRMD_PROTOCOL_VERSION "lrmd_protocol_version"
#define PCMK__XA_LRMD_PROVIDER "lrmd_provider"
#define PCMK__XA_LRMD_QUEUE_TIME "lrmd_queue_time"
#define PCMK__XA_LRMD_RC "lrmd_rc"
#define PCMK__XA_LRMD_RCCHANGE_TIME "lrmd_rcchange_time"
#define PCMK__XA_LRMD_REMOTE_MSG_ID "lrmd_remote_msg_id"
#define PCMK__XA_LRMD_REMOTE_MSG_TYPE "lrmd_remote_msg_type"
#define PCMK__XA_LRMD_RSC_ACTION "lrmd_rsc_action"
#define PCMK__XA_LRMD_RSC_DELETED "lrmd_rsc_deleted"
#define PCMK__XA_LRMD_RSC_EXIT_REASON "lrmd_rsc_exit_reason"
#define PCMK__XA_LRMD_RSC_ID "lrmd_rsc_id"
#define PCMK__XA_LRMD_RSC_INTERVAL "lrmd_rsc_interval"
#define PCMK__XA_LRMD_RSC_OUTPUT "lrmd_rsc_output"
#define PCMK__XA_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay"
#define PCMK__XA_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str"
#define PCMK__XA_LRMD_RUN_TIME "lrmd_run_time"
#define PCMK__XA_LRMD_TIMEOUT "lrmd_timeout"
#define PCMK__XA_LRMD_TYPE "lrmd_type"
#define PCMK__XA_LRMD_WATCHDOG "lrmd_watchdog"
#define PCMK__XA_MAJOR_VERSION "major_version"
#define PCMK__XA_MINOR_VERSION "minor_version"
#define PCMK__XA_MODE "mode"
#define PCMK__XA_MOON "moon"
#define PCMK__XA_NAMESPACE "namespace"
#define PCMK__XA_NODE_FENCED "node_fenced"
#define PCMK__XA_NODE_IN_MAINTENANCE "node_in_maintenance"
#define PCMK__XA_NODE_START_STATE "node_start_state"
#define PCMK__XA_NODE_STATE "node_state"
#define PCMK__XA_OP_DIGEST "op-digest"
#define PCMK__XA_OP_FORCE_RESTART "op-force-restart"
#define PCMK__XA_OP_RESTART_DIGEST "op-restart-digest"
#define PCMK__XA_OP_SECURE_DIGEST "op-secure-digest"
#define PCMK__XA_OP_SECURE_PARAMS "op-secure-params"
#define PCMK__XA_OP_STATUS "op-status"
#define PCMK__XA_OPERATION_KEY "operation_key"
#define PCMK__XA_ORIGINAL_CIB_OP "original_cib_op"
#define PCMK__XA_PACEMAKERD_STATE "pacemakerd_state"
#define PCMK__XA_PASSWORD "password"
#define PCMK__XA_PRIORITY "priority"
#define PCMK__XA_RC_CODE "rc-code"
#define PCMK__XA_REAP "reap"
/* Actions to be executed on Pacemaker Remote nodes are routed through the
* controller on the cluster node hosting the remote connection. That cluster
* node is considered the router node for the action.
*/
#define PCMK__XA_ROUTER_NODE "router_node"
#define PCMK__XA_RSC_ID "rsc-id"
#define PCMK__XA_RSC_PROVIDES "rsc_provides"
#define PCMK__XA_SCHEMA "schema"
#define PCMK__XA_SCHEMAS "schemas"
#define PCMK__XA_SRC "src"
#define PCMK__XA_ST_ACTION_DISALLOWED "st_action_disallowed"
#define PCMK__XA_ST_ACTION_TIMEOUT "st_action_timeout"
#define PCMK__XA_ST_AVAILABLE_DEVICES "st-available-devices"
#define PCMK__XA_ST_CALLID "st_callid"
#define PCMK__XA_ST_CALLDATA "st_calldata"
#define PCMK__XA_ST_CALLOPT "st_callopt"
#define PCMK__XA_ST_CLIENTID "st_clientid"
#define PCMK__XA_ST_CLIENTNAME "st_clientname"
#define PCMK__XA_ST_CLIENTNODE "st_clientnode"
#define PCMK__XA_ST_DATE "st_date"
#define PCMK__XA_ST_DATE_NSEC "st_date_nsec"
#define PCMK__XA_ST_DELAY "st_delay"
#define PCMK__XA_ST_DELAY_BASE "st_delay_base"
#define PCMK__XA_ST_DELAY_MAX "st_delay_max"
#define PCMK__XA_ST_DELEGATE "st_delegate"
#define PCMK__XA_ST_DEVICE_ACTION "st_device_action"
#define PCMK__XA_ST_DEVICE_ID "st_device_id"
#define PCMK__XA_ST_DEVICE_SUPPORT_FLAGS "st_device_support_flags"
#define PCMK__XA_ST_DIFFERENTIAL "st_differential"
#define PCMK__XA_ST_MONITOR_VERIFIED "st_monitor_verified"
#define PCMK__XA_ST_NOTIFY_ACTIVATE "st_notify_activate"
#define PCMK__XA_ST_NOTIFY_DEACTIVATE "st_notify_deactivate"
#define PCMK__XA_ST_OP "st_op"
#define PCMK__XA_ST_OP_MERGED "st_op_merged"
#define PCMK__XA_ST_ORIGIN "st_origin"
#define PCMK__XA_ST_OUTPUT "st_output"
#define PCMK__XA_ST_RC "st_rc"
#define PCMK__XA_ST_REMOTE_OP "st_remote_op"
#define PCMK__XA_ST_REMOTE_OP_RELAY "st_remote_op_relay"
#define PCMK__XA_ST_REQUIRED "st_required"
#define PCMK__XA_ST_STATE "st_state"
#define PCMK__XA_ST_TARGET "st_target"
#define PCMK__XA_ST_TIMEOUT "st_timeout"
#define PCMK__XA_ST_TOLERANCE "st_tolerance"
#define PCMK__XA_SUBT "subt" // subtype
#define PCMK__XA_T "t" // type
#define PCMK__XA_TRANSITION_KEY "transition-key"
#define PCMK__XA_TRANSITION_MAGIC "transition-magic"
#define PCMK__XA_UPTIME "uptime"
// @COMPAT Deprecated since 2.1.8
#define PCMK__XA_CIB_OBJECT "cib_object"
// @COMPAT Deprecated since 2.1.8
#define PCMK__XA_CIB_OBJECT_TYPE "cib_object_type"
// @COMPAT Deprecated since 1.1.12; used with legacy CIB updates
#define PCMK__XA_CIB_LOCAL_NOTIFY_ID "cib_local_notify_id"
// @COMPAT Deprecated since 1.1.12; used with legacy CIB updates
#define PCMK__XA_CIB_UPDATE_DIFF "cib_update_diff"
// @COMPAT Used only with v1 patchsets
#define PCMK__XA_CRM_DIFF_MARKER "__crm_diff_marker__"
// @COMPAT Deprecated since 2.1.5
#define PCMK__XA_FIRST_INSTANCE "first-instance"
// @COMPAT Deprecated since 2.1.7
#define PCMK__XA_ORDERING "ordering"
// @COMPAT Deprecated alias for PCMK_XA_PROMOTED_MAX since 2.0.0
#define PCMK__XA_PROMOTED_MAX_LEGACY "masters"
// @COMPAT Deprecated alias for PCMK_XA_PROMOTED_ONLY since 2.0.0
#define PCMK__XA_PROMOTED_ONLY_LEGACY "master_only"
// @COMPAT Deprecated since 1.1.12
#define PCMK__XA_REF "ref"
// @COMPAT Deprecated since 2.1.6
#define PCMK__XA_REPLACE "replace"
// @COMPAT Deprecated alias for \c PCMK_XA_AUTOMATIC since 1.1.14
#define PCMK__XA_REQUIRED "required"
// @COMPAT Deprecated since 2.1.5
#define PCMK__XA_RSC_INSTANCE "rsc-instance"
// @COMPAT Deprecated since 2.1.5
#define PCMK__XA_THEN_INSTANCE "then-instance"
// @COMPAT Deprecated since 2.1.5
#define PCMK__XA_WITH_RSC_INSTANCE "with-rsc-instance"
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_COMMON_XML_NAMES_INTERNAL__H
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Jul 10, 1:55 AM (1 d, 9 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2009606
Default Alt Text
(236 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment