Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624261
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
153 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/fencing/admin.c b/fencing/admin.c
index 52780b4a58..c02986152a 100644
--- a/fencing/admin.c
+++ b/fencing/admin.c
@@ -1,527 +1,528 @@
/*
* Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/stonith-ng.h>
#include <crm/cib.h>
#include <crm/pengine/status.h>
#include <crm/common/xml.h>
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
{"help", 0, 0, '?', "\tThis text"},
{"version", 0, 0, '$', "\tVersion information" },
{"verbose", 0, 0, 'V', "\tIncrease debug output"},
{"quiet", 0, 0, 'q', "\tPrint only essential output"},
{"-spacer-", 0, 0, '-', "Commands:"},
{"list", 1, 0, 'l', "List devices that can terminate the specified host"},
{"list-registered", 0, 0, 'L', "List all registered devices"},
{"list-installed", 0, 0, 'I', "List all installed devices"},
{"-spacer-", 0, 0, '-', ""},
{"metadata", 0, 0, 'M', "Check the device's metadata"},
{"query", 1, 0, 'Q', "Check the device's status"},
{"fence", 1, 0, 'F', "Fence the named host"},
{"unfence", 1, 0, 'U', "Unfence the named host"},
{"reboot", 1, 0, 'B', "Reboot the named host"},
{"confirm", 1, 0, 'C', "Confirm the named host is now safely down"},
{"history", 1, 0, 'H', "Retrieve last fencing operation"},
{"-spacer-", 0, 0, '-', ""},
{"register", 1, 0, 'R', "Register the named stonith device. Requires: --agent, optional: --option"},
{"deregister", 1, 0, 'D', "De-register the named stonith device"},
{"register-level", 1, 0, 'r', "Register a stonith level for the named host. Requires: --index, one or more --device entries"},
{"deregister-level", 1, 0, 'd', "De-register a stonith level for the named host. Requires: --index"},
{"-spacer-", 0, 0, '-', ""},
{"-spacer-", 0, 0, '-', "Options and modifiers:"},
{"agent", 1, 0, 'a', "The agent (eg. fence_xvm) to instantiate when calling with --register"},
{"env-option", 1, 0, 'e'},
{"option", 1, 0, 'o'},
{"tag", 1, 0, 'T', "Identify fencing operations with the specified tag.\n\t"
"Useful when there are multiple entities that might be invoking stonith_admin(8)"},
{"device", 1, 0, 'v', "A device to associate with a given host and stonith level"},
{"index", 1, 0, 'i', "The stonith level (1-9)"},
{"timeout", 1, 0, 't', "Operation timeout in seconds"},
{"tolerance", 1, 0, 0, "(Advanced) Do nothing if an equivalent --fence request succeeded less than N seconds earlier" },
{"list-all", 0, 0, 'L', "legacy alias for --list-registered"},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
int st_opts = st_opt_sync_call | st_opt_allow_suicide;
GMainLoop *mainloop = NULL;
struct {
stonith_t *st;
const char *target;
const char *action;
char *name;
int timeout;
int tolerance;
int rc;
} async_fence_data;
static int
try_mainloop_connect(void)
{
stonith_t *st = async_fence_data.st;
int tries = 10;
int i = 0;
int rc = 0;
for (i = 0; i < tries; i++) {
crm_debug("Connecting as %s", async_fence_data.name);
rc = st->cmds->connect(st, async_fence_data.name, NULL);
if (!rc) {
crm_debug("stonith client connection established");
return 0;
} else {
crm_debug("stonith client connection failed");
}
sleep(1);
}
crm_err("Couldn not connect to stonithd. \n");
return -1;
}
static void
notify_callback(stonith_t * st, stonith_event_t * e)
{
if (e->result != pcmk_ok) {
return;
}
if (safe_str_eq(async_fence_data.target, e->target) &&
safe_str_eq(async_fence_data.action, e->action)) {
async_fence_data.rc = e->result;
g_main_loop_quit(mainloop);
}
}
static void
fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
async_fence_data.rc = data->rc;
g_main_loop_quit(mainloop);
}
static gboolean
async_fence_helper(gpointer user_data)
{
stonith_t *st = async_fence_data.st;
int call_id = 0;
if (try_mainloop_connect()) {
g_main_loop_quit(mainloop);
return TRUE;
}
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, notify_callback);
call_id = st->cmds->fence(st,
st_opt_allow_suicide,
async_fence_data.target,
async_fence_data.action,
async_fence_data.timeout, async_fence_data.tolerance);
if (call_id < 0) {
g_main_loop_quit(mainloop);
return TRUE;
}
st->cmds->register_callback(st,
call_id,
async_fence_data.timeout,
st_opt_timeout_updates, NULL, "callback", fence_callback);
return TRUE;
}
static int
mainloop_fencing(stonith_t * st, const char *target, const char *action, int timeout, int tolerance)
{
crm_trigger_t *trig;
async_fence_data.st = st;
async_fence_data.target = target;
async_fence_data.action = action;
async_fence_data.timeout = timeout;
async_fence_data.tolerance = tolerance;
async_fence_data.rc = -1;
trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
mainloop_set_trigger(trig);
mainloop = g_main_new(FALSE);
g_main_run(mainloop);
return async_fence_data.rc;
}
int
main(int argc, char **argv)
{
int flag;
int rc = 0;
int quiet = 0;
int verbose = 0;
int argerr = 0;
int timeout = 120;
int option_index = 0;
int fence_level = 0;
int no_connect = 0;
int tolerance = 0;
char name[512];
char value[512];
const char *agent = NULL;
const char *device = NULL;
const char *target = NULL;
const char *longname = NULL;
char action = 0;
stonith_t *st = NULL;
stonith_key_value_t *params = NULL;
stonith_key_value_t *devices = NULL;
stonith_key_value_t *dIter = NULL;
crm_log_cli_init("stonith_admin");
crm_set_options(NULL, "mode [options]", long_options,
"Provides access to the stonith-ng API.\n"
"\nAllows the administrator to add/remove/list devices, check device and host status and fence hosts\n");
async_fence_data.name = strdup(crm_system_name);
while (1) {
flag = crm_get_option_long(argc, argv, &option_index, &longname);
if (flag == -1)
break;
switch (flag) {
case 'V':
verbose = 1;
crm_bump_log_level(argc, argv);
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
case 'I':
no_connect = 1;
/* fall through */
case 'L':
action = flag;
break;
case 'q':
quiet = 1;
break;
case 'Q':
case 'R':
case 'D':
action = flag;
device = optarg;
break;
case 'T':
free(async_fence_data.name);
async_fence_data.name = g_strdup_printf("%s.%s", crm_system_name, optarg);
break;
case 'a':
agent = optarg;
break;
case 'l':
target = optarg;
action = 'L';
break;
case 'M':
no_connect = 1;
action = flag;
break;
case 't':
timeout = crm_atoi(optarg, NULL);
break;
case 'B':
case 'F':
case 'U':
/* using mainloop here */
no_connect = 1;
/* fall through */
case 'C':
/* Always log the input arguments */
crm_log_args(argc, argv);
target = optarg;
action = flag;
break;
case 'H':
case 'r':
case 'd':
target = optarg;
action = flag;
break;
case 'i':
fence_level = crm_atoi(optarg, NULL);
break;
case 'v':
devices = stonith_key_value_add(devices, NULL, optarg);
break;
case 'o':
crm_info("Scanning: -o %s", optarg);
rc = sscanf(optarg, "%[^=]=%[^=]", name, value);
if (rc != 2) {
crm_err("Invalid option: -o %s", optarg);
++argerr;
} else {
crm_info("Got: '%s'='%s'", name, value);
params = stonith_key_value_add(params, name, value);
}
break;
case 'e':
{
char *key = crm_concat("OCF_RESKEY", optarg, '_');
const char *env = getenv(key);
if (env == NULL) {
crm_err("Invalid option: -e %s", optarg);
++argerr;
} else {
crm_info("Got: '%s'='%s'", optarg, env);
params = stonith_key_value_add(params, optarg, env);
}
}
break;
case 0:
if (safe_str_eq("tolerance", longname)) {
tolerance = crm_get_msec(optarg) / 1000; /* Send in seconds */
}
break;
default:
++argerr;
break;
}
}
if (optind > argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_debug("Create");
st = stonith_api_new();
crm_debug("Created");
if (!no_connect) {
crm_debug("Connecting as %s", async_fence_data.name);
rc = st->cmds->connect(st, async_fence_data.name, NULL);
crm_debug("Connect: %d", rc);
if (rc < 0) {
goto done;
}
}
switch (action) {
case 'I':
rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout);
for (dIter = devices; dIter; dIter = dIter->next) {
fprintf(stdout, " %s\n", dIter->value);
}
if (rc == 0) {
fprintf(stderr, "No devices found\n");
} else if (rc > 0) {
fprintf(stderr, "%d devices found\n", rc);
rc = 0;
}
stonith_key_value_freeall(devices, 1, 1);
break;
case 'L':
rc = st->cmds->query(st, st_opts, target, &devices, timeout);
for (dIter = devices; dIter; dIter = dIter->next) {
fprintf(stdout, " %s\n", dIter->value);
}
if (rc == 0) {
fprintf(stderr, "No devices found\n");
} else if (rc > 0) {
fprintf(stderr, "%d devices found\n", rc);
rc = 0;
}
stonith_key_value_freeall(devices, 1, 1);
break;
case 'Q':
rc = st->cmds->monitor(st, st_opts, device, timeout);
if (rc < 0) {
rc = st->cmds->list(st, st_opts, device, NULL, timeout);
}
break;
case 'R':
rc = st->cmds->register_device(st, st_opts, device, "stonith-ng", agent, params);
break;
case 'D':
rc = st->cmds->remove_device(st, st_opts, device);
break;
case 'r':
rc = st->cmds->register_level(st, st_opts, target, fence_level, devices);
break;
case 'd':
rc = st->cmds->remove_level(st, st_opts, target, fence_level);
break;
case 'M':
if (agent == NULL) {
printf("Please specify an agent to query using -a,--agent [value]\n");
return -1;
} else {
char *buffer = NULL;
rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, timeout);
if (rc == pcmk_ok) {
printf("%s\n", buffer);
}
free(buffer);
}
break;
case 'C':
rc = st->cmds->confirm(st, st_opts, target);
break;
case 'B':
rc = mainloop_fencing(st, target, "reboot", timeout, tolerance);
break;
case 'F':
rc = mainloop_fencing(st, target, "off", timeout, tolerance);
break;
case 'U':
rc = mainloop_fencing(st, target, "on", timeout, tolerance);
break;
case 'H':
{
stonith_history_t *history, *hp, *latest = NULL;
rc = st->cmds->history(st, st_opts, target, &history, timeout);
for (hp = history; hp; hp = hp->next) {
char *action_s = NULL;
time_t complete = hp->completed;
if (hp->state == st_done) {
latest = hp;
}
if (quiet || !verbose) {
continue;
} else if (hp->action == NULL) {
action_s = strdup("unknown");
} else if (hp->action[0] != 'r') {
action_s = crm_concat("turn", hp->action, ' ');
} else {
action_s = strdup(hp->action);
}
if (hp->state == st_failed) {
- printf("%s failed to %s node %s on behalf of %s at %s\n",
- hp->delegate ? hp->delegate : "We", action_s, hp->target, hp->origin,
- ctime(&complete));
+ printf("%s failed to %s node %s on behalf of %s from %s at %s\n",
+ hp->delegate ? hp->delegate : "We", action_s, hp->target,
+ hp->client, hp->origin, ctime(&complete));
} else if (hp->state == st_done && hp->delegate) {
- printf("%s was able to %s node %s on behalf of %s at %s\n",
- hp->delegate, action_s, hp->target, hp->origin, ctime(&complete));
+ printf("%s was able to %s node %s on behalf of %s from %s at %s\n",
+ hp->delegate, action_s, hp->target,
+ hp->client, hp->origin, ctime(&complete));
} else if (hp->state == st_done) {
- printf("We were able to %s node %s on behalf of %s at %s\n",
- action_s, hp->target, hp->origin, ctime(&complete));
+ printf("We were able to %s node %s on behalf of %s from %s at %s\n",
+ action_s, hp->target, hp->client, hp->origin, ctime(&complete));
} else {
- printf("%s wishes to %s node %s - %d %d\n",
- hp->origin, action_s, hp->target, hp->state, hp->completed);
+ printf("%s at %s wishes to %s node %s - %d %d\n",
+ hp->client, hp->origin, action_s, hp->target, hp->state, hp->completed);
}
free(action_s);
}
if (latest) {
if (quiet) {
printf("%d\n", latest->completed);
} else {
char *action_s = NULL;
time_t complete = latest->completed;
if (latest->action == NULL) {
action_s = strdup("unknown");
} else if (latest->action[0] != 'r') {
action_s = crm_concat("turn", latest->action, ' ');
} else {
action_s = strdup(latest->action);
}
- printf("%s was able to %s node %s on behalf of %s at %s\n",
+ printf("%s was able to %s node %s on behalf of %s from %s at %s\n",
latest->delegate ? latest->delegate : "We", action_s, latest->target,
- latest->origin, ctime(&complete));
+ latest->client, latest->origin, ctime(&complete));
free(action_s);
}
}
break;
} /* closing bracket for -H case */
} /* closing bracket for switch case */
done:
free(async_fence_data.name);
crm_info("Command returned: %s (%d)", pcmk_strerror(rc), rc);
if (rc < 0) {
printf("Command failed: %s\n", pcmk_strerror(rc));
}
stonith_key_value_freeall(params, 1, 1);
st->cmds->disconnect(st);
crm_debug("Disconnect: %d", rc);
crm_debug("Destroy");
stonith_api_delete(st);
return rc;
}
diff --git a/fencing/remote.c b/fencing/remote.c
index 714c48e374..656d9c97e4 100644
--- a/fencing/remote.c
+++ b/fencing/remote.c
@@ -1,1381 +1,1383 @@
/*
* Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <internal.h>
#define TIMEOUT_MULTIPLY_FACTOR 1.2
typedef struct st_query_result_s {
char *host;
int devices;
/* only try peers for non-topology based operations once */
gboolean tried;
GListPtr device_list;
GHashTable *custom_action_timeouts;
/* Subset of devices that peer has verified connectivity on */
GHashTable *verified_devices;
} st_query_result_t;
GHashTable *remote_op_list = NULL;
void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer);
static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer,
int default_timeout);
static void
free_remote_query(gpointer data)
{
if (data) {
st_query_result_t *query = data;
crm_trace("Free'ing query result from %s", query->host);
free(query->host);
g_list_free_full(query->device_list, free);
g_hash_table_destroy(query->custom_action_timeouts);
g_hash_table_destroy(query->verified_devices);
free(query);
}
}
static void
clear_remote_op_timers(remote_fencing_op_t * op)
{
if (op->query_timer) {
g_source_remove(op->query_timer);
op->query_timer = 0;
}
if (op->op_timer_total) {
g_source_remove(op->op_timer_total);
op->op_timer_total = 0;
}
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
op->op_timer_one = 0;
}
}
static void
free_remote_op(gpointer data)
{
remote_fencing_op_t *op = data;
crm_trace("Free'ing op %s for %s", op->id, op->target);
crm_log_xml_debug(op->request, "Destroying");
clear_remote_op_timers(op);
free(op->id);
free(op->action);
free(op->target);
free(op->client_id);
free(op->client_name);
free(op->originator);
if (op->query_results) {
g_list_free_full(op->query_results, free_remote_query);
}
if (op->request) {
free_xml(op->request);
op->request = NULL;
}
free(op);
}
static xmlNode *
create_op_done_notify(remote_fencing_op_t * op, int rc)
{
xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
crm_xml_add_int(notify_data, "state", op->state);
crm_xml_add_int(notify_data, F_STONITH_RC, rc);
crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
return notify_data;
}
static void
bcast_result_to_peers(remote_fencing_op_t * op, int rc)
{
static int count = 0;
xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
xmlNode *notify_data = create_op_done_notify(op, rc);
count++;
crm_trace("Broadcasting result to peers");
crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(bcast, F_SUBTYPE, "broadcast");
crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
crm_xml_add_int(bcast, "count", count);
add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
free_xml(notify_data);
free_xml(bcast);
return;
}
static void
handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
{
xmlNode *notify_data = NULL;
xmlNode *reply = NULL;
if (op->notify_sent == TRUE) {
/* nothing to do */
return;
}
/* Do notification with a clean data object */
notify_data = create_op_done_notify(op, rc);
crm_xml_add_int(data, "state", op->state);
crm_xml_add(data, F_STONITH_TARGET, op->target);
crm_xml_add(data, F_STONITH_OPERATION, op->action);
reply = stonith_construct_reply(op->request, NULL, data, rc);
crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
/* Send fencing OP reply to local client that initiated fencing */
do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
/* bcast to all local clients that the fencing operation happend */
do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
/* mark this op as having notify's already sent */
op->notify_sent = TRUE;
free_xml(reply);
free_xml(notify_data);
}
static void
handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
{
GListPtr iter = NULL;
for (iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *other = iter->data;
if (other->state == st_duplicate) {
/* Ie. it hasn't timed out already */
other->state = op->state;
crm_debug("Peforming duplicate notification for %s@%s.%.8s = %s", other->client_name,
other->originator, other->id, pcmk_strerror(rc));
remote_op_done(other, data, rc, TRUE);
} else {
crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name,
other->originator, other->state);
}
}
}
/*!
* \internal
* \brief Finalize a remote operation.
*
* \description This function has two code paths.
*
* Path 1. This node is the owner of the operation and needs
* to notify the cpg group via a broadcast as to the operation's
* results.
*
* Path 2. The cpg broadcast is received. All nodes notify their local
* stonith clients the operation results.
*
* So, The owner of the operation first notifies the cluster of the result,
* and once that cpg notify is received back it notifies all the local clients.
*
* Nodes that are passive watchers of the operation will receive the
* broadcast and only need to notify their local clients the operation finished.
*
* \param op, The fencing operation to finalize
* \param data, The xml msg reply (if present) of the last delegated fencing
* operation.
* \param dup, Is this operation a duplicate, if so treat it a little differently
* making sure the broadcast is not sent out.
*/
static void
remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
{
int level = LOG_ERR;
const char *subt = NULL;
xmlNode *local_data = NULL;
op->completed = time(NULL);
clear_remote_op_timers(op);
if (op->notify_sent == TRUE) {
crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s",
op->action, op->target, op->delegate ? op->delegate : "<no-one>",
op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc));
goto remote_op_done_cleanup;
}
if (!op->delegate && data) {
op->delegate = crm_element_value_copy(data, F_ORIG);
}
if (data == NULL) {
data = create_xml_node(NULL, "remote-op");
local_data = data;
}
/* Tell everyone the operation is done, we will continue
* with doing the local notifications once we receive
* the broadcast back. */
subt = crm_element_value(data, F_SUBTYPE);
if (dup == FALSE && safe_str_neq(subt, "broadcast")) {
/* Defer notification until the bcast message arrives */
bcast_result_to_peers(op, rc);
goto remote_op_done_cleanup;
}
if (rc == pcmk_ok || dup) {
level = LOG_NOTICE;
} else if (safe_str_neq(op->originator, stonith_our_uname)) {
level = LOG_NOTICE;
}
do_crm_log(level,
"Operation %s of %s by %s for %s@%s.%.8s: %s",
op->action, op->target, op->delegate ? op->delegate : "<no-one>",
op->client_name, op->originator, op->id, pcmk_strerror(rc));
handle_local_reply_and_notify(op, data, rc);
if (dup == FALSE) {
handle_duplicates(op, data, rc);
}
/* Free non-essential parts of the record
* Keep the record around so we can query the history
*/
if (op->query_results) {
g_list_free_full(op->query_results, free_remote_query);
op->query_results = NULL;
}
if (op->request) {
free_xml(op->request);
op->request = NULL;
}
remote_op_done_cleanup:
free_xml(local_data);
}
static gboolean
remote_op_timeout_one(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_one = 0;
crm_notice("Remote %s operation on %s for %s.%8s timed out",
op->action, op->target, op->client_name, op->id);
call_remote_stonith(op, NULL);
return FALSE;
}
static gboolean
remote_op_timeout(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_total = 0;
if (op->state == st_done) {
crm_debug("Action %s (%s) for %s (%s) already completed",
op->action, op->id, op->target, op->client_name);
return FALSE;
}
crm_debug("Action %s (%s) for %s (%s) timed out",
op->action, op->id, op->target, op->client_name);
op->state = st_failed;
remote_op_done(op, NULL, -ETIME, FALSE);
return FALSE;
}
static gboolean
remote_op_query_timeout(gpointer data)
{
remote_fencing_op_t *op = data;
op->query_timer = 0;
if (op->state == st_done) {
crm_debug("Operation %s for %s already completed", op->id, op->target);
} else if (op->state == st_exec) {
crm_debug("Operation %s for %s already in progress", op->id, op->target);
} else if (op->query_results) {
crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state);
call_remote_stonith(op, NULL);
} else {
crm_debug("Query %s for %s timed out: %d", op->id, op->target, op->state);
if (op->op_timer_total) {
g_source_remove(op->op_timer_total);
op->op_timer_total = 0;
}
remote_op_timeout(op);
}
return FALSE;
}
static int
stonith_topology_next(remote_fencing_op_t * op)
{
stonith_topology_t *tp = NULL;
if (op->target) {
/* Queries don't have a target set */
tp = g_hash_table_lookup(topology, op->target);
}
if (tp == NULL) {
return pcmk_ok;
}
set_bit(op->call_options, st_opt_topology);
do {
op->level++;
} while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
if (op->level < ST_LEVEL_MAX) {
crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s",
op->level, op->target, g_list_length(tp->levels[op->level]),
op->client_name, op->originator, op->id);
op->devices = tp->levels[op->level];
return pcmk_ok;
}
crm_notice("All fencing options to fence %s for %s@%s.%.8s failed",
op->target, op->client_name, op->originator, op->id);
return -EINVAL;
}
/*!
* \brief Check to see if this operation is a duplicate of another in flight
* operation. If so merge this operation into the inflight operation, and mark
* it as a duplicate.
*/
static void
merge_duplicates(remote_fencing_op_t * op)
{
GHashTableIter iter;
remote_fencing_op_t *other = NULL;
time_t now = time(NULL);
g_hash_table_iter_init(&iter, remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
crm_node_t *peer = NULL;
if (other->state > st_exec) {
/* Must be in-progress */
continue;
} else if (safe_str_neq(op->target, other->target)) {
/* Must be for the same node */
continue;
} else if (safe_str_neq(op->action, other->action)) {
crm_trace("Must be for the same action: %s vs. ", op->action, other->action);
continue;
} else if (safe_str_eq(op->client_name, other->client_name)) {
crm_trace("Must be for different clients: %s", op->client_name);
continue;
} else if (safe_str_eq(other->target, other->originator)) {
crm_trace("Can't be a suicide operation: %s", other->target);
continue;
}
peer = crm_get_peer(0, other->originator);
if(fencing_peer_active(peer) == FALSE) {
crm_notice("Failing stonith action %s for node %s originating from %s@%s.%.8s: Originator is dead",
other->action, other->target, other->client_name, other->originator, other->id);
other->state = st_failed;
continue;
} else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) {
crm_info("Stonith action %s for node %s originating from %s@%s.%.8s is too old: %d vs. %d + %d",
other->action, other->target, other->client_name, other->originator, other->id,
now, other->created, other->total_timeout);
continue;
}
/* There is another in-flight request to fence the same host
* Piggyback on that instead. If it fails, so do we.
*/
other->duplicates = g_list_append(other->duplicates, op);
if (other->total_timeout == 0) {
crm_trace("Making a best-guess as to the timeout used");
other->total_timeout = op->total_timeout =
TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL, op->base_timeout);
}
crm_notice
("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)",
op->action, op->target, op->client_name, op->id, other->client_name, other->originator,
other->id, other->total_timeout);
report_timeout_period(op, other->total_timeout);
op->state = st_duplicate;
}
}
static uint32_t fencing_active_peers(void)
{
uint32_t count = 0;
crm_node_t *entry;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
if(fencing_peer_active(entry)) {
count++;
}
}
return count;
}
int
stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
{
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
op->state = st_done;
op->completed = time(NULL);
op->delegate = strdup("a human");
crm_notice("Injecting manual confirmation that %s is safely off/down",
crm_element_value(dev, F_STONITH_TARGET));
remote_op_done(op, msg, pcmk_ok, FALSE);
/* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
return -EINPROGRESS;
}
/*!
* \internal
* \brief Create a new remote stonith op
* \param client, he local stonith client id that initaited the operation
* \param request, The request from the client that started the operation
* \param peer, Is this operation owned by another stonith peer? Operations
* owned by other peers are stored on all the stonith nodes, but only the
* owner executes the operation. All the nodes get the results to the operation
* once the owner finishes executing it.
*/
void *
create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
{
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
if (remote_op_list == NULL) {
remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op);
}
/* If this operation is owned by another node, check to make
* sure we haven't already created this operation. */
if (peer && dev) {
const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
CRM_CHECK(op_id != NULL, return NULL);
op = g_hash_table_lookup(remote_op_list, op_id);
if (op) {
crm_debug("%s already exists", op_id);
return op;
}
}
op = calloc(1, sizeof(remote_fencing_op_t));
crm_element_value_int(request, F_STONITH_TIMEOUT, (int *)&(op->base_timeout));
if (peer && dev) {
op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
} else {
op->id = crm_generate_uuid();
}
g_hash_table_replace(remote_op_list, op->id, op);
CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL);
crm_trace("Created %s", op->id);
op->state = st_query;
op->replies_expected = fencing_active_peers();
op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
+ op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
op->created = time(NULL);
if (op->originator == NULL) {
/* Local or relayed request */
op->originator = strdup(stonith_our_uname);
}
CRM_LOG_ASSERT(client != NULL);
if (client) {
op->client_id = strdup(client);
}
op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
crm_element_value_int(request, F_STONITH_CALLOPTS, (int *)&(op->call_options));
crm_element_value_int(request, F_STONITH_CALLID, (int *)&(op->client_callid));
crm_trace("%s new stonith op: %s - %s of %s for %s",
(peer
&& dev) ? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name);
if (op->call_options & st_opt_cs_nodeid) {
int nodeid = crm_atoi(op->target, NULL);
crm_node_t *node = crm_get_peer(nodeid, NULL);
/* Ensure the conversion only happens once */
op->call_options &= ~st_opt_cs_nodeid;
if (node && node->uname) {
free(op->target);
op->target = strdup(node->uname);
} else {
crm_warn("Could not expand nodeid '%s' into a host name (%p)", op->target, node);
}
}
/* check to see if this is a duplicate operation of another in-flight operation */
merge_duplicates(op);
return op;
}
remote_fencing_op_t *
initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack)
{
int query_timeout = 0;
xmlNode *query = NULL;
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
if (client) {
client_id = client->id;
} else {
client_id = crm_element_value(request, F_STONITH_CLIENTID);
}
CRM_LOG_ASSERT(client_id != NULL);
op = create_remote_stonith_op(client_id, request, FALSE);
op->owner = TRUE;
if (manual_ack) {
crm_notice("Initiating manual confirmation for %s: %s",
op->target, op->id);
return op;
}
CRM_CHECK(op->action, return NULL);
if (stonith_topology_next(op) != pcmk_ok) {
op->state = st_failed;
}
switch (op->state) {
case st_failed:
crm_warn("Initiation of remote operation %s for %s: failed (%s)", op->action,
op->target, op->id);
remote_op_done(op, NULL, -EINVAL, FALSE);
return op;
case st_duplicate:
crm_info("Initiating remote operation %s for %s: %s (duplicate)", op->action,
op->target, op->id);
return op;
default:
crm_notice("Initiating remote operation %s for %s: %s (%d)", op->action, op->target,
op->id, op->state);
}
query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, 0);
crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
crm_xml_add(query, F_STONITH_TARGET, op->target);
crm_xml_add(query, F_STONITH_ACTION, op->action);
crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
return op;
}
static gint
sort_strings(gconstpointer a, gconstpointer b)
{
return strcmp(a, b);
}
enum find_best_peer_options {
/*! Skip checking the target peer for capable fencing devices */
FIND_PEER_SKIP_TARGET = 0x0001,
/*! Only check the target peer for capable fencing devices */
FIND_PEER_TARGET_ONLY = 0x0002,
/*! Skip peers and devices that are not verified */
FIND_PEER_VERIFIED_ONLY = 0x0004,
};
static st_query_result_t *
find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
{
GListPtr iter = NULL;
gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
if (!device && is_set(op->call_options, st_opt_topology)) {
return NULL;
}
for (iter = op->query_results; iter != NULL; iter = iter->next) {
st_query_result_t *peer = iter->data;
if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) {
continue;
}
if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) {
continue;
}
if (is_set(op->call_options, st_opt_topology)) {
/* Do they have the next device of the current fencing level? */
GListPtr match = NULL;
if (verified_devices_only && !g_hash_table_lookup(peer->verified_devices, device)) {
continue;
}
match = g_list_find_custom(peer->device_list, device, sort_strings);
if (match) {
crm_trace("Removing %s from %s (%d remaining)", (char *)match->data, peer->host,
g_list_length(peer->device_list));
peer->device_list = g_list_remove(peer->device_list, match->data);
return peer;
}
} else if (peer->devices > 0 && peer->tried == FALSE) {
if (verified_devices_only && !g_hash_table_size(peer->verified_devices)) {
continue;
}
/* No topology: Use the current best peer */
crm_trace("Simple fencing");
return peer;
}
}
return NULL;
}
static st_query_result_t *
stonith_choose_peer(remote_fencing_op_t * op)
{
st_query_result_t *peer = NULL;
const char *device = NULL;
do {
if (op->devices) {
device = op->devices->data;
crm_trace("Checking for someone to fence %s with %s", op->target,
(char *)op->devices->data);
} else {
crm_trace("Checking for someone to fence %s", op->target);
}
if ((peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET | FIND_PEER_VERIFIED_ONLY))) {
return peer;
} else if ((peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET))) {
return peer;
} else if ((peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY))) {
return peer;
}
/* Try the next fencing level if there is one */
} while (is_set(op->call_options, st_opt_topology)
&& stonith_topology_next(op) == pcmk_ok);
if (op->devices) {
crm_debug("Couldn't find anyone to fence %s with %s", op->target,
(char *)op->devices->data);
} else {
crm_debug("Couldn't find anyone to fence %s", op->target);
}
return NULL;
}
static int
get_device_timeout(st_query_result_t * peer, const char *device, int default_timeout)
{
gpointer res;
if (!peer || !device) {
return default_timeout;
}
res = g_hash_table_lookup(peer->custom_action_timeouts, device);
return res ? GPOINTER_TO_INT(res) : default_timeout;
}
static int
get_peer_timeout(st_query_result_t * peer, int default_timeout)
{
int total_timeout = 0;
GListPtr cur = NULL;
for (cur = peer->device_list; cur; cur = cur->next) {
total_timeout += get_device_timeout(peer, cur->data, default_timeout);
}
return total_timeout ? total_timeout : default_timeout;
}
static int
get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer, int default_timeout)
{
stonith_topology_t *tp = g_hash_table_lookup(topology, op->target);
int total_timeout = 0;
if (is_set(op->call_options, st_opt_topology) && tp) {
int i;
GListPtr device_list = NULL;
GListPtr iter = NULL;
/* Yep, this looks scary, nested loops all over the place.
* Here is what is going on.
* Loop1: Iterate through fencing levels.
* Loop2: If a fencing level has devices, loop through each device
* Loop3: For each device in a fencing level, see what peer owns it
* and what that peer has reported the timeout is for the device.
*/
for (i = 0; i < ST_LEVEL_MAX; i++) {
if (!tp->levels[i]) {
continue;
}
for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
for (iter = op->query_results; iter != NULL; iter = iter->next) {
st_query_result_t *peer = iter->data;
if (g_list_find_custom(peer->device_list, device_list->data, sort_strings)) {
total_timeout +=
get_device_timeout(peer, device_list->data, default_timeout);
break;
}
} /* End Loop3: match device with peer that owns device, find device's timeout period */
} /* End Loop2: iterate through devices at a specific level */
} /*End Loop1: iterate through fencing levels */
} else if (chosen_peer) {
total_timeout = get_peer_timeout(chosen_peer, default_timeout);
} else {
total_timeout = default_timeout;
}
return total_timeout ? total_timeout : default_timeout;
}
static void
report_timeout_period(remote_fencing_op_t * op, int op_timeout)
{
GListPtr iter = NULL;
xmlNode *update = NULL;
const char *client_node = NULL;
const char *client_id = NULL;
const char *call_id = NULL;
if (op->call_options & st_opt_sync_call) {
/* There is no reason to report the timeout for a syncronous call. It
* is impossible to use the reported timeout to do anything when the client
* is blocking for the response. This update is only important for
* async calls that require a callback to report the results in. */
return;
} else if (!op->request) {
return;
}
crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id);
client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
call_id = crm_element_value(op->request, F_STONITH_CALLID);
client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
if (!client_node || !call_id || !client_id) {
return;
}
if (safe_str_eq(client_node, stonith_our_uname)) {
/* The client is connected to this node, send the update direclty to them */
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
return;
}
/* The client is connected to another node, relay this update to them */
update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
crm_xml_add(update, F_STONITH_CLIENTID, client_id);
crm_xml_add(update, F_STONITH_CALLID, call_id);
crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
free_xml(update);
for (iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *dup = iter->data;
crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id);
report_timeout_period(iter->data, op_timeout);
}
}
void
call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
{
const char *device = NULL;
int timeout = op->base_timeout;
crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state);
if (peer == NULL && !is_set(op->call_options, st_opt_topology)) {
peer = stonith_choose_peer(op);
}
if (!op->op_timer_total) {
int total_timeout = get_op_total_timeout(op, peer, op->base_timeout);
op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout;
op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
report_timeout_period(op, op->total_timeout);
crm_info("Total remote op timeout set to %d for fencing of node %s for %s.%.8s",
total_timeout, op->target, op->client_name, op->id);
}
if (is_set(op->call_options, st_opt_topology) && op->devices) {
/* Ignore any preference, they might not have the device we need */
/* When using topology, the stonith_choose_peer function pops off
* the peer from the op's query results. Make sure to calculate
* the op_timeout before calling this function when topology is in use */
peer = stonith_choose_peer(op);
device = op->devices->data;
timeout = get_device_timeout(peer, device, op->base_timeout);
}
if (peer) {
int timeout_one = 0;
xmlNode *query = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
crm_xml_add(query, F_STONITH_TARGET, op->target);
crm_xml_add(query, F_STONITH_ACTION, op->action);
crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
crm_xml_add_int(query, F_STONITH_TIMEOUT, timeout);
if (device) {
timeout_one =
TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(peer, device, op->base_timeout);
crm_info("Requesting that %s perform op %s %s with %s for %s (%ds)", peer->host,
op->action, op->target, device, op->client_name, timeout_one);
crm_xml_add(query, F_STONITH_DEVICE, device);
crm_xml_add(query, F_STONITH_MODE, "slave");
} else {
timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(peer, op->base_timeout);
crm_info("Requesting that %s perform op %s %s for %s (%ds)",
peer->host, op->action, op->target, op->client_name, timeout_one);
crm_xml_add(query, F_STONITH_MODE, "smart");
}
op->state = st_exec;
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
}
op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, query, FALSE);
peer->tried = TRUE;
free_xml(query);
return;
} else if (op->owner == FALSE) {
crm_err("The termination of %s for %s is not ours to control", op->target, op->client_name);
} else if (op->query_timer == 0) {
/* We've exhausted all available peers */
crm_info("No remaining peers capable of terminating %s for %s (%d)", op->target,
op->client_name, op->state);
CRM_LOG_ASSERT(op->state < st_done);
remote_op_timeout(op);
} else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
crm_info("None of the %d peers are capable of terminating %s for %s (%d)",
op->replies, op->target, op->client_name, op->state);
op->state = st_failed;
remote_op_done(op, NULL, -EHOSTUNREACH, FALSE);
} else if (device) {
crm_info("Waiting for additional peers capable of terminating %s with %s for %s.%.8s",
op->target, device, op->client_name, op->id);
} else {
crm_info("Waiting for additional peers capable of terminating %s for %s%.8s",
op->target, op->client_name, op->id);
}
}
static gint
sort_peers(gconstpointer a, gconstpointer b)
{
const st_query_result_t *peer_a = a;
const st_query_result_t *peer_b = a;
if (peer_a->devices > peer_b->devices) {
return -1;
} else if (peer_a->devices > peer_b->devices) {
return 1;
}
return 0;
}
/*!
* \internal
* \brief Determine if all the devices in the topology are found or not
*/
static gboolean
all_topology_devices_found(remote_fencing_op_t * op)
{
GListPtr device = NULL;
GListPtr iter = NULL;
GListPtr match = NULL;
stonith_topology_t *tp = NULL;
gboolean skip_target = FALSE;
int i;
tp = g_hash_table_lookup(topology, op->target);
if (!tp) {
return FALSE;
}
if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) {
/* Don't count the devices on the target node if we are killing
* the target node. */
skip_target = TRUE;
}
for (i = 0; i < ST_LEVEL_MAX; i++) {
for (device = tp->levels[i]; device; device = device->next) {
match = FALSE;
for (iter = op->query_results; iter != NULL; iter = iter->next) {
st_query_result_t *peer = iter->data;
if (skip_target && safe_str_eq(peer->host, op->target)) {
continue;
}
match = g_list_find_custom(peer->device_list, device->data, sort_strings);
}
if (!match) {
return FALSE;
}
}
}
return TRUE;
}
int
process_remote_stonith_query(xmlNode * msg)
{
int devices = 0;
gboolean host_is_target = FALSE;
const char *id = NULL;
const char *host = NULL;
remote_fencing_op_t *op = NULL;
st_query_result_t *result = NULL;
uint32_t active = fencing_active_peers();
xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
xmlNode *child = NULL;
CRM_CHECK(dev != NULL, return -EPROTO);
id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
CRM_CHECK(id != NULL, return -EPROTO);
dev = get_xpath_object("//@st-available-devices", msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
crm_element_value_int(dev, "st-available-devices", &devices);
op = g_hash_table_lookup(remote_op_list, id);
if (op == NULL) {
crm_debug("Unknown or expired remote op: %s", id);
return -EOPNOTSUPP;
}
op->replies++;
host = crm_element_value(msg, F_ORIG);
host_is_target = safe_str_eq(host, op->target);
if (devices <= 0) {
/* If we're doing 'known' then we might need to fire anyway */
crm_trace("Query result from %s (%d devices)", host, devices);
if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) {
crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies);
call_remote_stonith(op, NULL);
}
return pcmk_ok;
} else if (host_is_target) {
if (op->call_options & st_opt_allow_suicide) {
crm_trace("Allowing %s to potentialy fence itself", op->target);
} else {
crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide",
op->target);
return pcmk_ok;
}
}
crm_info("Query result %d of %d from %s (%d devices)", op->replies, op->replies_expected, host, devices);
result = calloc(1, sizeof(st_query_result_t));
result->host = strdup(host);
result->devices = devices;
result->custom_action_timeouts = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL);
result->verified_devices = g_hash_table_new_full(crm_str_hash, g_str_equal, free, NULL);
for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) {
const char *device = ID(child);
int action_timeout = 0;
int verified = 0;
if (device) {
result->device_list = g_list_prepend(result->device_list, strdup(device));
crm_element_value_int(child, F_STONITH_ACTION_TIMEOUT, &action_timeout);
crm_element_value_int(child, F_STONITH_DEVICE_VERIFIED, &verified);
if (action_timeout) {
crm_trace("Peer %s with device %s returned action timeout %d",
result->host, device, action_timeout);
g_hash_table_insert(result->custom_action_timeouts,
strdup(device), GINT_TO_POINTER(action_timeout));
}
if (verified) {
crm_trace("Peer %s has confirmed a verified device %s", result->host, device);
g_hash_table_insert(result->verified_devices,
strdup(device), GINT_TO_POINTER(verified));
}
}
}
CRM_CHECK(devices == g_list_length(result->device_list),
crm_err("Mis-match: Query claimed to have %d devices but %d found", devices,
g_list_length(result->device_list)));
op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
if (is_set(op->call_options, st_opt_topology)) {
/* If we start the fencing before all the topology results are in,
* it is possible fencing levels will be skipped because of the missing
* query results. */
if (op->state == st_query && all_topology_devices_found(op)) {
/* All the query results are in for the topology, start the fencing ops. */
crm_trace("All topology devices found");
call_remote_stonith(op, result);
} else if(op->state == st_query && (op->replies >= op->replies_expected || op->replies >= active)) {
crm_info("All topology queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies);
call_remote_stonith(op, NULL);
}
} else if (op->state == st_query) {
/* We have a result for a non-topology fencing op that looks promising,
* go ahead and start fencing before query timeout */
if (host_is_target == FALSE && g_hash_table_size(result->verified_devices)) {
/* we have a verified device living on a peer that is not the target */
crm_trace("Found %d verified devices", g_hash_table_size(result->verified_devices));
call_remote_stonith(op, result);
} else if (safe_str_eq(op->action, "on")) {
crm_trace("Unfencing %s", op->target);
call_remote_stonith(op, result);
} else if(op->replies >= op->replies_expected || op->replies >= active) {
crm_info("All queries have arrived, continuing (%d, %d, %d) ", op->replies_expected, active, op->replies);
call_remote_stonith(op, NULL);
} else {
crm_trace("Waiting for more peer results before launching fencing operation");
}
} else if (op->state == st_done) {
crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
result->host, result->devices, op->state);
}
return pcmk_ok;
}
int
process_remote_stonith_exec(xmlNode * msg)
{
int rc = 0;
const char *id = NULL;
const char *device = NULL;
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
CRM_CHECK(id != NULL, return -EPROTO);
dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
crm_element_value_int(dev, F_STONITH_RC, &rc);
device = crm_element_value(dev, F_STONITH_DEVICE);
if (remote_op_list) {
op = g_hash_table_lookup(remote_op_list, id);
}
if (op == NULL && rc == pcmk_ok) {
/* Record successful fencing operations */
const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
op = create_remote_stonith_op(client_id, dev, TRUE);
}
if (op == NULL) {
/* Could be for an event that began before we started */
/* TODO: Record the op for later querying */
crm_info("Unknown or expired remote op: %s", id);
return -EOPNOTSUPP;
}
if (op->devices && device && safe_str_neq(op->devices->data, device)) {
crm_err
("Received outdated reply for device %s (instead of %s) to %s node %s. Operation already timed out at remote level.",
device, op->devices->data, op->action, op->target);
return rc;
}
if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) {
crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)",
op->action, op->target, op->client_name, op->id, op->originator,
pcmk_strerror(rc), rc);
if (rc == pcmk_ok) {
op->state = st_done;
} else {
op->state = st_failed;
}
remote_op_done(op, msg, rc, FALSE);
return pcmk_ok;
} else if (safe_str_neq(op->originator, stonith_our_uname)) {
/* If this isn't a remote level broadcast, and we are not the
* originator of the operation, we should not be receiving this msg. */
crm_err
("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)",
stonith_our_uname, device, op->target);
return rc;
}
if (is_set(op->call_options, st_opt_topology)) {
const char *device = crm_element_value(msg, F_STONITH_DEVICE);
crm_notice("Call to %s for %s on behalf of %s@%s: %s (%d)",
device, op->target, op->client_name, op->originator,
pcmk_strerror(rc), rc);
/* We own the op, and it is complete. broadcast the result to all nodes
* and notify our local clients. */
if (op->state == st_done) {
remote_op_done(op, msg, rc, FALSE);
return rc;
}
/* An operation completed succesfully but has not yet been marked as done.
* Continue the topology if more devices exist at the current level, otherwise
* mark as done. */
if (rc == pcmk_ok) {
if (op->devices) {
/* Success, are there any more? */
op->devices = op->devices->next;
}
/* if no more devices at this fencing level, we are done,
* else we need to contine with executing the next device in the list */
if (op->devices == NULL) {
crm_trace("Marking complex fencing op for %s as complete", op->target);
op->state = st_done;
remote_op_done(op, msg, rc, FALSE);
return rc;
}
} else {
/* This device failed, time to try another topology level. If no other
* levels are available, mark this operation as failed and report results. */
if (stonith_topology_next(op) != pcmk_ok) {
op->state = st_failed;
remote_op_done(op, msg, rc, FALSE);
return rc;
}
}
} else if (rc == pcmk_ok && op->devices == NULL) {
crm_trace("All done for %s", op->target);
op->state = st_done;
remote_op_done(op, msg, rc, FALSE);
return rc;
}
/* Retry on failure or execute the rest of the topology */
crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
op->client_name, rc);
call_remote_stonith(op, NULL);
return rc;
}
int
stonith_fence_history(xmlNode * msg, xmlNode ** output)
{
int rc = 0;
const char *target = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE);
if (dev) {
int options = 0;
target = crm_element_value(dev, F_STONITH_TARGET);
crm_element_value_int(msg, F_STONITH_CALLOPTS, &options);
if (target && (options & st_opt_cs_nodeid)) {
int nodeid = crm_atoi(target, NULL);
crm_node_t *node = crm_get_peer(nodeid, NULL);
if (node) {
target = node->uname;
}
}
}
crm_trace("Looking for operations on %s in %p", target, remote_op_list);
*output = create_xml_node(NULL, F_STONITH_HISTORY_LIST);
if (remote_op_list) {
GHashTableIter iter;
remote_fencing_op_t *op = NULL;
g_hash_table_iter_init(&iter, remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
xmlNode *entry = NULL;
if (target && strcmp(op->target, target) != 0) {
continue;
}
rc = 0;
crm_trace("Attaching op %s", op->id);
entry = create_xml_node(*output, STONITH_OP_EXEC);
crm_xml_add(entry, F_STONITH_TARGET, op->target);
crm_xml_add(entry, F_STONITH_ACTION, op->action);
crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
+ crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name);
crm_xml_add_int(entry, F_STONITH_DATE, op->completed);
crm_xml_add_int(entry, F_STONITH_STATE, op->state);
}
}
return rc;
}
gboolean
stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
{
GHashTableIter iter;
time_t now = time(NULL);
remote_fencing_op_t *rop = NULL;
crm_trace("tolerance=%d, remote_op_list=%p", tolerance, remote_op_list);
if (tolerance <= 0 || !remote_op_list || target == NULL || action == NULL) {
return FALSE;
}
g_hash_table_iter_init(&iter, remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
if (strcmp(rop->target, target) != 0) {
continue;
} else if (rop->state != st_done) {
continue;
} else if (strcmp(rop->action, action) != 0) {
continue;
} else if ((rop->completed + tolerance) < now) {
continue;
}
crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
target, action, tolerance, rop->delegate, rop->originator);
return TRUE;
}
return FALSE;
}
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
index 4ffb9b8335..35f6e5ad81 100644
--- a/include/crm/stonith-ng.h
+++ b/include/crm/stonith-ng.h
@@ -1,433 +1,434 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* \file
* \brief Fencing aka. STONITH
* \ingroup fencing
*/
#ifndef STONITH_NG__H
# define STONITH_NG__H
# include <dlfcn.h>
# include <stdbool.h>
/* TO-DO: Work out how to drop this requirement */
# include <libxml/tree.h>
# define T_STONITH_NOTIFY_DISCONNECT "st_notify_disconnect"
# define T_STONITH_NOTIFY_FENCE "st_notify_fence"
/* *INDENT-OFF* */
enum stonith_state {
stonith_connected_command,
stonith_connected_query,
stonith_disconnected,
};
enum stonith_call_options {
st_opt_none = 0x00000000,
st_opt_verbose = 0x00000001,
st_opt_allow_suicide = 0x00000002,
st_opt_manual_ack = 0x00000008,
st_opt_discard_reply = 0x00000010,
/* st_opt_all_replies = 0x00000020, */
st_opt_topology = 0x00000040,
st_opt_scope_local = 0x00000100,
st_opt_cs_nodeid = 0x00000200,
st_opt_sync_call = 0x00001000,
/*! Allow the timeout period for a callback to be adjusted
* based on the time the server reports the operation will take. */
st_opt_timeout_updates = 0x00002000,
/*! Only report back if operation is a success in callback */
st_opt_report_only_success = 0x00004000,
};
#define stonith_default_options = stonith_none
/*! Order matters here, do not change values */
enum op_state
{
st_query,
st_exec,
st_done,
st_duplicate,
st_failed,
};
typedef struct stonith_key_value_s {
char *key;
char *value;
struct stonith_key_value_s *next;
} stonith_key_value_t;
typedef struct stonith_history_s {
char *target;
char *action;
char *origin;
char *delegate;
int completed;
int state;
struct stonith_history_s *next;
+ char *client;
} stonith_history_t;
typedef struct stonith_s stonith_t;
typedef struct stonith_event_s
{
char *id;
char *type;
char *message;
char *operation;
int result;
char *origin;
char *target;
char *action;
char *executioner;
char *device;
/*! The name of the client that initiated the action. */
char *client_origin;
} stonith_event_t;
typedef struct stonith_callback_data_s
{
int rc;
int call_id;
void *userdata;
} stonith_callback_data_t;
typedef struct stonith_api_operations_s
{
/*!
* \brief Destroy the stonith api structure.
*/
int (*free) (stonith_t *st);
/*!
* \brief Connect to the local stonith daemon.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*connect) (stonith_t *st, const char *name, int *stonith_fd);
/*!
* \brief Disconnect from the local stonith daemon.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*disconnect)(stonith_t *st);
/*!
* \brief Remove a registered stonith device with the local stonith daemon.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*remove_device)(
stonith_t *st, int options, const char *name);
/*!
* \brief Register a stonith device with the local stonith daemon.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_device)(
stonith_t *st, int options, const char *id,
const char *namespace, const char *agent, stonith_key_value_t *params);
/*!
* \brief Remove a fencing level for a specific node.
*
* \note This feature is not available when stonith is in standalone mode.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*remove_level)(
stonith_t *st, int options, const char *node, int level);
/*!
* \brief Register a fencing level containing the fencing devices to be used
* at that level for a specific node.
*
* \note This feature is not available when stonith is in standalone mode.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_level)(
stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
/*!
* \brief Get the metadata documentation for a resource.
*
* \note Value is returned in output. Output must be freed when set.
*
* \retval 0 success
* \retval negative error code on failure
*/
int (*metadata)(stonith_t *st, int options,
const char *device, const char *namespace, char **output, int timeout);
/*!
* \brief Retrieve a list of installed stonith agents
*
* \note if namespace is not provided, all known agents will be returned
* \note list must be freed using stonith_key_value_freeall()
* \note call_options parameter is not used, it is reserved for future use.
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_agents)(stonith_t *stonith, int call_options, const char *namespace,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Retrieve string listing hosts and port assignments from a local stonith device.
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*list)(stonith_t *st, int options, const char *id, char **list_output, int timeout);
/*!
* \brief Check to see if a local stonith device is reachable
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*monitor)(stonith_t *st, int options, const char *id, int timeout);
/*!
* \brief Check to see if a local stonith device's port is reachable
*
* \retval 0 on success
* \retval negative error code on failure
*/
int (*status)(stonith_t *st, int options, const char *id, const char *port, int timeout);
/*!
* \brief Retrieve a list of registered stonith devices.
*
* \note If node is provided, only devices that can fence the node id
* will be returned.
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*query)(stonith_t *st, int options, const char *node,
stonith_key_value_t **devices, int timeout);
/*!
* \brief Issue a fencing action against a node.
*
* \note Possible actions are, 'on', 'off', and 'reboot'.
*
* \param st, stonith connection
* \param options, call options
* \param node, The target node to fence
* \param action, The fencing action to take
* \param timeout, The default per device timeout to use with each device
* capable of fencing the target.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*fence)(stonith_t *st, int options, const char *node, const char *action,
int timeout, int tolerance);
/*!
* \brief Manually confirm that a node is down.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*confirm)(stonith_t *st, int options, const char *node);
/*!
* \brief Retrieve a list of fencing operations that have occurred for a specific node.
*
* \note History is not available in standalone mode.
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*history)(stonith_t *st, int options, const char *node, stonith_history_t **output, int timeout);
int (*register_notification)(
stonith_t *st, const char *event,
void (*notify)(stonith_t *st, stonith_event_t *e));
int (*remove_notification)(stonith_t *st, const char *event);
/*!
* \brief Register a callback to receive the result of an async call id
*
* \param call_id, The call id to register the callback for.
* \param timeout, The default timeout period to wait until this callback expires
* \param options, Option flags, st_opt_timeout_updates and st_opt_report_only_success are the
* only valid options for this function.
* \param userdate, A pointer that will be handed back in the callback.
* \param callback_name, Unique name given to callback
* \param callback, The callback function
*
* \retval 0 success
* \retval negative error code on failure.
*/
int (*register_callback)(stonith_t *st,
int call_id,
int timeout,
int options,
void *userdata,
const char *callback_name,
void (*callback)(stonith_t *st, stonith_callback_data_t *data));
/*!
* \brief Remove a registered callback for a given call id.
*/
int (*remove_callback)(stonith_t *st, int call_id, bool all_callbacks);
} stonith_api_operations_t;
struct stonith_s
{
enum stonith_state state;
int call_id;
int call_timeout;
void *private;
stonith_api_operations_t *cmds;
};
/* *INDENT-ON* */
/* Core functions */
stonith_t *stonith_api_new(void);
void stonith_api_delete(stonith_t * st);
void stonith_dump_pending_callbacks(stonith_t * st);
const char *get_stonith_provider(const char *agent, const char *provider);
bool stonith_dispatch(stonith_t * st);
stonith_key_value_t *stonith_key_value_add(stonith_key_value_t * kvp, const char *key,
const char *value);
void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values);
/* Basic helpers that allows nodes to be fenced and the history to be
* queried without mainloop or the caller understanding the full API
*
* At least one of nodeid and uname are required
*/
int stonith_api_kick(int nodeid, const char *uname, int timeout, bool off);
time_t stonith_api_time(int nodeid, const char *uname, bool in_progress);
/*
* Helpers for using the above functions without install-time dependancies
*
* Usage:
* #include <crm/stonith-ng.h>
*
* To turn a node off by corosync nodeid:
* stonith_api_kick_helper(nodeid, 120, 1);
*
* To check the last fence date/time (also by nodeid):
* last = stonith_api_time_helper(nodeid, 0);
*
* To check if fencing is in progress:
* if(stonith_api_time_helper(nodeid, 1) > 0) { ... }
*
* eg.
#include <stdio.h>
#include <time.h>
#include <crm/stonith-ng.h>
int
main(int argc, char ** argv)
{
int rc = 0;
int nodeid = 102;
rc = stonith_api_time_helper(nodeid, 0);
printf("%d last fenced at %s\n", nodeid, ctime(rc));
rc = stonith_api_kick_helper(nodeid, 120, 1);
printf("%d fence result: %d\n", nodeid, rc);
rc = stonith_api_time_helper(nodeid, 0);
printf("%d last fenced at %s\n", nodeid, ctime(rc));
return 0;
}
*/
# define STONITH_LIBRARY "libstonithd.so.2"
static inline int
stonith_api_kick_helper(int nodeid, int timeout, bool off)
{
static void *st_library = NULL;
static int (*st_kick_fn) (int nodeid, const char *uname, int timeout, bool off) = NULL;
if (st_library == NULL) {
st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
}
if (st_library && st_kick_fn == NULL) {
st_kick_fn = dlsym(st_library, "stonith_api_kick");
}
if (st_kick_fn == NULL) {
return -ELIBACC;
}
return (*st_kick_fn) (nodeid, NULL, timeout, off);
}
static inline time_t
stonith_api_time_helper(int nodeid, bool in_progress)
{
static void *st_library = NULL;
static time_t(*st_time_fn) (int nodeid, const char *uname, bool in_progress) = NULL;
if (st_library == NULL) {
st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY);
}
if (st_library && st_time_fn == NULL) {
st_time_fn = dlsym(st_library, "stonith_api_time");
}
if (st_time_fn == NULL) {
return 0;
}
return (*st_time_fn) (nodeid, NULL, in_progress);
}
#endif
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index f392d2efc0..8879cfdaf4 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,2450 +1,2451 @@
/*
* Copyright (c) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <glib.h>
#include <dirent.h>
#include <libgen.h> /* Add it for compiling on OSX */
#include <crm/crm.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#ifdef HAVE_STONITH_STONITH_H
# include <stonith/stonith.h>
# define LHA_STONITH_LIBRARY "libstonith.so.1"
static void *lha_agents_lib = NULL;
#endif
#include <crm/common/mainloop.h>
CRM_TRACE_INIT_DATA(stonith);
struct stonith_action_s {
/*! user defined data */
char *agent;
char *action;
char *victim;
char *args;
int timeout;
int async;
void *userdata;
void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data);
/*! internal async track data */
int fd_stdout;
int last_timeout_signo;
/*! internal timing information */
time_t initial_start_time;
int tries;
int remaining_timeout;
guint timer_sigterm;
guint timer_sigkill;
int max_retries;
/* device output data */
GPid pid;
int rc;
char *output;
};
typedef struct stonith_private_s {
char *token;
crm_ipc_t *ipc;
mainloop_io_t *source;
GHashTable *stonith_op_callback_table;
GList *notify_list;
void (*op_callback) (stonith_t * st, stonith_callback_data_t * data);
} stonith_private_t;
typedef struct stonith_notify_client_s {
const char *event;
const char *obj_id; /* implement one day */
const char *obj_type; /* implement one day */
void (*notify) (stonith_t * st, stonith_event_t * e);
} stonith_notify_client_t;
typedef struct stonith_callback_client_s {
void (*callback) (stonith_t * st, stonith_callback_data_t * data);
const char *id;
void *user_data;
gboolean only_success;
gboolean allow_timeout_updates;
struct timer_rec_s *timer;
} stonith_callback_client_t;
struct notify_blob_s {
stonith_t *stonith;
xmlNode *xml;
};
struct timer_rec_s {
int call_id;
int timeout;
guint ref;
stonith_t *stonith;
};
typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *,
xmlNode *, xmlNode *, xmlNode **, xmlNode **);
static const char META_TEMPLATE[] =
"<?xml version=\"1.0\"?>\n"
"<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n"
"<resource-agent name=\"%s\">\n"
" <version>1.0</version>\n"
" <longdesc lang=\"en\">\n"
"%s\n"
" </longdesc>\n"
" <shortdesc lang=\"en\">%s</shortdesc>\n"
"%s\n"
" <actions>\n"
" <action name=\"start\" timeout=\"20\" />\n"
" <action name=\"stop\" timeout=\"15\" />\n"
" <action name=\"status\" timeout=\"20\" />\n"
" <action name=\"monitor\" timeout=\"20\" interval=\"3600\"/>\n"
" <action name=\"meta-data\" timeout=\"15\" />\n"
" </actions>\n"
" <special tag=\"heartbeat\">\n"
" <version>2.0</version>\n" " </special>\n" "</resource-agent>\n";
bool stonith_dispatch(stonith_t * st);
int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata);
void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc);
xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data,
xmlNode ** output_data, int call_options, int timeout);
static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int internal_stonith_action_execute(stonith_action_t * action);
static void
stonith_connection_destroy(gpointer user_data)
{
stonith_t *stonith = user_data;
stonith_private_t *native = NULL;
struct notify_blob_s blob;
crm_trace("Sending destroyed notification");
blob.stonith = stonith;
blob.xml = create_xml_node(NULL, "notify");
native = stonith->private;
native->ipc = NULL;
native->source = NULL;
stonith->state = stonith_disconnected;
crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT);
g_list_foreach(native->notify_list, stonith_send_notification, &blob);
free_xml(blob.xml);
}
xmlNode *
create_device_registration_xml(const char *id, const char *namespace, const char *agent,
stonith_key_value_t * params)
{
xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE);
xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
#if HAVE_STONITH_STONITH_H
namespace = get_stonith_provider(agent, namespace);
if (safe_str_eq(namespace, "heartbeat")) {
hash2field((gpointer) "plugin", (gpointer) agent, args);
agent = "fence_legacy";
}
#endif
crm_xml_add(data, XML_ATTR_ID, id);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, "agent", agent);
crm_xml_add(data, "namespace", namespace);
for (; params; params = params->next) {
hash2field((gpointer) params->key, (gpointer) params->value, args);
}
return data;
}
static int
stonith_api_register_device(stonith_t * st, int call_options,
const char *id, const char *namespace, const char *agent,
stonith_key_value_t * params)
{
int rc = 0;
xmlNode *data = NULL;
data = create_device_registration_xml(id, namespace, agent, params);
rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, XML_ATTR_ID, name);
rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_level(stonith_t * st, int options, const char *node, int level)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_LEVEL);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
crm_xml_add_int(data, XML_ATTR_ID, level);
rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0);
free_xml(data);
return rc;
}
xmlNode *
create_level_registration_xml(const char *node, int level, stonith_key_value_t * device_list)
{
xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL);
crm_xml_add_int(data, XML_ATTR_ID, level);
crm_xml_add(data, F_STONITH_TARGET, node);
crm_xml_add(data, "origin", __FUNCTION__);
for (; device_list; device_list = device_list->next) {
xmlNode *dev = create_xml_node(data, F_STONITH_DEVICE);
crm_xml_add(dev, XML_ATTR_ID, device_list->value);
}
return data;
}
static int
stonith_api_register_level(stonith_t * st, int options, const char *node, int level,
stonith_key_value_t * device_list)
{
int rc = 0;
xmlNode *data = create_level_registration_xml(node, level, device_list);
rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0);
free_xml(data);
return rc;
}
static void
append_arg(gpointer key, gpointer value, gpointer user_data)
{
int len = 3; /* =, \n, \0 */
int last = 0;
char **args = user_data;
CRM_CHECK(key != NULL, return);
CRM_CHECK(value != NULL, return);
if (strstr(key, "pcmk_")) {
return;
} else if (strstr(key, CRM_META)) {
return;
} else if (safe_str_eq(key, "crm_feature_set")) {
return;
}
len += strlen(key);
len += strlen(value);
if (*args != NULL) {
last = strlen(*args);
}
*args = realloc(*args, last + len);
crm_trace("Appending: %s=%s", (char *)key, (char *)value);
sprintf((*args) + last, "%s=%s\n", (char *)key, (char *)value);
}
static void
append_const_arg(const char *key, const char *value, char **arg_list)
{
char *glib_sucks_key = strdup(key);
char *glib_sucks_value = strdup(value);
append_arg(glib_sucks_key, glib_sucks_value, arg_list);
free(glib_sucks_value);
free(glib_sucks_key);
}
static void
append_host_specific_args(const char *victim, const char *map, GHashTable * params, char **arg_list)
{
char *name = NULL;
int last = 0, lpc = 0, max = 0;
if (map == NULL) {
/* The best default there is for now... */
crm_debug("Using default arg map: port=uname");
append_const_arg("port", victim, arg_list);
return;
}
max = strlen(map);
crm_debug("Processing arg map: %s", map);
for (; lpc < max + 1; lpc++) {
if (isalpha(map[lpc])) {
/* keep going */
} else if (map[lpc] == '=' || map[lpc] == ':') {
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, map + last, lpc - last);
crm_debug("Got name: %s", name);
last = lpc + 1;
} else if (map[lpc] == 0 || map[lpc] == ',' || isspace(map[lpc])) {
char *param = NULL;
const char *value = NULL;
param = calloc(1, 1 + lpc - last);
memcpy(param, map + last, lpc - last);
last = lpc + 1;
crm_debug("Got key: %s", param);
if (name == NULL) {
crm_err("Misparsed '%s', found '%s' without a name", map, param);
free(param);
continue;
}
if (safe_str_eq(param, "uname")) {
value = victim;
} else {
char *key = crm_meta_name(param);
value = g_hash_table_lookup(params, key);
free(key);
}
if (value) {
crm_debug("Setting '%s'='%s' (%s) for %s", name, value, param, victim);
append_const_arg(name, value, arg_list);
} else {
crm_err("No node attribute '%s' for '%s'", name, victim);
}
free(name);
name = NULL;
free(param);
if (map[lpc] == 0) {
break;
}
} else if (isspace(map[lpc])) {
last = lpc;
}
}
free(name);
}
static char *
make_args(const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args,
GHashTable * port_map)
{
char buffer[512];
char *arg_list = NULL;
const char *value = NULL;
CRM_CHECK(action != NULL, return NULL);
if (device_args) {
g_hash_table_foreach(device_args, append_arg, &arg_list);
}
buffer[511] = 0;
snprintf(buffer, 511, "pcmk_%s_action", action);
if (device_args) {
value = g_hash_table_lookup(device_args, buffer);
}
if (value == NULL && device_args) {
/* Legacy support for early 1.1 releases - Remove for 1.4 */
snprintf(buffer, 511, "pcmk_%s_cmd", action);
value = g_hash_table_lookup(device_args, buffer);
}
if (value == NULL && device_args && safe_str_eq(action, "off")) {
/* Legacy support for late 1.1 releases - Remove for 1.4 */
value = g_hash_table_lookup(device_args, "pcmk_poweroff_action");
}
if (value) {
crm_info("Substituting action '%s' for requested operation '%s'", value, action);
action = value;
}
append_const_arg(STONITH_ATTR_ACTION_OP, action, &arg_list);
if (victim && device_args) {
const char *alias = victim;
const char *param = g_hash_table_lookup(device_args, STONITH_ATTR_HOSTARG);
if (port_map && g_hash_table_lookup(port_map, victim)) {
alias = g_hash_table_lookup(port_map, victim);
}
/* Always supply the node's name too:
* https://fedorahosted.org/cluster/wiki/FenceAgentAPI
*/
append_const_arg("nodename", victim, &arg_list);
if (victim_nodeid) {
char nodeid_str[33] = { 0, };
if (snprintf(nodeid_str, 33, "%u", (unsigned int)victim_nodeid)) {
crm_info("For stonith action (%s) for victim %s, adding nodeid (%d) to parameters",
action, victim, nodeid_str);
append_const_arg("nodeid", nodeid_str, &arg_list);
}
}
/* Check if we need to supply the victim in any other form */
if (param == NULL) {
const char *map = g_hash_table_lookup(device_args, STONITH_ATTR_ARGMAP);
if (map == NULL) {
param = "port";
value = g_hash_table_lookup(device_args, param);
} else {
/* Legacy handling */
append_host_specific_args(alias, map, device_args, &arg_list);
value = map; /* Nothing more to do */
}
} else if (safe_str_eq(param, "none")) {
value = param; /* Nothing more to do */
} else {
value = g_hash_table_lookup(device_args, param);
}
/* Don't overwrite explictly set values for $param */
if (value == NULL || safe_str_eq(value, "dynamic")) {
crm_debug("Performing %s action for node '%s' as '%s=%s'", action, victim, param,
alias);
append_const_arg(param, alias, &arg_list);
}
}
return arg_list;
}
static gboolean
st_child_term(gpointer data)
{
int rc = 0;
stonith_action_t *track = data;
crm_info("Child %d timed out, sending SIGTERM", track->pid);
track->timer_sigterm = 0;
track->last_timeout_signo = SIGTERM;
rc = kill(track->pid, SIGTERM);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid);
}
return FALSE;
}
static gboolean
st_child_kill(gpointer data)
{
int rc = 0;
stonith_action_t *track = data;
crm_info("Child %d timed out, sending SIGKILL", track->pid);
track->timer_sigkill = 0;
track->last_timeout_signo = SIGKILL;
rc = kill(track->pid, SIGKILL);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid);
}
return FALSE;
}
static void
stonith_action_clear_tracking_data(stonith_action_t * action)
{
if (action->timer_sigterm > 0) {
g_source_remove(action->timer_sigterm);
action->timer_sigterm = 0;
}
if (action->timer_sigkill > 0) {
g_source_remove(action->timer_sigkill);
action->timer_sigkill = 0;
}
if (action->fd_stdout) {
close(action->fd_stdout);
action->fd_stdout = 0;
}
free(action->output);
action->output = NULL;
action->rc = 0;
action->pid = 0;
action->last_timeout_signo = 0;
}
static void
stonith_action_destroy(stonith_action_t * action)
{
stonith_action_clear_tracking_data(action);
free(action->agent);
free(action->args);
free(action->action);
free(action->victim);
free(action);
}
#define FAILURE_MAX_RETRIES 2
stonith_action_t *
stonith_action_create(const char *agent,
const char *_action,
const char *victim,
uint32_t victim_nodeid,
int timeout, GHashTable * device_args, GHashTable * port_map)
{
stonith_action_t *action;
action = calloc(1, sizeof(stonith_action_t));
crm_info("Initiating action %s for agent %s (target=%s)", _action, agent, victim);
action->args = make_args(_action, victim, victim_nodeid, device_args, port_map);
action->agent = strdup(agent);
action->action = strdup(_action);
if (victim) {
action->victim = strdup(victim);
}
action->timeout = action->remaining_timeout = timeout;
action->max_retries = FAILURE_MAX_RETRIES;
if (device_args) {
char buffer[512];
const char *value = NULL;
snprintf(buffer, 511, "pcmk_%s_retries", _action);
value = g_hash_table_lookup(device_args, buffer);
if (value) {
action->max_retries = atoi(value);
}
}
return action;
}
#define READ_MAX 500
static char *
read_output(int fd)
{
char buffer[READ_MAX];
char *output = NULL;
int len = 0;
int more = 0;
if (!fd) {
return NULL;
}
do {
errno = 0;
memset(&buffer, 0, READ_MAX);
more = read(fd, buffer, READ_MAX - 1);
if (more > 0) {
buffer[more] = 0; /* Make sure its nul-terminated for logging
* 'more' is always less than our buffer size
*/
crm_trace("Got %d more bytes: %.200s...", more, buffer);
output = realloc(output, len + more + 1);
snprintf(output + len, more + 1, "%s", buffer);
len += more;
}
} while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR));
return output;
}
static gboolean
update_remaining_timeout(stonith_action_t * action)
{
int diff = time(NULL) - action->initial_start_time;
if (action->tries >= action->max_retries) {
crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
action->agent, action->action, action->max_retries);
action->remaining_timeout = 0;
} else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) {
/* only set remaining timeout period if there is 30%
* or greater of the original timeout period left */
action->remaining_timeout = action->timeout - diff;
} else {
action->remaining_timeout = 0;
}
return action->remaining_timeout ? TRUE : FALSE;
}
static void
stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
stonith_action_t *action = mainloop_child_userdata(p);
if (action->timer_sigterm > 0) {
g_source_remove(action->timer_sigterm);
}
if (action->timer_sigkill > 0) {
g_source_remove(action->timer_sigkill);
}
if (action->last_timeout_signo) {
action->rc = -ETIME;
crm_notice("Child process %d performing action '%s' timed out with signal %d",
pid, action->action, action->last_timeout_signo);
} else if (signo) {
action->rc = -ECONNABORTED;
crm_notice("Child process %d performing action '%s' timed out with signal %d",
pid, action->action, signo);
} else {
action->rc = exitcode;
crm_debug("Child process %d performing action '%s' exited with rc %d",
pid, action->action, exitcode);
}
action->output = read_output(action->fd_stdout);
if (action->rc != pcmk_ok && update_remaining_timeout(action)) {
int rc = internal_stonith_action_execute(action);
if (rc == pcmk_ok) {
return;
}
}
if (action->done_cb) {
action->done_cb(pid, action->rc, action->output, action->userdata);
}
stonith_action_destroy(action);
}
static int
internal_stonith_action_execute(stonith_action_t * action)
{
int pid, status, len, rc = -EPROTO;
int ret;
int total = 0;
int p_read_fd, p_write_fd; /* parent read/write file descriptors */
int c_read_fd, c_write_fd; /* child read/write file descriptors */
int fd1[2];
int fd2[2];
int is_retry = 0;
/* clear any previous tracking data */
stonith_action_clear_tracking_data(action);
if (!action->tries) {
action->initial_start_time = time(NULL);
}
action->tries++;
if (action->tries > 1) {
crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
action->tries, action->agent, action->action, action->remaining_timeout);
is_retry = 1;
}
c_read_fd = c_write_fd = p_read_fd = p_write_fd = -1;
if (action->args == NULL || action->agent == NULL)
goto fail;
len = strlen(action->args);
if (pipe(fd1))
goto fail;
p_read_fd = fd1[0];
c_write_fd = fd1[1];
if (pipe(fd2))
goto fail;
c_read_fd = fd2[0];
p_write_fd = fd2[1];
crm_debug("forking");
pid = fork();
if (pid < 0) {
rc = -ECHILD;
goto fail;
}
if (!pid) {
/* child */
close(1);
/* coverity[leaked_handle] False positive */
if (dup(c_write_fd) < 0)
goto fail;
close(2);
/* coverity[leaked_handle] False positive */
if (dup(c_write_fd) < 0)
goto fail;
close(0);
/* coverity[leaked_handle] False positive */
if (dup(c_read_fd) < 0)
goto fail;
/* keep c_write_fd open so parent can report all errors. */
close(c_read_fd);
close(p_read_fd);
close(p_write_fd);
/* keep retries from executing out of control */
if (is_retry) {
sleep(1);
}
execlp(action->agent, action->agent, NULL);
exit(EXIT_FAILURE);
}
/* parent */
action->pid = pid;
ret = fcntl(p_read_fd, F_SETFL, fcntl(p_read_fd, F_GETFL, 0) | O_NONBLOCK);
if (ret < 0) {
crm_perror(LOG_NOTICE, "Could not change the output of %s to be non-blocking",
action->agent);
}
do {
crm_debug("sending args");
ret = write(p_write_fd, action->args + total, len - total);
if (ret > 0) {
total += ret;
}
} while (errno == EINTR && total < len);
if (total != len) {
crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len);
if (ret >= 0) {
rc = -ECOMM;
}
goto fail;
}
close(p_write_fd); p_write_fd = -1;
/* async */
if (action->async) {
action->fd_stdout = p_read_fd;
mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done);
crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid,
action->remaining_timeout);
action->last_timeout_signo = 0;
if (action->remaining_timeout) {
action->timer_sigterm =
g_timeout_add(1000 * action->remaining_timeout, st_child_term, action);
action->timer_sigkill =
g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action);
} else {
crm_err("No timeout set for stonith operation %s with device %s",
action->action, action->agent);
}
close(c_write_fd);
close(c_read_fd);
return 0;
} else {
/* sync */
int timeout = action->remaining_timeout + 1;
pid_t p = 0;
while (action->remaining_timeout < 0 || timeout > 0) {
p = waitpid(pid, &status, WNOHANG);
if (p > 0) {
break;
}
sleep(1);
timeout--;
}
if (timeout == 0) {
int killrc = kill(pid, SIGKILL);
if (killrc && errno != ESRCH) {
crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno);
}
/*
* From sigprocmask(2):
* It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored.
*
* This makes it safe to skip WNOHANG here
*/
p = waitpid(pid, &status, 0);
}
if (p <= 0) {
crm_perror(LOG_ERR, "waitpid(%d)", pid);
} else if (p != pid) {
crm_err("Waited for %d, got %d", pid, p);
}
action->output = read_output(p_read_fd);
action->rc = -ECONNABORTED;
rc = action->rc;
if (timeout == 0) {
action->rc = -ETIME;
} else if (WIFEXITED(status)) {
crm_debug("result = %d", WEXITSTATUS(status));
action->rc = -WEXITSTATUS(status);
rc = 0;
} else if (WIFSIGNALED(status)) {
crm_err("call %s for %s exited due to signal %d", action->action, action->agent,
WTERMSIG(status));
} else {
crm_err("call %s for %s exited abnormally. stopped=%d, continued=%d",
action->action, action->agent, WIFSTOPPED(status), WIFCONTINUED(status));
}
}
fail:
if (p_read_fd >= 0) {
close(p_read_fd);
}
if (p_write_fd >= 0) {
close(p_write_fd);
}
if (c_read_fd >= 0) {
close(c_read_fd);
}
if (c_write_fd >= 0) {
close(c_write_fd);
}
return rc;
}
GPid
stonith_action_execute_async(stonith_action_t * action,
void *userdata,
void (*done) (GPid pid, int rc, const char *output,
gpointer user_data))
{
int rc = 0;
if (!action) {
return -1;
}
action->userdata = userdata;
action->done_cb = done;
action->async = 1;
rc = internal_stonith_action_execute(action);
return rc < 0 ? rc : action->pid;
}
int
stonith_action_execute(stonith_action_t * action, int *agent_result, char **output)
{
int rc = 0;
if (!action) {
return -1;
}
do {
rc = internal_stonith_action_execute(action);
if (rc == pcmk_ok) {
/* success! */
break;
}
/* keep retrying while we have time left */
} while (update_remaining_timeout(action));
if (rc) {
/* error */
return rc;
}
if (agent_result) {
*agent_result = action->rc;
}
if (output) {
*output = action->output;
action->output = NULL; /* handed it off, do not free */
}
stonith_action_destroy(action);
return rc;
}
static int
stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace,
stonith_key_value_t ** devices, int timeout)
{
int count = 0;
if (devices == NULL) {
crm_err("Parameter error: stonith_api_device_list");
return -EFAULT;
}
/* Include Heartbeat agents */
if (namespace == NULL || safe_str_eq("heartbeat", namespace)) {
#if HAVE_STONITH_STONITH_H
static gboolean need_init = TRUE;
char **entry = NULL;
char **type_list = NULL;
static char **(*type_list_fn) (void) = NULL;
static void (*type_free_fn) (char **) = NULL;
if (need_init) {
need_init = FALSE;
type_list_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE);
type_free_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist",
FALSE);
}
if (type_list_fn) {
type_list = (*type_list_fn) ();
}
for (entry = type_list; entry != NULL && *entry; ++entry) {
crm_trace("Added: %s", *entry);
*devices = stonith_key_value_add(*devices, NULL, *entry);
count++;
}
if (type_list && type_free_fn) {
(*type_free_fn) (type_list);
}
#else
if (namespace != NULL) {
return -EINVAL; /* Heartbeat agents not supported */
}
#endif
}
/* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */
if (namespace == NULL || safe_str_eq("redhat", namespace)) {
struct dirent **namelist;
int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort);
if (file_num > 0) {
struct stat prop;
char buffer[FILENAME_MAX + 1];
while (file_num--) {
if ('.' == namelist[file_num]->d_name[0]) {
free(namelist[file_num]);
continue;
} else if (0 != strncmp(RH_STONITH_PREFIX,
namelist[file_num]->d_name, strlen(RH_STONITH_PREFIX))) {
free(namelist[file_num]);
continue;
}
snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, namelist[file_num]->d_name);
if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) {
*devices = stonith_key_value_add(*devices, NULL, namelist[file_num]->d_name);
count++;
}
free(namelist[file_num]);
}
free(namelist);
}
}
return count;
}
#if HAVE_STONITH_STONITH_H
static inline char *
strdup_null(const char *val)
{
if (val) {
return strdup(val);
}
return NULL;
}
static void
stonith_plugin(int priority, const char *fmt, ...)
G_GNUC_PRINTF(2, 3);
static void
stonith_plugin(int priority, const char *fmt, ...)
{
va_list args;
char *str;
int err = errno;
va_start(args, fmt);
str = g_strdup_vprintf(fmt, args);
va_end(args);
do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", str);
g_free(str);
errno = err;
}
#endif
static int
stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
const char *namespace, char **output, int timeout)
{
int rc = 0;
char *buffer = NULL;
const char *provider = get_stonith_provider(agent, namespace);
crm_trace("looking up %s/%s metadata", agent, provider);
/* By having this in a library, we can access it from stonith_admin
* when neither lrmd or stonith-ng are running
* Important for the crm shell's validations...
*/
if (safe_str_eq(provider, "redhat")) {
stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL);
int exec_rc = stonith_action_execute(action, &rc, &buffer);
if (exec_rc < 0 || rc != 0 || buffer == NULL) {
crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer));
free(buffer); /* Just in case */
return -EINVAL;
} else {
xmlNode *xml = string2xml(buffer);
xmlNode *actions = NULL;
xmlXPathObject *xpathObj = NULL;
xpathObj = xpath_search(xml, "//actions");
if (numXpathResults(xpathObj) > 0) {
actions = getXpathResult(xpathObj, 0);
}
freeXpathObject(xpathObj);
/* Now fudge the metadata so that the start/stop actions appear */
xpathObj = xpath_search(xml, "//action[@name='stop']");
if (numXpathResults(xpathObj) <= 0) {
xmlNode *tmp = NULL;
tmp = create_xml_node(actions, "action");
crm_xml_add(tmp, "name", "stop");
crm_xml_add(tmp, "timeout", "20s");
tmp = create_xml_node(actions, "action");
crm_xml_add(tmp, "name", "start");
crm_xml_add(tmp, "timeout", "20s");
}
freeXpathObject(xpathObj);
/* Now fudge the metadata so that the port isn't required in the configuration */
xpathObj = xpath_search(xml, "//parameter[@name='port']");
if (numXpathResults(xpathObj) > 0) {
/* We'll fill this in */
xmlNode *tmp = getXpathResult(xpathObj, 0);
crm_xml_add(tmp, "required", "0");
}
freeXpathObject(xpathObj);
free(buffer);
buffer = dump_xml_formatted(xml);
free_xml(xml);
if (!buffer) {
return -EINVAL;
}
}
} else {
#if !HAVE_STONITH_STONITH_H
return -EINVAL; /* Heartbeat agents not supported */
#else
int bufferlen = 0;
static const char *no_parameter_info = "<!-- no value -->";
Stonith *stonith_obj = NULL;
static gboolean need_init = TRUE;
static Stonith *(*st_new_fn) (const char *) = NULL;
static const char *(*st_info_fn) (Stonith *, int) = NULL;
static void (*st_del_fn) (Stonith *) = NULL;
static void (*st_log_fn) (Stonith *, PILLogFun) = NULL;
if (need_init) {
need_init = FALSE;
st_new_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE);
st_del_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete",
FALSE);
st_log_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log",
FALSE);
st_info_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info",
FALSE);
}
if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) {
char *xml_meta_longdesc = NULL;
char *xml_meta_shortdesc = NULL;
char *meta_param = NULL;
char *meta_longdesc = NULL;
char *meta_shortdesc = NULL;
stonith_obj = (*st_new_fn) (agent);
if (stonith_obj) {
(*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin);
meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR));
if (meta_longdesc == NULL) {
crm_warn("no long description in %s's metadata.", agent);
meta_longdesc = strdup(no_parameter_info);
}
meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID));
if (meta_shortdesc == NULL) {
crm_warn("no short description in %s's metadata.", agent);
meta_shortdesc = strdup(no_parameter_info);
}
meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML));
if (meta_param == NULL) {
crm_warn("no list of parameters in %s's metadata.", agent);
meta_param = strdup(no_parameter_info);
}
(*st_del_fn) (stonith_obj);
} else {
return -EINVAL; /* Heartbeat agents not supported */
}
xml_meta_longdesc =
(char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc);
xml_meta_shortdesc =
(char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc);
bufferlen = strlen(META_TEMPLATE) + strlen(agent)
+ strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc)
+ strlen(meta_param) + 1;
buffer = calloc(1, bufferlen);
snprintf(buffer, bufferlen - 1, META_TEMPLATE,
agent, xml_meta_longdesc, xml_meta_shortdesc, meta_param);
xmlFree(xml_meta_longdesc);
xmlFree(xml_meta_shortdesc);
free(meta_shortdesc);
free(meta_longdesc);
free(meta_param);
}
#endif
}
if (output) {
*output = buffer;
} else {
free(buffer);
}
return rc;
}
static int
stonith_api_query(stonith_t * stonith, int call_options, const char *target,
stonith_key_value_t ** devices, int timeout)
{
int rc = 0, lpc = 0, max = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
xmlXPathObjectPtr xpathObj = NULL;
CRM_CHECK(devices != NULL, return -EINVAL);
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, target);
crm_xml_add(data, F_STONITH_ACTION, "off");
rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout);
if (rc < 0) {
return rc;
}
xpathObj = xpath_search(output, "//@agent");
if (xpathObj) {
max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_CHECK(match != NULL, continue);
crm_info("%s[%d] = %s", "//@agent", lpc, xmlGetNodePath(match));
*devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID));
}
freeXpathObject(xpathObj);
}
free_xml(output);
free_xml(data);
return max;
}
static int
stonith_api_call(stonith_t * stonith,
int call_options,
const char *id,
const char *action, const char *victim, int timeout, xmlNode ** output)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add(data, F_STONITH_DEVICE, id);
crm_xml_add(data, F_STONITH_ACTION, action);
crm_xml_add(data, F_STONITH_TARGET, victim);
rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout);
free_xml(data);
return rc;
}
static int
stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info,
int timeout)
{
int rc;
xmlNode *output = NULL;
rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output);
if (output && list_info) {
const char *list_str;
list_str = crm_element_value(output, "st_output");
if (list_str) {
*list_info = strdup(list_str);
}
}
if (output) {
free_xml(output);
}
return rc;
}
static int
stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout)
{
return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL);
}
static int
stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port,
int timeout)
{
return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL);
}
static int
stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action,
int timeout, int tolerance)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
crm_xml_add(data, F_STONITH_ACTION, action);
crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout);
crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance);
rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout);
free_xml(data);
return rc;
}
static int
stonith_api_confirm(stonith_t * stonith, int call_options, const char *target)
{
return stonith_api_fence(stonith, call_options | st_opt_manual_ack, target, "off", 0, 0);
}
static int
stonith_api_history(stonith_t * stonith, int call_options, const char *node,
stonith_history_t ** history, int timeout)
{
int rc = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
stonith_history_t *last = NULL;
*history = NULL;
if (node) {
data = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
}
rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output,
call_options | st_opt_sync_call, timeout);
free_xml(data);
if (rc == 0) {
xmlNode *op = NULL;
xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_ERR);
for (op = __xml_first_child(reply); op != NULL; op = __xml_next(op)) {
stonith_history_t *kvp;
kvp = calloc(1, sizeof(stonith_history_t));
kvp->target = crm_element_value_copy(op, F_STONITH_TARGET);
kvp->action = crm_element_value_copy(op, F_STONITH_ACTION);
kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN);
kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE);
+ kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME);
crm_element_value_int(op, F_STONITH_DATE, &kvp->completed);
crm_element_value_int(op, F_STONITH_STATE, &kvp->state);
if (last) {
last->next = kvp;
} else {
*history = kvp;
}
last = kvp;
}
}
return rc;
}
gboolean
is_redhat_agent(const char *agent)
{
int rc = 0;
struct stat prop;
char buffer[FILENAME_MAX + 1];
snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, agent);
rc = stat(buffer, &prop);
if (rc >= 0 && S_ISREG(prop.st_mode)) {
return TRUE;
}
return FALSE;
}
const char *
get_stonith_provider(const char *agent, const char *provider)
{
/* This function sucks */
if (is_redhat_agent(agent)) {
return "redhat";
#if HAVE_STONITH_STONITH_H
} else {
Stonith *stonith_obj = NULL;
static gboolean need_init = TRUE;
static Stonith *(*st_new_fn) (const char *) = NULL;
static void (*st_del_fn) (Stonith *) = NULL;
if (need_init) {
need_init = FALSE;
st_new_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE);
st_del_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete",
FALSE);
}
if (lha_agents_lib && st_new_fn && st_del_fn) {
stonith_obj = (*st_new_fn) (agent);
if (stonith_obj) {
(*st_del_fn) (stonith_obj);
return "heartbeat";
}
}
#endif
}
crm_err("No such device: %s", agent);
return NULL;
}
static gint
stonithlib_GCompareFunc(gconstpointer a, gconstpointer b)
{
int rc = 0;
const stonith_notify_client_t *a_client = a;
const stonith_notify_client_t *b_client = b;
CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0);
rc = strcmp(a_client->event, b_client->event);
if (rc == 0) {
if (a_client->notify == NULL || b_client->notify == NULL) {
return 0;
} else if (a_client->notify == b_client->notify) {
return 0;
} else if (((long)a_client->notify) < ((long)b_client->notify)) {
crm_err("callbacks for %s are not equal: %p vs. %p",
a_client->event, a_client->notify, b_client->notify);
return -1;
}
crm_err("callbacks for %s are not equal: %p vs. %p",
a_client->event, a_client->notify, b_client->notify);
return 1;
}
return rc;
}
xmlNode *
stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options)
{
xmlNode *op_msg = create_xml_node(NULL, "stonith_command");
CRM_CHECK(op_msg != NULL, return NULL);
CRM_CHECK(token != NULL, return NULL);
crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command");
crm_xml_add(op_msg, F_TYPE, T_STONITH_NG);
crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token);
crm_xml_add(op_msg, F_STONITH_OPERATION, op);
crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id);
crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options);
crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options);
if (data != NULL) {
add_message_xml(op_msg, F_STONITH_CALLDATA, data);
}
return op_msg;
}
static void
stonith_destroy_op_callback(gpointer data)
{
stonith_callback_client_t *blob = data;
if (blob->timer && blob->timer->ref > 0) {
g_source_remove(blob->timer->ref);
}
free(blob->timer);
free(blob);
}
static int
stonith_api_signoff(stonith_t * stonith)
{
stonith_private_t *native = stonith->private;
crm_debug("Signing out of the STONITH Service");
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
native->ipc = NULL;
} else if (native->ipc) {
/* Not attached to mainloop */
crm_ipc_t *ipc = native->ipc;
native->ipc = NULL;
crm_ipc_close(ipc);
crm_ipc_destroy(ipc);
}
stonith->state = stonith_disconnected;
return pcmk_ok;
}
static int
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
{
int rc = pcmk_ok;
stonith_private_t *native = stonith->private;
static struct ipc_client_callbacks st_callbacks = {
.dispatch = stonith_dispatch_internal,
.destroy = stonith_connection_destroy
};
crm_trace("Connecting command channel");
stonith->state = stonith_connected_command;
if (stonith_fd) {
/* No mainloop */
native->ipc = crm_ipc_new("stonith-ng", 0);
if (native->ipc && crm_ipc_connect(native->ipc)) {
*stonith_fd = crm_ipc_get_fd(native->ipc);
} else if (native->ipc) {
rc = -ENOTCONN;
}
} else {
/* With mainloop */
native->source =
mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
crm_debug("Could not connect to the Stonith API");
rc = -ENOTCONN;
}
if (rc == pcmk_ok) {
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, "stonith_command");
crm_xml_add(hello, F_TYPE, T_STONITH_NG);
crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER);
crm_xml_add(hello, F_STONITH_CLIENTNAME, name);
rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc);
rc = -ECOMM;
} else if (reply == NULL) {
crm_err("Did not receive registration reply");
rc = -EPROTO;
} else {
const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION);
const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID);
if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
crm_err("Invalid registration message: %s", msg_type);
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else if (tmp_ticket == NULL) {
crm_err("No registration token provided");
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else {
crm_trace("Obtained registration token: %s", tmp_ticket);
native->token = strdup(tmp_ticket);
rc = pcmk_ok;
}
}
free_xml(reply);
free_xml(hello);
}
if (rc == pcmk_ok) {
#if HAVE_MSGFROMIPC_TIMEOUT
stonith->call_timeout = MAX_IPC_DELAY;
#endif
crm_debug("Connection to STONITH successful");
return pcmk_ok;
}
crm_debug("Connection to STONITH failed: %s", pcmk_strerror(rc));
stonith->cmds->disconnect(stonith);
return rc;
}
static int
stonith_set_notification(stonith_t * stonith, const char *callback, int enabled)
{
xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__);
stonith_private_t *native = stonith->private;
if (stonith->state != stonith_disconnected) {
int rc;
crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY);
if (enabled) {
crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback);
} else {
crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback);
}
rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc);
rc = -ECOMM;
}
}
free_xml(notify_msg);
return pcmk_ok;
}
static int
stonith_api_add_notification(stonith_t * stonith, const char *event,
void (*callback) (stonith_t * stonith, stonith_event_t * e))
{
GList *list_item = NULL;
stonith_notify_client_t *new_client = NULL;
stonith_private_t *private = NULL;
private = stonith->private;
crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list));
new_client = calloc(1, sizeof(stonith_notify_client_t));
new_client->event = event;
new_client->notify = callback;
list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
if (list_item != NULL) {
crm_warn("Callback already present");
free(new_client);
return -ENOTUNIQ;
} else {
private->notify_list = g_list_append(private->notify_list, new_client);
stonith_set_notification(stonith, event, 1);
crm_trace("Callback added (%d)", g_list_length(private->notify_list));
}
return pcmk_ok;
}
static int
stonith_api_del_notification(stonith_t * stonith, const char *event)
{
GList *list_item = NULL;
stonith_notify_client_t *new_client = NULL;
stonith_private_t *private = NULL;
crm_debug("Removing callback for %s events", event);
private = stonith->private;
new_client = calloc(1, sizeof(stonith_notify_client_t));
new_client->event = event;
new_client->notify = NULL;
list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
stonith_set_notification(stonith, event, 0);
if (list_item != NULL) {
stonith_notify_client_t *list_client = list_item->data;
private->notify_list = g_list_remove(private->notify_list, list_client);
free(list_client);
crm_trace("Removed callback");
} else {
crm_trace("Callback not present");
}
free(new_client);
return pcmk_ok;
}
static gboolean
stonith_async_timeout_handler(gpointer data)
{
struct timer_rec_s *timer = data;
crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout);
stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME);
/* Always return TRUE, never remove the handler
* We do that in stonith_del_callback()
*/
return TRUE;
}
static void
set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id,
int timeout)
{
struct timer_rec_s *async_timer = callback->timer;
if (timeout <= 0) {
return;
}
if (!async_timer) {
async_timer = calloc(1, sizeof(struct timer_rec_s));
callback->timer = async_timer;
}
async_timer->stonith = stonith;
async_timer->call_id = call_id;
/* Allow a fair bit of grace to allow the server to tell us of a timeout
* This is only a fallback
*/
async_timer->timeout = (timeout + 60) * 1000;
if (async_timer->ref) {
g_source_remove(async_timer->ref);
}
async_timer->ref =
g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer);
}
static void
update_callback_timeout(int call_id, int timeout, stonith_t * st)
{
stonith_callback_client_t *callback = NULL;
stonith_private_t *private = st->private;
callback = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
if (!callback || !callback->allow_timeout_updates) {
return;
}
set_callback_timeout(callback, st, call_id, timeout);
}
static void
invoke_callback(stonith_t * st, int call_id, int rc, void *userdata,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_data_t data = { 0, };
data.call_id = call_id;
data.rc = rc;
data.userdata = userdata;
callback(st, &data);
}
static int
stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options,
void *user_data, const char *callback_name,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_client_t *blob = NULL;
stonith_private_t *private = NULL;
CRM_CHECK(stonith != NULL, return -EINVAL);
CRM_CHECK(stonith->private != NULL, return -EINVAL);
private = stonith->private;
if (call_id == 0) {
private->op_callback = callback;
} else if (call_id < 0) {
if (!(options & st_opt_report_only_success)) {
crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
invoke_callback(stonith, call_id, call_id, user_data, callback);
} else {
crm_warn("STONITH call failed: %s", pcmk_strerror(call_id));
}
return FALSE;
}
blob = calloc(1, sizeof(stonith_callback_client_t));
blob->id = callback_name;
blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE;
blob->user_data = user_data;
blob->callback = callback;
blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE;
if (timeout > 0) {
set_callback_timeout(blob, stonith, call_id, timeout);
}
g_hash_table_insert(private->stonith_op_callback_table, GINT_TO_POINTER(call_id), blob);
crm_trace("Added callback to %s for call %d", callback_name, call_id);
return TRUE;
}
static int
stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
{
stonith_private_t *private = stonith->private;
if (all_callbacks) {
private->op_callback = NULL;
g_hash_table_destroy(private->stonith_op_callback_table);
private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
NULL,
stonith_destroy_op_callback);
} else if (call_id == 0) {
private->op_callback = NULL;
} else {
g_hash_table_remove(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
}
return pcmk_ok;
}
static void
stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data)
{
int call = GPOINTER_TO_INT(key);
stonith_callback_client_t *blob = value;
crm_debug("Call %d (%s): pending", call, crm_str(blob->id));
}
void
stonith_dump_pending_callbacks(stonith_t * stonith)
{
stonith_private_t *private = stonith->private;
if (private->stonith_op_callback_table == NULL) {
return;
}
return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL);
}
void
stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc)
{
stonith_private_t *private = NULL;
stonith_callback_client_t *blob = NULL;
stonith_callback_client_t local_blob;
CRM_CHECK(stonith != NULL, return);
CRM_CHECK(stonith->private != NULL, return);
private = stonith->private;
local_blob.id = NULL;
local_blob.callback = NULL;
local_blob.user_data = NULL;
local_blob.only_success = FALSE;
if (msg != NULL) {
crm_element_value_int(msg, F_STONITH_RC, &rc);
crm_element_value_int(msg, F_STONITH_CALLID, &call_id);
}
CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result"));
blob = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
if (blob != NULL) {
local_blob = *blob;
blob = NULL;
stonith_api_del_callback(stonith, call_id, FALSE);
} else {
crm_trace("No callback found for call %d", call_id);
local_blob.callback = NULL;
}
if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) {
crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id);
invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback);
} else if (private->op_callback == NULL && rc != pcmk_ok) {
crm_warn("STONITH command failed: %s", pcmk_strerror(rc));
crm_log_xml_debug(msg, "Failed STONITH Update");
}
if (private->op_callback != NULL) {
crm_trace("Invoking global callback for call %d", call_id);
invoke_callback(stonith, call_id, rc, NULL, private->op_callback);
}
crm_trace("OP callback activated.");
}
/*
<notify t="st_notify" subt="st_device_register" st_op="st_device_register" st_rc="0" >
<st_calldata >
<stonith_command t="stonith-ng" st_async_id="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_op="st_device_register" st_callid="2" st_callopt="4096" st_timeout="0" st_clientid="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_clientname="stonith-test" >
<st_calldata >
<st_device_id id="test-id" origin="create_device_registration_xml" agent="fence_virsh" namespace="stonith-ng" >
<attributes ipaddr="localhost" pcmk-portmal="some-host=pcmk-1 pcmk-3=3,4" login="root" identity_file="/root/.ssh/id_dsa" />
</st_device_id>
</st_calldata>
</stonith_command>
</st_calldata>
</notify>
<notify t="st_notify" subt="st_notify_fence" st_op="st_notify_fence" st_rc="0" >
<st_calldata >
<st_notify_fence st_rc="0" st_target="some-host" st_op="st_fence" st_delegate="test-id" st_origin="61dd7759-e229-4be7-b1f8-ef49dd14d9f0" />
</st_calldata>
</notify>
*/
static stonith_event_t *
xml_to_event(xmlNode * msg)
{
stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
const char *ntype = crm_element_value(msg, F_SUBTYPE);
char *data_addr = g_strdup_printf("//%s", ntype);
xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG);
crm_log_xml_trace(msg, "stonith_notify");
crm_element_value_int(msg, F_STONITH_RC, &(event->result));
if (safe_str_eq(ntype, T_STONITH_NOTIFY_FENCE)) {
event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION);
if (data) {
event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN);
event->action = crm_element_value_copy(data, F_STONITH_ACTION);
event->target = crm_element_value_copy(data, F_STONITH_TARGET);
event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE);
event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID);
event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME);
} else {
crm_err("No data for %s event", ntype);
crm_log_xml_notice(msg, "BadEvent");
}
}
g_free(data_addr);
return event;
}
static void
event_free(stonith_event_t * event)
{
free(event->id);
free(event->type);
free(event->message);
free(event->operation);
free(event->origin);
free(event->action);
free(event->target);
free(event->executioner);
free(event->device);
free(event->client_origin);
free(event);
}
static void
stonith_send_notification(gpointer data, gpointer user_data)
{
struct notify_blob_s *blob = user_data;
stonith_notify_client_t *entry = data;
stonith_event_t *st_event = NULL;
const char *event = NULL;
if (blob->xml == NULL) {
crm_warn("Skipping callback - NULL message");
return;
}
event = crm_element_value(blob->xml, F_SUBTYPE);
if (entry == NULL) {
crm_warn("Skipping callback - NULL callback client");
return;
} else if (entry->notify == NULL) {
crm_warn("Skipping callback - NULL callback");
return;
} else if (safe_str_neq(entry->event, event)) {
crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event);
return;
}
st_event = xml_to_event(blob->xml);
crm_trace("Invoking callback for %p/%s event...", entry, event);
entry->notify(blob->stonith, st_event);
crm_trace("Callback invoked...");
event_free(st_event);
}
int
stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data,
int call_options, int timeout)
{
int rc = 0;
int reply_id = -1;
enum crm_ipc_flags ipc_flags = crm_ipc_client_none;
xmlNode *op_msg = NULL;
xmlNode *op_reply = NULL;
stonith_private_t *native = stonith->private;
if (stonith->state == stonith_disconnected) {
return -ENOTCONN;
}
if (output_data != NULL) {
*output_data = NULL;
}
if (op == NULL) {
crm_err("No operation specified");
return -EINVAL;
}
if (call_options & st_opt_sync_call) {
ipc_flags |= crm_ipc_client_response;
}
stonith->call_id++;
/* prevent call_id from being negative (or zero) and conflicting
* with the stonith_errors enum
* use 2 because we use it as (stonith->call_id - 1) below
*/
if (stonith->call_id < 1) {
stonith->call_id = 1;
}
CRM_CHECK(native->token != NULL,;
);
op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options);
if (op_msg == NULL) {
return -EINVAL;
}
crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout);
crm_trace("Sending %s message to STONITH service, Timeout: %ds", op, timeout);
rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply);
free_xml(op_msg);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc);
rc = -ECOMM;
goto done;
}
crm_log_xml_trace(op_reply, "Reply");
if (!(call_options & st_opt_sync_call)) {
crm_trace("Async call %d, returning", stonith->call_id);
CRM_CHECK(stonith->call_id != 0, return -EPROTO);
free_xml(op_reply);
return stonith->call_id;
}
rc = pcmk_ok;
crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id);
if (reply_id == stonith->call_id) {
crm_trace("Syncronous reply %d received", reply_id);
if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) {
rc = -ENOMSG;
}
if ((call_options & st_opt_discard_reply) || output_data == NULL) {
crm_trace("Discarding reply");
} else {
*output_data = op_reply;
op_reply = NULL; /* Prevent subsequent free */
}
} else if (reply_id <= 0) {
crm_err("Recieved bad reply: No id set");
crm_log_xml_err(op_reply, "Bad reply");
free_xml(op_reply);
rc = -ENOMSG;
} else {
crm_err("Recieved bad reply: %d (wanted %d)", reply_id, stonith->call_id);
crm_log_xml_err(op_reply, "Old reply");
free_xml(op_reply);
rc = -ENOMSG;
}
done:
if (crm_ipc_connected(native->ipc) == FALSE) {
crm_err("STONITH disconnected");
stonith->state = stonith_disconnected;
}
free_xml(op_reply);
return rc;
}
/* Not used with mainloop */
bool
stonith_dispatch(stonith_t * st)
{
gboolean stay_connected = TRUE;
stonith_private_t *private = NULL;
CRM_ASSERT(st != NULL);
private = st->private;
while (crm_ipc_ready(private->ipc)) {
if (crm_ipc_read(private->ipc) > 0) {
const char *msg = crm_ipc_buffer(private->ipc);
stonith_dispatch_internal(msg, strlen(msg), st);
}
if (crm_ipc_connected(private->ipc) == FALSE) {
crm_err("Connection closed");
stay_connected = FALSE;
}
}
return stay_connected;
}
int
stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
{
const char *type = NULL;
struct notify_blob_s blob;
stonith_t *st = userdata;
stonith_private_t *private = NULL;
CRM_ASSERT(st != NULL);
private = st->private;
blob.stonith = st;
blob.xml = string2xml(buffer);
if (blob.xml == NULL) {
crm_warn("Received a NULL msg from STONITH service: %s.", buffer);
return 0;
}
/* do callbacks */
type = crm_element_value(blob.xml, F_TYPE);
crm_trace("Activating %s callbacks...", type);
if (safe_str_eq(type, T_STONITH_NG)) {
stonith_perform_callback(st, blob.xml, 0, 0);
} else if (safe_str_eq(type, T_STONITH_NOTIFY)) {
g_list_foreach(private->notify_list, stonith_send_notification, &blob);
} else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) {
int call_id = 0;
int timeout = 0;
crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout);
crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id);
update_callback_timeout(call_id, timeout, st);
} else {
crm_err("Unknown message type: %s", type);
crm_log_xml_warn(blob.xml, "BadReply");
}
free_xml(blob.xml);
return 1;
}
static int
stonith_api_free(stonith_t * stonith)
{
int rc = pcmk_ok;
if (stonith->state != stonith_disconnected) {
rc = stonith->cmds->disconnect(stonith);
}
if (stonith->state == stonith_disconnected) {
stonith_private_t *private = stonith->private;
g_hash_table_destroy(private->stonith_op_callback_table);
free(private->token);
free(stonith->private);
free(stonith->cmds);
free(stonith);
}
return rc;
}
void
stonith_api_delete(stonith_t * stonith)
{
stonith_private_t *private = stonith->private;
GList *list = private->notify_list;
while (list != NULL) {
stonith_notify_client_t *client = g_list_nth_data(list, 0);
list = g_list_remove(list, client);
free(client);
}
stonith->cmds->free(stonith);
stonith = NULL;
}
stonith_t *
stonith_api_new(void)
{
stonith_t *new_stonith = NULL;
stonith_private_t *private = NULL;
new_stonith = calloc(1, sizeof(stonith_t));
private = calloc(1, sizeof(stonith_private_t));
new_stonith->private = private;
private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
NULL, stonith_destroy_op_callback);
private->notify_list = NULL;
new_stonith->call_id = 1;
new_stonith->state = stonith_disconnected;
new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t));
/* *INDENT-OFF* */
new_stonith->cmds->free = stonith_api_free;
new_stonith->cmds->connect = stonith_api_signon;
new_stonith->cmds->disconnect = stonith_api_signoff;
new_stonith->cmds->list = stonith_api_list;
new_stonith->cmds->monitor = stonith_api_monitor;
new_stonith->cmds->status = stonith_api_status;
new_stonith->cmds->fence = stonith_api_fence;
new_stonith->cmds->confirm = stonith_api_confirm;
new_stonith->cmds->history = stonith_api_history;
new_stonith->cmds->list_agents = stonith_api_device_list;
new_stonith->cmds->metadata = stonith_api_device_metadata;
new_stonith->cmds->query = stonith_api_query;
new_stonith->cmds->remove_device = stonith_api_remove_device;
new_stonith->cmds->register_device = stonith_api_register_device;
new_stonith->cmds->remove_level = stonith_api_remove_level;
new_stonith->cmds->register_level = stonith_api_register_level;
new_stonith->cmds->remove_callback = stonith_api_del_callback;
new_stonith->cmds->register_callback = stonith_api_add_callback;
new_stonith->cmds->remove_notification = stonith_api_del_notification;
new_stonith->cmds->register_notification = stonith_api_add_notification;
/* *INDENT-ON* */
return new_stonith;
}
stonith_key_value_t *
stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
{
stonith_key_value_t *p, *end;
p = calloc(1, sizeof(stonith_key_value_t));
if (key) {
p->key = strdup(key);
}
if (value) {
p->value = strdup(value);
}
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
{
stonith_key_value_t *p;
while (head) {
p = head->next;
if (keys) {
free(head->key);
}
if (values) {
free(head->value);
}
free(head);
head = p;
}
}
int
stonith_api_kick(int nodeid, const char *uname, int timeout, bool off)
{
char *name = NULL;
const char *action = "reboot";
int rc = -EPROTO;
stonith_t *st = NULL;
enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
st = stonith_api_new();
if (st) {
rc = st->cmds->connect(st, "stonith-api", NULL);
}
if (uname != NULL) {
name = strdup(uname);
} else if (nodeid > 0) {
opts |= st_opt_cs_nodeid;
name = crm_itoa(nodeid);
}
if (off) {
action = "off";
}
if (rc == pcmk_ok) {
rc = st->cmds->fence(st, opts, name, action, timeout, 0);
}
if (st) {
st->cmds->disconnect(st);
stonith_api_delete(st);
}
free(name);
return rc;
}
time_t
stonith_api_time(int nodeid, const char *uname, bool in_progress)
{
int rc = 0;
char *name = NULL;
time_t when = 0;
time_t progress = 0;
stonith_t *st = NULL;
stonith_history_t *history, *hp = NULL;
enum stonith_call_options opts = st_opt_sync_call;
st = stonith_api_new();
if (st) {
rc = st->cmds->connect(st, "stonith-api", NULL);
}
if (uname != NULL) {
name = strdup(uname);
} else if (nodeid > 0) {
opts |= st_opt_cs_nodeid;
name = crm_itoa(nodeid);
}
if (st && rc == pcmk_ok) {
st->cmds->history(st, st_opt_sync_call | st_opt_cs_nodeid, name, &history, 120);
for (hp = history; hp; hp = hp->next) {
if (in_progress) {
if (hp->state != st_done && hp->state != st_failed) {
progress = time(NULL);
}
} else if (hp->state == st_done) {
when = hp->completed;
}
}
}
if (progress) {
when = progress;
}
if (st) {
st->cmds->disconnect(st);
stonith_api_delete(st);
}
free(name);
return when;
}
#if HAVE_STONITH_STONITH_H
# include <pils/plugin.h>
const char *i_hate_pils(int rc);
const char *
i_hate_pils(int rc)
{
return PIL_strerror(rc);
}
#endif
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:09 PM (17 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002499
Default Alt Text
(153 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment