Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F2825394
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
20 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/fencing/remote.c b/fencing/remote.c
index 0c53bfea71..0632e33a05 100644
--- a/fencing/remote.c
+++ b/fencing/remote.c
@@ -1,653 +1,653 @@
/*
* Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/cluster.h>
#include <crm/stonith-ng.h>
#include <crm/stonith-ng-internal.h>
#include <crm/common/xml.h>
#include <crm/common/msg.h>
#include <crm/common/util.h>
#include <internal.h>
/*
 * One peer's answer to a fencing query, as recorded by
 * process_remote_stonith_query().
 */
typedef struct st_query_result_s {
    char *host;           /* name of the replying peer; released by free_remote_query() */
    int devices;          /* device count read from the reply's "st-available-devices" */
    GListPtr device_list; /* device IDs from the reply, each a crm_strdup()'d string */
} st_query_result_t;
/* Remote fencing operations keyed by operation ID.  Created lazily by
 * create_remote_stonith_op(), with free_remote_op() as the value destructor. */
GHashTable *remote_op_list = NULL;
/* Forward declaration: remote_op_query_timeout() calls this before its definition */
void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer);
/* Defined outside this file; used below to build the query and fence requests */
extern xmlNode *stonith_create_op(
int call_id, const char *token, const char *op, xmlNode *data, int call_options);
/*
 * Release a single query result: the peer name, the list of device IDs and
 * the strings held by that list.
 *
 * data: an st_query_result_t pointer, or NULL (in which case nothing happens)
 */
static void free_remote_query(gpointer data)
{
    if(data) {
        GListPtr iter = NULL;
        st_query_result_t *query = data;

        crm_trace("Free'ing query result from %s", query->host);

        /* The device IDs are crm_strdup()'d copies owned by this result, so
         * both the strings and the list cells have to be released */
        for(iter = query->device_list; iter != NULL; iter = iter->next) {
            crm_free(iter->data);
        }
        g_list_free(query->device_list);
        query->device_list = NULL;

        crm_free(query->host);
        crm_free(query);
    }
}
/*
 * Destroy a remote fencing operation record.  Registered as the value
 * destructor of remote_op_list.
 *
 * Cancels any pending timers and frees every string, query result and XML
 * copy owned by the record.  op->devices is not freed: it is assigned from
 * the topology table's level lists in stonith_topology_next().
 *
 * data: a remote_fencing_op_t pointer; NULL is ignored
 */
static void free_remote_op(gpointer data)
{
    remote_fencing_op_t *op = data;

    if(op == NULL) {
        return;
    }

    crm_trace("Free'ing op %s for %s", op->id, op->target);
    crm_log_xml_debug(op->request, "Destroying");

    crm_free(op->id);
    crm_free(op->action);
    crm_free(op->target);
    crm_free(op->client_id);
    crm_free(op->originator);
    /* Allocated by remote_op_done() via crm_element_value_copy() */
    crm_free(op->delegate);

    if(op->query_timer) {
        g_source_remove(op->query_timer);
    }
    if(op->op_timer) {
        g_source_remove(op->op_timer);
    }
    if(op->query_results) {
        slist_destroy(st_query_result_t, result, op->query_results,
                      free_remote_query(result);
            );
    }
    if(op->request) {
        free_xml(op->request);
        op->request = NULL;
    }
    crm_free(op);
}
/*
 * Finish a remote fencing operation.
 *
 * Records the completion time, stops both timers, replies to the local
 * client (only when the stored request carried a call ID), broadcasts a
 * STONITH_OP_FENCE notification and then drops the query results and the
 * request copy.  The record itself stays in remote_op_list so that the
 * history can still be queried.
 *
 * op:   the operation being completed
 * data: result payload, or NULL; when NULL a temporary "remote-op" node is
 *       created and freed here.  When non-NULL it is annotated in place and
 *       its F_ORIG value becomes op->delegate.
 * rc:   result code placed in the reply and the notification
 */
static void remote_op_done(remote_fencing_op_t *op, xmlNode *data, int rc)
{
    int call = 0;
    xmlNode *reply = NULL;
    xmlNode *local_data = NULL;
    xmlNode *notify_data = NULL;

    op->completed = time(NULL);

    if(op->request != NULL) {
        crm_element_value_int(op->request, F_STONITH_CALLID, &call);
    }
    /* else: keep going, make sure the details are accurate for ops that arrive late */

    if(op->query_timer) {
        g_source_remove(op->query_timer);
        op->query_timer = 0;
    }
    if(op->op_timer) {
        g_source_remove(op->op_timer);
        op->op_timer = 0;
    }

    if(data == NULL) {
        data = create_xml_node(NULL, "remote-op");
        local_data = data;
    } else {
        /* Release any delegate recorded by an earlier call before replacing it */
        crm_free(op->delegate);
        op->delegate = crm_element_value_copy(data, F_ORIG);
    }

    crm_xml_add_int(data, "state", op->state);
    crm_xml_add(data, F_STONITH_TARGET, op->target);
    crm_xml_add(data, F_STONITH_OPERATION, op->action);

    if(op->request != NULL) {
        reply = stonith_construct_reply(op->request, NULL, data, rc);
        crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
        crm_info("Notifing clients of %s (%s of %s from %s by %s): %d, rc=%d",
                 op->id, op->action, op->target, op->client_id, op->delegate, op->state, rc);
    } else {
        crm_err("We've already notified clients of %s (%s of %s from %s by %s): %d, rc=%d",
                op->id, op->action, op->target, op->client_id, op->delegate, op->state, rc);
        /* The temporary node (if one was created above) must not outlive this call */
        free_xml(local_data);
        return;
    }

    if(call && reply) {
        /* Don't bother with this if there is no callid - and thus the op originated elsewhere */
        do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
    }

    /* Do notification with a clean data object */
    notify_data = create_xml_node(NULL, "st-data");
    crm_xml_add_int(notify_data, "state", op->state);
    crm_xml_add_int(notify_data, F_STONITH_RC, rc);
    crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
    crm_xml_add(notify_data, F_STONITH_OPERATION, op->action);
    crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
    crm_xml_add(notify_data, F_STONITH_REMOTE, op->id);
    crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
    do_stonith_notify(0, STONITH_OP_FENCE, rc, notify_data, NULL);

    free_xml(notify_data);
    free_xml(local_data);
    free_xml(reply);

    /* Free non-essential parts of the record
     * Keep the record around so we can query the history
     */
    if(op->query_results) {
        slist_destroy(st_query_result_t, result, op->query_results,
                      free_remote_query(result);
            );
        op->query_results = NULL;
    }
    if(op->request) {
        free_xml(op->request);
        op->request = NULL;
    }
}
/*
 * Fail an operation that did not complete in time.
 *
 * Installed as the op_timer callback by initiate_remote_stonith_op() and
 * also called directly by remote_op_query_timeout() and
 * call_remote_stonith().
 *
 * userdata: the remote_fencing_op_t that timed out
 *
 * Always returns FALSE.
 */
static gboolean remote_op_timeout(gpointer userdata)
{
    remote_fencing_op_t *op = userdata;

    /* NOTE(review): this clears query_timer rather than op_timer; any handle
     * still set is removed by remote_op_done() below - confirm this is the
     * intended field */
    op->query_timer = 0;

    if(op->state == st_done) {
        crm_debug("Action %s (%s) for %s already completed", op->action, op->id, op->target);
        return FALSE;
    }

    crm_err("Action %s (%s) for %s timed out", op->action, op->id, op->target);

    /* Mark the failure first so that the reply and notification built by
     * remote_op_done() report the final state rather than the one the
     * operation was in when it timed out */
    op->state = st_failed;
    remote_op_done(op, NULL, st_err_timeout);
    return FALSE;
}
/*
 * Callback for the query timer armed by initiate_remote_stonith_op().
 *
 * Does nothing beyond logging if the operation is already done or executing.
 * Otherwise it starts fencing with whatever replies have arrived or, when
 * there are none, cancels the op timer and fails the operation through
 * remote_op_timeout().
 *
 * data: the remote_fencing_op_t whose query window has closed
 *
 * Always returns FALSE.
 */
static gboolean remote_op_query_timeout(gpointer data)
{
    remote_fencing_op_t *fencing_op = data;

    fencing_op->query_timer = 0;

    if(fencing_op->state == st_done) {
        crm_debug("Operation %s for %s already completed", fencing_op->id, fencing_op->target);
        return FALSE;
    }

    if(fencing_op->state == st_exec) {
        crm_debug("Operation %s for %s already in progress", fencing_op->id, fencing_op->target);
        return FALSE;
    }

    if(fencing_op->query_results) {
        crm_info("Query %s for %s complete: %d", fencing_op->id, fencing_op->target, fencing_op->state);
        call_remote_stonith(fencing_op, NULL);
        return FALSE;
    }

    /* Nobody answered */
    crm_err("Query %s for %s timed out", fencing_op->id, fencing_op->target);
    if(fencing_op->op_timer) {
        g_source_remove(fencing_op->op_timer);
        fencing_op->op_timer = 0;
    }
    remote_op_timeout(fencing_op);
    return FALSE;
}
/*
 * Advance an operation to the next populated fencing level of its target.
 *
 * Returns stonith_ok when the target has no topology entry (nothing is
 * changed) or when another level was found, in which case op->devices points
 * at that level's device list.  Returns st_err_invalid_level once op->level
 * reaches ST_LEVEL_MAX.  st_opt_topology is set on the operation whenever a
 * topology entry exists.
 */
static int stonith_topology_next(remote_fencing_op_t *op)
{
    stonith_topology_t *entry = NULL;

    /* Queries don't have a target set */
    if(op->target != NULL) {
        entry = g_hash_table_lookup(topology, op->target);
    }

    if(entry == NULL) {
        return stonith_ok;
    }

    set_bit(op->call_options, st_opt_topology);

    /* Step at least once, then skip over empty levels */
    for(op->level++; op->level < ST_LEVEL_MAX; op->level++) {
        if(entry->levels[op->level] != NULL) {
            break;
        }
    }

    if(op->level >= ST_LEVEL_MAX) {
        crm_notice("All fencing options for %s failed", op->target);
        return st_err_invalid_level;
    }

    crm_trace("Attempting fencing level %d for %s (%d devices)", op->level, op->target, g_list_length(entry->levels[op->level]));
    op->devices = entry->levels[op->level];
    return stonith_ok;
}
/*
 * Create the record for a remote fencing operation and register it in
 * remote_op_list (which is created on first use).
 *
 * client:  ID stored as the operation's client_id
 * request: the request XML; a copy is kept in the record
 * peer:    TRUE when the request was relayed by another node, in which case
 *          the operation ID is taken from the request and an already-known
 *          operation with that ID is returned as-is
 *
 * Returns the (new or existing) remote_fencing_op_t, or NULL when a relayed
 * request carries no operation ID.  The state is st_query, or st_failed when
 * stonith_topology_next() reports that no fencing level is available.
 */
void *create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
{
    remote_fencing_op_t *op = NULL;
    xmlNode *target_xml = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE);
    gboolean relayed = (peer && target_xml);

    if(remote_op_list == NULL) {
        remote_op_list = g_hash_table_new_full(
            crm_str_hash, g_str_equal, NULL, free_remote_op);
    }

    if(relayed) {
        const char *peer_id = crm_element_value(target_xml, F_STONITH_REMOTE);

        CRM_CHECK(peer_id != NULL, return NULL);

        op = g_hash_table_lookup(remote_op_list, peer_id);
        if(op) {
            crm_debug("%s already exists", peer_id);
            return op;
        }
    }

    crm_malloc0(op, sizeof(remote_fencing_op_t));
    crm_element_value_int(request, F_STONITH_TIMEOUT, (int*)&(op->base_timeout));

    if(relayed) {
        /* Reuse the ID chosen by the node that started the operation */
        op->id = crm_element_value_copy(target_xml, F_STONITH_REMOTE);
        crm_trace("Recorded new stonith op: %s", op->id);

    } else {
        /* Locally initiated: invent a fresh ID */
        cl_uuid_t new_uuid;
        char uuid_str[UU_UNPARSE_SIZEOF];

        cl_uuid_generate(&new_uuid);
        cl_uuid_unparse(&new_uuid, uuid_str);
        op->id = crm_strdup(uuid_str);
        crm_trace("Generated new stonith op: %s", op->id);
    }

    crm_trace("Replacing op %s", op->id);
    g_hash_table_replace(remote_op_list, op->id, op);
    CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL);

    op->state = st_query;
    op->action = crm_element_value_copy(target_xml, F_STONITH_ACTION);

    op->originator = crm_element_value_copy(target_xml, "src");
    if(op->originator == NULL) {
        /* Local request */
        op->originator = crm_strdup(stonith_our_uname);
    }

    op->client_id = crm_strdup(client);
    op->target = crm_element_value_copy(target_xml, F_STONITH_TARGET);
    op->request = copy_xml(request); /* TODO: Figure out how to avoid this */
    crm_element_value_int(request, F_STONITH_CALLOPTS, (int*)&(op->call_options));

    if((op->call_options & st_opt_cs_nodeid) != 0) {
        /* The target was given as a node ID: swap in the node name if known */
        int nodeid = crm_atoi(op->target, NULL);
        crm_node_t *node = crm_get_peer(nodeid, NULL);

        /* Ensure the conversion only happens once */
        op->call_options &= ~st_opt_cs_nodeid;

        if(node != NULL) {
            crm_free(op->target);
            op->target = crm_strdup(node->uname);
        }
    }

    if(stonith_topology_next(op) != stonith_ok) {
        op->state = st_failed;
    }
    return op;
}
/*
 * Start a remote fencing operation on behalf of a local client by
 * broadcasting a STONITH_OP_QUERY to the cluster.
 *
 * client:     the requesting client; its id becomes the op's client_id
 * request:    the client's request XML
 * manual_ack: when TRUE no timers are armed and the query is tagged with
 *             F_STONITH_DEVICE="manual_ack"; otherwise the op and query
 *             timers are started (1200x and 100x base_timeout respectively)
 *
 * Returns the operation record, or NULL if the request named no action.
 */
remote_fencing_op_t *initiate_remote_stonith_op(stonith_client_t *client, xmlNode *request, gboolean manual_ack)
{
    xmlNode *query = NULL;
    remote_fencing_op_t *op = NULL;

    crm_log_xml_debug(request, "RemoteOp");

    op = create_remote_stonith_op(client->id, request, FALSE);
    query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0);

    if(!manual_ack) {
        op->op_timer = g_timeout_add(1200*op->base_timeout, remote_op_timeout, op);
        op->query_timer = g_timeout_add(100*op->base_timeout, remote_op_query_timeout, op);
    } else {
        crm_xml_add(query, F_STONITH_DEVICE, "manual_ack");
    }

    crm_xml_add(query, F_STONITH_REMOTE, op->id);
    crm_xml_add(query, F_STONITH_TARGET, op->target);
    crm_xml_add(query, F_STONITH_ACTION, op->action);
    crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
    crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);

    /* Validate before logging (so a NULL action never reaches a %s) and
     * release the query on the failure path instead of leaking it.  Any
     * timers armed above stay in place, as before. */
    CRM_CHECK(op->action, free_xml(query); return NULL);

    crm_info("Initiating remote operation %s for %s: %s", op->action, op->target, op->id);

    send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
    free_xml(query);
    return op;
}
/*
 * Comparison callback handed to g_list_find_custom() by
 * stonith_choose_peer(): compares two NUL-terminated strings with strcmp().
 */
static gint sort_strings(gconstpointer a, gconstpointer b)
{
    const char *lhs = a;
    const char *rhs = b;

    return strcmp(lhs, rhs);
}
/*
 * Pick the peer that should carry out the next fencing attempt.
 *
 * With topology in effect, the first peer that reported the current device
 * (op->devices->data) is chosen and that device is removed from the peer's
 * list; if nobody has it, the next fencing level is tried.  Without
 * topology, the first peer in op->query_results with at least one device is
 * chosen.
 *
 * Returns the chosen query result (still owned by op->query_results), or
 * NULL when no peer qualifies.
 */
static st_query_result_t *stonith_choose_peer(remote_fencing_op_t *op)
{
    GListPtr iter = NULL;

    do {
        if(op->devices) {
            crm_trace("Checking for someone to fence %s with %s", op->target, (const char *)op->devices->data);
        } else {
            crm_trace("Checking for someone to fence %s", op->target);
        }

        for(iter = op->query_results; iter != NULL; iter = iter->next) {
            st_query_result_t *peer = iter->data;

            if(peer == NULL) {
                /* Check before either branch dereferences it */
                continue;
            }

            if(is_set(op->call_options, st_opt_topology)) {
                /* Do they have the next device of the current fencing level? */
                GListPtr match = NULL;

                if(op->devices) {
                    match = g_list_find_custom(peer->device_list, op->devices->data, sort_strings);
                }

                if(match) {
                    char *device = match->data;

                    crm_trace("Removing %s from %s (%d remaining)", device, peer->host, g_list_length(peer->device_list));
                    peer->device_list = g_list_remove(peer->device_list, device);

                    /* The entry was a private crm_strdup() copy made in
                     * process_remote_stonith_query(); nothing else refers to it */
                    crm_free(device);
                    return peer;
                }

            } else if(peer->devices > 0) {
                /* No topology: Use the current best peer */
                crm_trace("Simple fencing");
                return peer;
            }
        }

        /* Try the next fencing level if there is one */
    } while(is_set(op->call_options, st_opt_topology)
            && stonith_topology_next(op) == stonith_ok);

    if(op->devices) {
        crm_trace("Couldn't find anyone to fence %s with %s", op->target, (const char *)op->devices->data);
    } else {
        crm_trace("Couldn't find anyone to fence %s", op->target);
    }
    return NULL;
}
/*
 * Ask a peer to execute the fencing operation.
 *
 * Moves the operation to st_exec.  If no peer is supplied one is selected
 * with stonith_choose_peer().  When a peer is available a STONITH_OP_FENCE
 * request is sent to it, naming the current topology device if there is one.
 * When no peer is available the operation is failed via remote_op_timeout()
 * if the query timer is no longer pending; otherwise it keeps waiting for
 * more query replies.
 *
 * op:   the operation to execute
 * peer: the query result to use, or NULL to choose one
 */
void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer)
{
    const char *device = NULL;

    op->state = st_exec;

    if(peer == NULL) {
        peer = stonith_choose_peer(op);
    }

    /* op->devices may be NULL even with topology set (no level selected, or
     * the current level is exhausted), so guard the dereference */
    if(is_set(op->call_options, st_opt_topology) && op->devices) {
        device = op->devices->data;
    }

    if(peer) {
        xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0);

        crm_xml_add(query, F_STONITH_REMOTE, op->id);
        crm_xml_add(query, F_STONITH_TARGET, op->target);
        crm_xml_add(query, F_STONITH_ACTION, op->action);
        crm_xml_add_int(query, F_STONITH_TIMEOUT, 900*op->base_timeout);

        if(device) {
            crm_info("Requesting that %s perform op %s %s with %s", peer->host, op->action, op->target, device);
            crm_xml_add(query, F_STONITH_DEVICE, device);
        } else {
            crm_info("Requesting that %s perform op %s %s", peer->host, op->action, op->target);
        }

        send_cluster_message(peer->host, crm_msg_stonith_ng, query, FALSE);
        free_xml(query);
        return;
    }

    /* No peer from here on, so there is nothing to release */
    if(op->query_timer == 0) {
        /* We've exhausted all available peers */
        crm_info("No remaining peers capable of terminating %s", op->target);
        remote_op_timeout(op);

    } else if(device) {
        crm_info("Waiting for additional peers capable of terminating %s with %s", op->target, device);

    } else {
        crm_info("Waiting for additional peers capable of terminating %s", op->target);
    }
}
/*
 * Ordering callback used with g_list_insert_sorted() to keep
 * op->query_results sorted by device count, largest first.
 *
 * a, b: st_query_result_t pointers
 *
 * Returns -1 if a reported more devices than b, 1 if fewer, 0 if equal.
 */
static gint sort_peers(gconstpointer a, gconstpointer b)
{
    const st_query_result_t *peer_a = a;
    const st_query_result_t *peer_b = b;

    if(peer_a->devices > peer_b->devices) {
        return -1;
    } else if(peer_a->devices < peer_b->devices) {
        return 1;
    }
    return 0;
}
/*
 * Handle a peer's reply to a fencing query.
 *
 * Looks up the operation named by F_STONITH_REMOTE, counts the reply and, if
 * the peer reported at least one device (and is not the target itself unless
 * st_opt_allow_suicide is set), records the result in op->query_results.
 * While the operation is still in st_query and st_opt_all_replies is not
 * set, fencing is started immediately with this peer.
 *
 * msg: the reply XML
 *
 * Returns stonith_ok, st_err_internal for a malformed reply, or
 * st_err_unknown_operation when the operation is not in remote_op_list.
 */
int process_remote_stonith_query(xmlNode *msg)
{
    int devices = 0;
    const char *id = NULL;
    const char *host = NULL;
    remote_fencing_op_t *op = NULL;
    st_query_result_t *result = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR);
    xmlNode *child = NULL;

    crm_log_xml_trace(msg, "QueryResult");

    CRM_CHECK(dev != NULL, return st_err_internal);
    id = crm_element_value(dev, F_STONITH_REMOTE);
    CRM_CHECK(id != NULL, return st_err_internal);

    dev = get_xpath_object("//@st-available-devices", msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return st_err_internal);
    crm_element_value_int(dev, "st-available-devices", &devices);

    /* The table is only created once the first operation is recorded */
    if(remote_op_list) {
        op = g_hash_table_lookup(remote_op_list, id);
    }
    if(op == NULL) {
        crm_debug("Unknown or expired remote op: %s", id);
        return st_err_unknown_operation;
    }

    op->replies++;

    /* Borrowed from msg; only copied once we know the result will be kept,
     * so the early returns below have nothing to free */
    host = crm_element_value(msg, F_ORIG);

    if(devices <= 0) {
        /* If we're doing 'known' then we might need to fire anyway */
        return stonith_ok;

    } else if(op->call_options & st_opt_allow_suicide) {
        crm_trace("Allowing %s to potentialy fence itself", op->target);

    } else if(safe_str_eq(host, op->target)) {
        crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide", op->target);
        return stonith_ok;
    }

    crm_trace("Query result from %s (%d devices)", host, devices);
    crm_malloc0(result, sizeof(st_query_result_t));
    result->host = crm_strdup(host);
    result->devices = devices;

    for (child = __xml_first_child(dev); child != NULL; child = __xml_next(child)) {
        const char *device = ID(child);

        if(device) {
            result->device_list = g_list_prepend(result->device_list, crm_strdup(device));
        }
    }

    CRM_CHECK(devices == g_list_length(result->device_list),
              crm_err("Mis-match: Query claimed to have %d devices but %d found", devices, g_list_length(result->device_list)));

    op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);

    if(op->state == st_query && is_set(op->call_options, st_opt_all_replies) == FALSE) {
        call_remote_stonith(op, result);

    } else if(op->state == st_done) {
        crm_info("Discarding query result from %s (%d devices): Operation is in state %d",
                 result->host, result->devices, op->state);
    }

    return stonith_ok;
}
/*
 * Handle the result of a fencing request that a peer executed.
 *
 * A successful result for an operation not yet in remote_op_list is recorded
 * as a new operation.  With topology in effect, a success moves on to the
 * next device of the current level.  The operation is completed once a
 * success leaves no device outstanding; in every other case another attempt
 * is issued through call_remote_stonith().
 *
 * msg: the result XML
 *
 * Returns the result code carried by the message, st_err_internal for a
 * malformed message, or st_err_unknown_operation when a failed result refers
 * to an operation that is not known here.
 */
int process_remote_stonith_exec(xmlNode *msg)
{
    int rc = 0;
    const char *id = NULL;
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR);

    crm_log_xml_trace(msg, "ExecResult");

    CRM_CHECK(dev != NULL, return st_err_internal);
    id = crm_element_value(dev, F_STONITH_REMOTE);
    CRM_CHECK(id != NULL, return st_err_internal);

    dev = get_xpath_object("//@"F_STONITH_RC, msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return st_err_internal);
    crm_element_value_int(dev, F_STONITH_RC, &rc);

    if(remote_op_list) {
        op = g_hash_table_lookup(remote_op_list, id);
    }

    if(op == NULL && rc == stonith_ok) {
        /* Record successful fencing operations */
        const char *client_id = crm_element_value(msg, F_STONITH_CLIENTID);

        op = create_remote_stonith_op(client_id, msg, TRUE);
    }

    if(op == NULL) {
        /* Could be for an event that began before we started */
        /* TODO: Record the op for later querying */
        crm_info("Unknown or expired remote op: %s", id);
        return st_err_unknown_operation;
    }

    if(is_set(op->call_options, st_opt_topology) && rc == stonith_ok && op->devices) {
        /* Success, are there any more? */
        GListPtr next = op->devices->next;

        crm_info("Call to %s for %s passed, %s next", op->devices->data,
                 op->target, next?next->data:"<none>");
        op->devices = next;
    }

    if(rc != stonith_ok || op->devices != NULL) {
        /* Retry on failure or execute the rest of the topology */
        crm_trace("Next for %s", op->target);
        call_remote_stonith(op, NULL);

    } else {
        crm_trace("All done for %s", op->target);
        op->state = st_done;
        remote_op_done(op, msg, rc);
    }

    return rc;
}
/*
 * Build the fencing history as an F_STONITH_HISTORY_LIST document.
 *
 * If the request names a target, only operations against that target are
 * listed; with st_opt_cs_nodeid set the target is treated as a node ID and
 * translated to the node's name when the node is known.
 *
 * msg:    the history request
 * output: receives the newly created list; the caller owns it
 *
 * Always returns 0.
 */
int stonith_fence_history(xmlNode *msg, xmlNode **output)
{
    int rc = 0;
    const char *target = NULL;
    xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_TRACE);

    if(dev) {
        int options = 0;

        target = crm_element_value(dev, F_STONITH_TARGET);
        crm_element_value_int(msg, F_STONITH_CALLOPTS, &options);

        if(target && (options & st_opt_cs_nodeid)) {
            int nodeid = crm_atoi(target, NULL);
            crm_node_t *node = crm_get_peer(nodeid, NULL);

            if(node) {
                target = node->uname;
            }
        }
    }

    *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST);

    if (remote_op_list) {
        GHashTableIter iter;
        remote_fencing_op_t *op = NULL;

        g_hash_table_iter_init(&iter, remote_op_list);
        while(g_hash_table_iter_next(&iter, NULL, (void**)&op)) {
            xmlNode *entry = NULL;

            /* A record without a target cannot match a filter, and must not
             * be handed to strcmp() */
            if (target && (op->target == NULL || strcmp(op->target, target) != 0)) {
                continue;
            }

            entry = create_xml_node(*output, STONITH_OP_EXEC);
            crm_xml_add(entry, F_STONITH_TARGET, op->target);
            crm_xml_add(entry, F_STONITH_ACTION, op->action);
            crm_xml_add(entry, F_STONITH_ORIGIN, op->originator);
            crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate);
            crm_xml_add_int(entry, F_STONITH_DATE, op->completed);
            crm_xml_add_int(entry, F_STONITH_STATE, op->state);
        }
    }

    return rc;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jan 25, 12:25 PM (12 h, 57 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1322553
Default Alt Text
(20 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment