diff --git a/fencing/commands.c b/fencing/commands.c index 1419899b79..ed013dd92a 100644 --- a/fencing/commands.c +++ b/fencing/commands.c @@ -1,1006 +1,1010 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; static int active_children = 0; static gboolean stonith_device_dispatch(gpointer user_data); static void exec_child_done(ProcTrack* proc, int status, int signo, int rc, int waslogged); static void exec_child_new(ProcTrack* p) { active_children++; } static const char *exec_child_name(ProcTrack* p) { async_command_t *cmd = proctrack_data(p); return cmd->client?cmd->client:cmd->remote; } static ProcTrack_ops StonithdProcessTrackOps = { exec_child_done, exec_child_new, exec_child_name, }; static void free_async_command(async_command_t *cmd) { crm_free(cmd->device); crm_free(cmd->action); crm_free(cmd->victim); crm_free(cmd->remote); crm_free(cmd->client); crm_free(cmd->origin); crm_free(cmd->op); crm_free(cmd); } -static async_command_t *create_async_command(xmlNode *msg, const char *action) +static async_command_t *create_async_command(xmlNode *msg) { async_command_t *cmd = NULL; + xmlNode *op = get_xpath_object("//@"F_STONITH_ACTION, msg, LOG_ERR); + const char *action = crm_element_value(op, F_STONITH_ACTION); + CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_malloc0(cmd, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->timeout)); + cmd->timeout *= 1000; cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote = crm_element_value_copy(msg, F_STONITH_REMOTE); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = crm_strdup(action); - cmd->victim = crm_element_value_copy(msg, F_STONITH_TARGET); + cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->pt_ops = &StonithdProcessTrackOps; CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL || cmd->remote != NULL, crm_log_xml_warn(msg, "NoClient")); return cmd; } static gboolean stonith_device_execute(stonith_device_t *device) { int rc = 0; int exec_rc = 0; async_command_t *cmd = NULL; CRM_CHECK(device != NULL, return FALSE); if(device->active_pid) { crm_trace("%s is still active with pid %u", device->id, device->active_pid); return TRUE; } if(device->pending_ops) { GList *first = device->pending_ops; device->pending_ops = g_list_remove_link(device->pending_ops, first); cmd = first->data; g_list_free_1(first); } if(cmd == NULL) { crm_info("Nothing to do for %s", device->id); return TRUE; } cmd->device = crm_strdup(device->id); exec_rc = run_stonith_agent(device->agent, cmd->action, cmd->victim, device->params, device->aliases, &rc, NULL, cmd); if(exec_rc > 0) { crm_debug("Operation %s%s%s on %s is active with pid: %d", cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"", device->id, exec_rc); device->active_pid = exec_rc; } else { struct _ProcTrack p; memset(&p, 0, sizeof(struct _ProcTrack)); p.privatedata = cmd; crm_warn("Operation %s%s%s on %s failed (%d/%d)", cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"", device->id, exec_rc, rc); exec_child_done(&p, 0, 0, rc<0?rc:exec_rc, 0); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static void schedule_stonith_command(async_command_t *cmd, stonith_device_t *device) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, device->id); device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); } static void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for(gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { struct _ProcTrack p; async_command_t *cmd = gIter->data; memset(&p, 0, sizeof(struct _ProcTrack)); p.privatedata = cmd; crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action); exec_child_done(&p, 0, 0, st_err_unknown_device, 0); free_async_command(cmd); } g_list_free(device->pending_ops); slist_basic_destroy(device->targets); crm_free(device->namespace); crm_free(device->agent); crm_free(device->id); crm_free(device); } static GHashTable *build_port_aliases(const char *hostmap, GListPtr *targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(hostmap == NULL) { return aliases; } max = strlen(hostmap); for(; lpc <= max; lpc++) { switch(hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if(lpc > last) { crm_free(name); crm_malloc0(name, 1 + lpc - last); strncpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if(name) { char *value = NULL; crm_malloc0(value, 1 + lpc - last); strncpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if(targets) { *targets = g_list_append(*targets, crm_strdup(value)); } value=NULL; name=NULL; added++; } else if(lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc-last, hostmap+last); } last = lpc + 1; break; } if(hostmap[lpc] == 0) { break; } } if(added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } crm_free(name); return aliases; } static void parse_host_line(const char *line, GListPtr *output) { int lpc = 0; int max = 0; int last = 0; if(line) { max = strlen(line); } else { return; } /* Check for any complaints about additional parameters that the device doesn't understand */ if(strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping: %s", line); return; } crm_debug_2("Processing: %s", line); /* Skip initial whitespace */ for(lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { last = lpc+1; } /* Now the actual content */ for(lpc = 0; lpc <= max; lpc++) { gboolean a_space = isspace(line[lpc]); if(a_space && lpc < max && isspace(line[lpc+1])) { /* fast-forward to the end of the spaces */ } else if(a_space || line[lpc] == ',' || line[lpc] == 0) { int rc = 0; char *entry = NULL; crm_malloc0(entry, 1 + lpc - last); rc = sscanf(line+last, "%[a-zA-Z0-9_-.]", entry); if(rc != 1) { crm_warn("Could not parse (%d %d): %s", last, lpc, line+last); } else if(safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) { crm_debug_2("Adding '%s'", entry); *output = g_list_append(*output, entry); entry = NULL; } crm_free(entry); last = lpc + 1; } } } static GListPtr parse_host_list(const char *hosts) { int lpc = 0; int max = 0; int last = 0; GListPtr output = NULL; if(hosts == NULL) { return output; } max = strlen(hosts); for(lpc = 0; lpc <= max; lpc++) { if(hosts[lpc] == '\n' || hosts[lpc] == 0) { char *line = NULL; crm_malloc0(line, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", hosts+last); parse_host_line(line, &output); crm_free(line); last = lpc + 1; } } return output; } static stonith_device_t *build_device_from_xml(xmlNode *msg) { xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; crm_malloc0(device, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = crm_element_value_copy(dev, "agent"); device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2list(dev); device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static int stonith_device_register(xmlNode *msg) { const char *value = NULL; stonith_device_t *device = build_device_from_xml(msg); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if(value) { device->targets = parse_host_list(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); g_hash_table_replace(device_list, device->id, device); crm_info("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); return stonith_ok; } static int stonith_device_remove(xmlNode *msg) { xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, XML_ATTR_ID); if(g_hash_table_remove(device_list, id)) { crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } else { crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); } return stonith_ok; } static gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for(lpc = 0; lpc < max; lpc ++) { const char *value = g_list_nth_data(list, lpc); if(safe_str_eq(item, value)) { return TRUE; } } return FALSE; } static int stonith_device_action(xmlNode *msg, char **output) { int rc = stonith_ok; xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); - const char *action = crm_element_value(dev, F_STONITH_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if(id) { crm_debug_2("Looking for '%s'", id); device = g_hash_table_lookup(device_list, id); } if(device) { - cmd = create_async_command(msg, action); + cmd = create_async_command(msg); if(cmd == NULL) { free_device(device); return st_err_internal; } schedule_stonith_command(cmd, device); rc = stonith_pending; } else { crm_notice("Device %s not found", id?id:""); rc = st_err_unknown_device; } return rc; } static gboolean can_fence_host_with_device(stonith_device_t *dev, const char *host) { gboolean can = FALSE; const char *alias = host; const char *check_type = NULL; if(dev == NULL) { return FALSE; } else if(host == NULL) { return TRUE; } if(g_hash_table_lookup(dev->aliases, host)) { alias = g_hash_table_lookup(dev->aliases, host); } check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if(check_type == NULL) { if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else { check_type = "dynamic-list"; } } if(safe_str_eq(check_type, "none")) { can = TRUE; } else if(safe_str_eq(check_type, "static-list")) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if(string_in_list(dev->targets, host)) { can = TRUE; } } else if(safe_str_eq(check_type, "dynamic-list")) { time_t now = time(NULL); /* Host/alias must be in the list output to be eligable to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if(dev->targets_age < 0) { crm_trace("Port list queries disabled for %s", dev->id); } else if(dev->targets == NULL || dev->targets_age + 60 < now) { char *output = NULL; int rc = stonith_ok; int exec_rc = stonith_ok; /* Check for the target's presence in the output of the 'list' command */ slist_basic_destroy(dev->targets); dev->targets = NULL; exec_rc = run_stonith_agent(dev->agent, "list", NULL, dev->params, NULL, &rc, &output, NULL); if(exec_rc < 0 || rc != 0) { crm_notice("Disabling port list queries for %s (%d/%d): %s", dev->id, exec_rc, rc, output); dev->targets_age = -1; } else { crm_info("Refreshing port list for %s", dev->id); dev->targets = parse_host_list(output); dev->targets_age = now; } crm_free(output); } if(string_in_list(dev->targets, alias)) { can = TRUE; } } else if(safe_str_eq(check_type, "status")) { int rc = 0; int exec_rc = 0; /* Run the status operation for the device/target combination * Will cause problems if the device doesn't return 2 for down'd nodes or * (for virtual nodes) if the device doesn't return 1 for guests that * have been moved to another host */ exec_rc = run_stonith_agent( dev->agent, "status", host, dev->params, dev->aliases, &rc, NULL, NULL); if(exec_rc != 0) { crm_err("Could not invoke %s: rc=%d", dev->id, exec_rc); } else if(rc == 1 /* unkown */) { crm_debug_2("Host %s is not known by %s", host, dev->id); } else if(rc == 0 /* active */ || rc == 2 /* inactive */) { can = TRUE; } else { crm_err("Unkown result calling %s for %s with %s: rc=%d", "status", host, dev->id, rc); } } else { crm_err("Unknown check type: %s", check_type); } if(safe_str_eq(host, alias)) { crm_info("%s can%s fence %s: %s", dev->id, can?"":" not", host, check_type); } else { crm_info("%s can%s fence %s (aka. '%s'): %s", dev->id, can?"":" not", host, alias, check_type); } return can; } struct device_search_s { const char *host; GListPtr capable; }; static void search_devices( gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; if(can_fence_host_with_device(dev, search->host)) { search->capable = g_list_append(search->capable, value); } } static int stonith_query(xmlNode *msg, xmlNode **list) { struct device_search_s search; int available_devices = 0; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_DEBUG_3); search.host = NULL; search.capable = NULL; if(dev) { search.host = crm_element_value(dev, F_STONITH_TARGET); } crm_log_xml_debug(msg, "Query"); g_hash_table_foreach(device_list, search_devices, &search); available_devices = g_list_length(search.capable); if(search.host) { crm_debug("Found %d matching devices for '%s'", available_devices, search.host); } else { crm_debug("%d devices installed", available_devices); } /* Pack the results into data */ if(list) { GListPtr lpc = NULL; *list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(*list, F_STONITH_TARGET, search.host); crm_xml_add_int(*list, "st-available-devices", available_devices); for(lpc = search.capable; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = (stonith_device_t*)lpc->data; dev = create_xml_node(*list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); if(search.host == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } } g_list_free(search.capable); return available_devices; } static void log_operation(async_command_t *cmd, int rc, int pid, const char *next, const char *output) { if(rc == 0) { next = NULL; } if(cmd->victim != NULL) { do_crm_log(rc==0?LOG_INFO:LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d%s%s", cmd->action, pid, cmd->id, cmd->client, cmd->victim, cmd->device, rc, next?". Trying: ":"", next?next:""); } else { do_crm_log(rc==0?LOG_DEBUG:LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned: %d%s%s", cmd->action, pid, cmd->device, rc, next?". Trying: ":"", next?next:""); } if(output) { /* Logging the whole string confuses syslog when the string is xml */ char *local_copy = crm_strdup(output); int lpc = 0, last = 0, more = strlen(local_copy); for(lpc = 0; lpc < more; lpc++) { if(local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; do_crm_log(rc==0?LOG_INFO:LOG_ERR, "%s: %s", cmd->device, local_copy+last); last = lpc+1; } } crm_debug("%s output: %s (total %d bytes)", cmd->device, local_copy+last, more); crm_free(local_copy); } } #define READ_MAX 500 static void exec_child_done(ProcTrack* proc, int status, int signum, int rc, int waslogged) { int len = 0; int more = 0; gboolean bcast = FALSE; char *output = NULL; xmlNode *data = NULL; xmlNode *reply = NULL; int pid = proctrack_pid(proc); stonith_device_t *device = NULL; async_command_t *cmd = proctrack_data(proc); CRM_CHECK(cmd != NULL, return); active_children--; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if(device) { device->active_pid = 0; mainloop_set_trigger(device->work); } if( signum ) { rc = st_err_signal; if( proctrack_timedout(proc) ) { crm_warn("Child '%d' performing action '%s' with '%s' timed out", pid, cmd->action, cmd->device); rc = st_err_timeout; } } do { char buffer[READ_MAX]; errno = 0; if(cmd->stdout > 0) { memset(&buffer, 0, READ_MAX); more = read(cmd->stdout, buffer, READ_MAX-1); crm_trace("Got %d more bytes: %s", more, buffer); } if(more > 0) { crm_realloc(output, len + more + 1); sprintf(output+len, "%s", buffer); len += more; } } while (more == (READ_MAX-1) || (more < 0 && errno == EINTR)); if(cmd->stdout) { close(cmd->stdout); cmd->stdout = 0; } if(rc != 0 && cmd->device_next) { stonith_device_t *dev = cmd->device_next->data; log_operation(cmd, rc, pid, dev->id, output); cmd->device_next = cmd->device_next->next; schedule_stonith_command(cmd, dev); goto done; } if(rc > 0) { rc = st_err_generic; } reply = stonith_construct_async_reply(cmd, output, data, rc); if(safe_str_eq(cmd->action, "metadata")) { /* Too verbose to log */ crm_free(output); output = NULL; } else if(crm_str_eq(cmd->action, "reboot", TRUE) || crm_str_eq(cmd->action, "poweroff", TRUE) || crm_str_eq(cmd->action, "poweron", TRUE) || crm_str_eq(cmd->action, "off", TRUE) || crm_str_eq(cmd->action, "on", TRUE)) { bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output); crm_log_xml_debug_3(reply, "Reply"); - if(bcast) { + if(bcast && !stand_alone) { /* Send reply as T_STONITH_NOTIFY so everyone does notifications * Potentially limit to unsucessful operations to the originator? */ crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if(cmd->origin) { send_cluster_message(cmd->origin, crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } free_async_command(cmd); done: reset_proctrack_data(proc); crm_free(output); free_xml(reply); free_xml(data); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = a; if(dev_a->priority > dev_b->priority) { return -1; } else if(dev_a->priority < dev_b->priority) { return 1; } return 0; } static int stonith_fence(xmlNode *msg) { struct device_search_s search; stonith_device_t *device = NULL; - async_command_t *cmd = create_async_command(msg, crm_element_value(msg, F_STONITH_ACTION)); + async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); if(cmd == NULL) { return st_err_internal; } search.capable = NULL; search.host = crm_element_value(dev, F_STONITH_TARGET); crm_log_xml_info(msg, "Exec"); g_hash_table_foreach(device_list, search_devices, &search); crm_info("Found %d matching devices for '%s'", g_list_length(search.capable), search.host); if(g_list_length(search.capable) == 0) { free_async_command(cmd); return st_err_none_available; } /* Order based on priority */ search.capable = g_list_sort(search.capable, sort_device_priority); device = search.capable->data; cmd->device = device->id; if(g_list_length(search.capable) > 1) { cmd->device_list = search.capable; } schedule_stonith_command(cmd, device); return stonith_pending; } xmlNode *stonith_construct_reply(xmlNode *request, char *output, xmlNode *data, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_REMOTE, F_STONITH_CALLOPTS }; crm_debug_4("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); for(lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if(data != NULL) { crm_debug_4("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } xmlNode *stonith_construct_async_reply(async_command_t *cmd, char *output, xmlNode *data, int rc) { xmlNode *reply = NULL; crm_debug_4("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_REMOTE, cmd->remote); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if(data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } void stonith_command(stonith_client_t *client, xmlNode *request, const char *remote) { int call_options = 0; int rc = st_err_generic; gboolean is_reply = FALSE; gboolean always_reply = FALSE; xmlNode *reply = NULL; xmlNode *data = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if(get_xpath_object("//"T_STONITH_REPLY, request, LOG_DEBUG_3)) { is_reply = TRUE; } if(device_list == NULL) { device_list = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_device); } crm_debug("Processing %s%s from %s", op, is_reply?" reply":"", client?client->name:remote); if(crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { return; } else if(crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { rc = stonith_device_register(request); do_stonith_notify(call_options, op, rc, request, NULL); } else if(crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { rc = stonith_device_remove(request); do_stonith_notify(call_options, op, rc, request, NULL); } else if(crm_str_eq(op, STONITH_OP_CONFIRM, TRUE)) { - async_command_t *cmd = create_async_command(request, crm_element_value(request, F_STONITH_ACTION)); + async_command_t *cmd = create_async_command(request); xmlNode *reply = stonith_construct_async_reply(cmd, NULL, NULL, 0); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_notice("Broadcasting manual fencing confirmation for node %s", cmd->victim); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); free_async_command(cmd); free_xml(reply); } else if(crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { rc = stonith_device_action(request, &output); } else if(is_reply && crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { process_remote_stonith_query(request); return; } else if(crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ rc = stonith_query(request, &data); always_reply = TRUE; } else if(is_reply && crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { process_remote_stonith_exec(request); return; } else if(crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { const char *flag_name = NULL; flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if(flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->flags |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if(flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->flags |= get_stonith_flag(flag_name); } return; /* } else if(is_reply && crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { */ /* process_remote_stonith_exec(request); */ /* return; */ } else if(is_reply == FALSE && crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { - if(remote) { + if(remote || stand_alone) { rc = stonith_fence(request); } else if(call_options & st_opt_local_first) { rc = stonith_fence(request); if(rc < 0) { initiate_remote_stonith_op(client, request); + return; } } else { initiate_remote_stonith_op(client, request); + return; } - return; } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) { rc = stonith_fence_history(request, &data); always_reply = TRUE; } else { crm_err("Unknown %s%s from %s", op, is_reply?" reply":"", client?client->name:remote); crm_log_xml_warn(request, "UnknownOp"); } do_crm_log(rc>0?LOG_DEBUG:LOG_INFO,"Processed %s%s from %s: rc=%d", op, is_reply?" reply":"", client?client->name:remote, rc); if(is_reply || rc == stonith_pending) { /* Nothing (yet) */ } else if(remote) { reply = stonith_construct_reply(request, output, data, rc); send_cluster_message(remote, crm_msg_stonith_ng, reply, FALSE); free_xml(reply); } else if(rc <= 0 || always_reply) { reply = stonith_construct_reply(request, output, data, rc); do_local_reply(reply, client_id, call_options & st_opt_sync_call, remote!=NULL); free_xml(reply); } crm_free(output); free_xml(data); } diff --git a/fencing/internal.h b/fencing/internal.h index 16df0633fb..335c2352e4 100644 --- a/fencing/internal.h +++ b/fencing/internal.h @@ -1,63 +1,64 @@ typedef struct stonith_device_s { char *id; char *agent; char *namespace; GListPtr targets; time_t targets_age; gboolean has_attr_map; guint priority; guint active_pid; GHashTable *params; GHashTable *aliases; GList *pending_ops; crm_trigger_t *work; } stonith_device_t; typedef struct stonith_client_s { char *id; char *name; char *callback_id; const char *channel_name; IPC_Channel *channel; GCHSource *source; long long flags; } stonith_client_t; extern long long get_stonith_flag(const char *name); extern void stonith_command( stonith_client_t *client, xmlNode *op_request, const char *remote); extern void do_local_reply( xmlNode *notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer); extern xmlNode *stonith_construct_reply( xmlNode *request, char *output, xmlNode *data, int rc); extern xmlNode *stonith_construct_async_reply( async_command_t *cmd, char *output, xmlNode *data, int rc);; extern void do_stonith_notify( int options, const char *type, enum stonith_errors result, xmlNode *data, const char *remote); extern void initiate_remote_stonith_op(stonith_client_t *client, xmlNode *request); extern int process_remote_stonith_exec(xmlNode *msg); extern int process_remote_stonith_query(xmlNode *msg); extern void *create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer); extern int stonith_fence_history(xmlNode *msg, xmlNode **output); extern char *stonith_our_uname; +extern gboolean stand_alone; diff --git a/fencing/main.c b/fencing/main.c index ad48c4311d..13be53f454 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1,679 +1,679 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *channel1 = NULL; char *channel2 = NULL; char *stonith_our_uname = NULL; GMainLoop *mainloop = NULL; GHashTable *client_list = NULL; +gboolean stand_alone = FALSE; gboolean stonith_shutdown_flag = FALSE; #if SUPPORT_HEARTBEAT ll_cluster_t *hb_conn = NULL; #endif static gboolean stonith_client_disconnect( IPC_Channel *channel, stonith_client_t *stonith_client) { if (channel == NULL) { CRM_LOG_ASSERT(stonith_client == NULL); } else if (stonith_client == NULL) { crm_err("No client"); } else { CRM_LOG_ASSERT(channel->ch_status != IPC_CONNECT); crm_debug_2("Cleaning up after client disconnect: %s/%s/%s", crm_str(stonith_client->name), stonith_client->channel_name, stonith_client->id); if(stonith_client->id != NULL) { if(!g_hash_table_remove(client_list, stonith_client->id)) { crm_err("Client %s not found in the hashtable", stonith_client->name); } } } return FALSE; } static gboolean stonith_client_callback(IPC_Channel *channel, gpointer user_data) { int lpc = 0; const char *value = NULL; xmlNode *request = NULL; gboolean keep_channel = TRUE; stonith_client_t *stonith_client = user_data; CRM_CHECK(stonith_client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(stonith_client->id != NULL, crm_err("Invalid client: %p", stonith_client); return FALSE); if(IPC_ISRCONN(channel) && channel->ops->is_message_pending(channel)) { lpc++; request = xmlfromIPC(channel, MAX_IPC_DELAY); if (request == NULL) { goto bail; } if(stonith_client->name == NULL) { value = crm_element_value(request, F_STONITH_CLIENTNAME); if(value == NULL) { stonith_client->name = crm_itoa(channel->farside_pid); } else { stonith_client->name = crm_strdup(value); } } crm_xml_add(request, F_STONITH_CLIENTID, stonith_client->id); crm_xml_add(request, F_STONITH_CLIENTNAME, stonith_client->name); if(stonith_client->callback_id == NULL) { value = crm_element_value(request, F_STONITH_CALLBACK_TOKEN); if(value != NULL) { stonith_client->callback_id = crm_strdup(value); } else { stonith_client->callback_id = crm_strdup(stonith_client->id); } } crm_log_xml(LOG_MSG, "Client[inbound]", request); stonith_command(stonith_client, request, NULL); free_xml(request); } bail: if(channel->ch_status != IPC_CONNECT) { crm_debug_2("Client disconnected"); keep_channel = stonith_client_disconnect(channel, stonith_client); } return keep_channel; } static void stonith_client_destroy(gpointer user_data) { stonith_client_t *stonith_client = user_data; if(stonith_client == NULL) { crm_debug_4("Destroying %p", user_data); return; } if(stonith_client->source != NULL) { crm_debug_4("Deleting %s (%p) from mainloop", stonith_client->name, stonith_client->source); G_main_del_IPC_Channel(stonith_client->source); stonith_client->source = NULL; } crm_debug_3("Destroying %s (%p)", stonith_client->name, user_data); crm_free(stonith_client->name); crm_free(stonith_client->callback_id); crm_free(stonith_client->id); crm_free(stonith_client); crm_debug_4("Freed the cib client"); return; } static gboolean stonith_client_connect(IPC_Channel *channel, gpointer user_data) { cl_uuid_t client_id; xmlNode *reg_msg = NULL; stonith_client_t *new_client = NULL; char uuid_str[UU_UNPARSE_SIZEOF]; const char *channel_name = user_data; crm_debug_3("Connecting channel"); CRM_CHECK(channel_name != NULL, return FALSE); if (channel == NULL) { crm_err("Channel was NULL"); return FALSE; } else if (channel->ch_status != IPC_CONNECT) { crm_err("Channel was disconnected"); return FALSE; } else if(stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", channel->farside_pid); return FALSE; } crm_malloc0(new_client, sizeof(stonith_client_t)); new_client->channel = channel; new_client->channel_name = channel_name; crm_debug_3("Created channel %p for channel %s", new_client, new_client->channel_name); channel->ops->set_recv_qlen(channel, 1024); channel->ops->set_send_qlen(channel, 1024); new_client->source = G_main_add_IPC_Channel( G_PRIORITY_DEFAULT, channel, FALSE, stonith_client_callback, new_client, stonith_client_destroy); crm_debug_3("Channel %s connected for client %s", new_client->channel_name, new_client->id); cl_uuid_generate(&client_id); cl_uuid_unparse(&client_id, uuid_str); CRM_CHECK(new_client->id == NULL, crm_free(new_client->id)); new_client->id = crm_strdup(uuid_str); /* make sure we can find ourselves later for sync calls * redirected to the master instance */ g_hash_table_insert(client_list, new_client->id, new_client); reg_msg = create_xml_node(NULL, "callback"); crm_xml_add(reg_msg, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reg_msg, F_STONITH_CLIENTID, new_client->id); send_ipc_message(channel, reg_msg); free_xml(reg_msg); return TRUE; } static void stonith_peer_callback(xmlNode * msg, void* private_data) { const char *remote = crm_element_value(msg, F_ORIG); crm_log_xml(LOG_MSG, "Peer[inbound]", msg); stonith_command(NULL, msg, remote); } static void stonith_peer_hb_callback(HA_Message * msg, void* private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); stonith_peer_callback(xml, private_data); free_xml(xml); } #if SUPPORT_COROSYNC static gboolean stonith_peer_ais_callback( AIS_Message *wrapper, char *data, int sender) { xmlNode *xml = NULL; if(wrapper->header.id == crm_class_cluster) { xml = string2xml(data); if(xml == NULL) { goto bail; } crm_xml_add(xml, F_ORIG, wrapper->sender.uname); crm_xml_add_int(xml, F_SEQ, wrapper->id); stonith_peer_callback(xml, NULL); } free_xml(xml); return TRUE; bail: crm_err("Invalid XML: '%.120s'", data); return TRUE; } static void stonith_peer_ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; exit(1); } #endif static void stonith_peer_hb_destroy(gpointer user_data) { if(stonith_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } crm_info("Exiting..."); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { exit(LSB_EXIT_OK); } } static int send_via_callback_channel(xmlNode *msg, const char *token) { stonith_client_t *hash_client = NULL; enum stonith_errors rc = stonith_ok; crm_debug_3("Delivering msg %p to client %s", msg, token); if(token == NULL) { crm_err("No client id token, cant send message"); if(rc == stonith_ok) { rc = -1; } } else if(msg == NULL) { crm_err("No message to send"); rc = -1; } else { /* A client that left before we could reply is not really * _our_ error. Warn instead. */ hash_client = g_hash_table_lookup(client_list, token); if(hash_client == NULL) { crm_warn("Cannot find client for token %s", token); rc = -1; } else if (crm_str_eq(hash_client->channel_name, "remote", FALSE)) { /* just hope it's alive */ } else if(hash_client->channel == NULL) { crm_err("Cannot find channel for client %s", token); rc = -1; } } if(rc == stonith_ok) { crm_debug_3("Delivering reply to client %s (%s)", token, hash_client->channel_name); if(send_ipc_message(hash_client->channel, msg) == FALSE) { crm_warn("Delivery of reply to client %s/%s failed", hash_client->name, token); rc = -1; } } return rc; } void do_local_reply(xmlNode *notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ stonith_client_t *client_obj = NULL; enum stonith_errors local_rc = stonith_ok; crm_debug_2("Sending response"); if(client_id != NULL) { client_obj = g_hash_table_lookup(client_list, client_id); } else { crm_debug_2("No client to sent the response to." " F_STONITH_CLIENTID not set."); } crm_debug_3("Sending callback to request originator"); if(client_obj == NULL) { local_rc = -1; } else { const char *client_id = client_obj->callback_id; crm_debug_2("Sending %ssync response to %s %s", sync_reply?"":"an a-", client_obj->name, from_peer?"(originator of delegated request)":""); if(sync_reply) { client_id = client_obj->id; } local_rc = send_via_callback_channel(notify_src, client_id); } if(local_rc != stonith_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply?"":"A-", client_obj?client_obj->name:"", stonith_error2string(local_rc)); } } long long get_stonith_flag(const char *name) { if(safe_str_eq(name, STONITH_OP_FENCE)) { return 0x01; } else if(safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return 0x04; } else if(safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return 0x10; } return 0; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { IPC_Channel *ipc_client = NULL; xmlNode *update_msg = user_data; stonith_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if(client == NULL) { crm_warn("Skipping NULL client"); return; } else if(client->channel == NULL) { crm_warn("Skipping client with NULL channel"); return; } else if(client->name == NULL) { crm_debug_2("Skipping unnammed client / comamnd channel"); return; } ipc_client = client->channel; if(client->flags & get_stonith_flag(type)) { crm_info("Sending %s-notification to client %s/%s", type, client->name, client->id); if(ipc_client->send_queue->current_qlen >= ipc_client->send_queue->max_qlen) { /* We never want the STONITH to exit because our client is slow */ crm_crit("%s-notification of client %s/%s failed - queue saturated", type, client->name, client->id); } else if(send_ipc_message(ipc_client, update_msg) == FALSE) { crm_warn("%s-Notification of client %s/%s failed", type, client->name, client->id); } } } void do_stonith_notify( int options, const char *type, enum stonith_errors result, xmlNode *data, const char *remote) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL, ;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if(data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_debug_3("Notifying clients"); g_hash_table_foreach(client_list, stonith_notify_client, update_msg); free_xml(update_msg); crm_debug_3("Notify complete"); } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", g_hash_table_size(client_list)); stonith_client_disconnect(NULL, NULL); exit(0); } static void stonith_cleanup(void) { crm_peer_destroy(); g_hash_table_destroy(client_list); crm_free(stonith_our_uname); #if HAVE_LIBXML2 crm_xml_cleanup(); #endif crm_free(channel1); } static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; int main(int argc, char ** argv) { int flag; int rc = 0; int lpc = 0; int argerr = 0; int option_index = 0; - gboolean stand_alone = FALSE; const char *actions[] = { "reboot", "poweroff", "list", "monitor", "status" }; set_crm_log_level(LOG_INFO); crm_system_name = "stonith-ng"; crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case 'V': alter_debug(DEBUG_INC); cl_log_enable_stderr(1); break; case 's': stand_alone = TRUE; cl_log_enable_stderr(1); break; case '$': case '?': crm_help(flag, LSB_EXIT_OK); break; default: ++argerr; break; } } if(argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf(" This is a fake resource that details the instance attributes handled by stonithd.\n"); printf(" Options available for all stonith resources\n"); printf(" \n"); printf(" \n"); printf(" How long to wait for the STONITH action to complete.\n"); printf(" Overrides the stonith-timeout cluster property\n"); printf(" \n"); printf(" \n"); printf(" \n"); printf(" The priority of the stonith resource. The lower the number, the higher the priority.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTARG); printf(" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf(" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf(" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf(" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf(" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf(" How to determin which machines are controlled by the device.\n"); printf(" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST); printf(" \n"); printf(" \n"); for(lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf(" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf(" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); } printf(" \n"); printf("\n"); return 0; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', LSB_EXIT_GENERIC); } crm_log_init("stonith-ng", crm_log_level, TRUE, TRUE, argc, argv); mainloop_add_signal(SIGTERM, stonith_shutdown); /* EnableProcLogging(); */ set_sigchld_proctrack(G_PRIORITY_HIGH,DEFAULT_MAXDISPATCHTIME); crm_peer_init(); client_list = g_hash_table_new(crm_str_hash, g_str_equal); if(stand_alone == FALSE) { void *dispatch = stonith_peer_hb_callback; void *destroy = stonith_peer_hb_destroy; if(is_openais_cluster()) { #if SUPPORT_COROSYNC destroy = stonith_peer_ais_destroy; dispatch = stonith_peer_ais_callback; #endif } if(crm_cluster_connect(&stonith_our_uname, NULL, dispatch, destroy, #if SUPPORT_HEARTBEAT &hb_conn #else NULL #endif ) == FALSE){ crm_crit("Cannot sign in to the cluster... terminating"); exit(100); } } else { stonith_our_uname = crm_strdup("localhost"); } channel1 = crm_strdup(stonith_channel); rc = init_server_ipc_comms( channel1, stonith_client_connect, default_ipc_connection_destroy); channel2 = crm_strdup(stonith_channel_callback); rc = init_server_ipc_comms( channel2, stonith_client_connect, default_ipc_connection_destroy); if(rc == 0) { /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); } else { crm_err("Couldnt start all communication channels, exiting."); } stonith_cleanup(); #if SUPPORT_HEARTBEAT if(hb_conn) { hb_conn->llc_ops->delete(hb_conn); } #endif crm_info("Done"); return rc; } diff --git a/fencing/remote.c b/fencing/remote.c index de7a14cc04..eafbad1725 100644 --- a/fencing/remote.c +++ b/fencing/remote.c @@ -1,553 +1,553 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef struct st_query_result_s { char *host; int devices; } st_query_result_t; typedef struct remote_fencing_op_s { char *id; char *target; char *action; guint replies; guint op_timer; guint query_timer; guint base_timeout; char *delegate; time_t completed; long long call_options; enum op_state state; char *client_id; char *originator; GListPtr query_results; xmlNode *request; } remote_fencing_op_t; GHashTable *remote_op_list = NULL; static void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer); extern xmlNode *stonith_create_op( int call_id, const char *token, const char *op, xmlNode *data, int call_options); static void free_remote_query(gpointer data) { if(data) { st_query_result_t *query = data; crm_free(query->host); crm_free(query); } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_log_xml_debug(op->request, "Destroying"); crm_free(op->id); crm_free(op->action); crm_free(op->target); crm_free(op->client_id); crm_free(op->originator); if(op->query_timer) { g_source_remove(op->query_timer); } if(op->op_timer) { g_source_remove(op->op_timer); } if(op->query_results) { slist_destroy(st_query_result_t, result, op->query_results, free_remote_query(result); ); } if(op->request) { free_xml(op->request); op->request = NULL; } crm_free(op); } static void remote_op_done(remote_fencing_op_t *op, xmlNode *data, int rc) { int call = 0; xmlNode *reply = NULL; xmlNode *local_data = NULL; xmlNode *notify_data = NULL; op->completed = time(NULL); if(op->request != NULL) { crm_element_value_int(op->request, F_STONITH_CALLID, &call); /* else: keep going, make sure the details are accurate for ops that arrive late */ } if(op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if(op->op_timer) { g_source_remove(op->op_timer); op->op_timer = 0; } if(data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } else { op->delegate = crm_element_value_copy(data, F_ORIG); } crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); if(op->request != NULL) { reply = stonith_construct_reply(op->request, NULL, data, rc); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); crm_info("Notifing clients of %s (%s of %s from %s by %s): %d, rc=%d", op->id, op->action, op->target, op->client_id, op->delegate, op->state, rc); } else { crm_err("We've already notified clients of %s (%s of %s from %s by %s): %d, rc=%d", op->id, op->action, op->target, op->client_id, op->delegate, op->state, rc); return; } if(call && reply) { /* Don't bother with this if there is no callid - and thus the op originated elsewhere */ do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); } /* Do notification with a clean data object */ notify_data = create_xml_node(NULL, "st-data"); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_OPERATION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); do_stonith_notify(0, STONITH_OP_FENCE, rc, notify_data, NULL); free_xml(notify_data); free_xml(local_data); free_xml(reply); /* Free non-essential parts of the record * Keep the record around so we can query the history */ if(op->query_results) { slist_destroy(st_query_result_t, result, op->query_results, free_remote_query(result); ); op->query_results = NULL; } if(op->request) { free_xml(op->request); op->request = NULL; } } static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->query_timer = 0; if(op->state == st_done) { crm_debug("Action %s (%s) for %s already completed", op->action, op->id, op->target); return FALSE; } crm_err("Action %s (%s) for %s timed out", op->action, op->id, op->target); remote_op_done(op, NULL, st_err_timeout); op->state = st_failed; return FALSE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if(op->state == st_done) { crm_debug("Operation %s for %s already completed", op->id, op->target); } else if(op->state == st_exec) { crm_debug("Operation %s for %s already in progress", op->id, op->target); } else if(op->query_results) { crm_info("Query %s for %s complete: %d", op->id, op->target, op->state); call_remote_stonith(op, NULL); } else { crm_err("Query %s for %s timed out", op->id, op->target); if(op->op_timer) { g_source_remove(op->op_timer); op->op_timer = 0; } remote_op_timeout(op); } return FALSE; } void *create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_ERR); if(remote_op_list == NULL) { remote_op_list = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_remote_op); } if(peer) { const char *peer_id = crm_element_value(dev, F_STONITH_REMOTE); CRM_CHECK(peer_id != NULL, return NULL); op = g_hash_table_lookup(remote_op_list, peer_id); if(op) { crm_debug("%s already exists", peer_id); return op; } } crm_malloc0(op, sizeof(remote_fencing_op_t)); crm_element_value_int(request, F_STONITH_TIMEOUT, (int*)&(op->base_timeout)); if(peer) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE); } else { cl_uuid_t new_uuid; char uuid_str[UU_UNPARSE_SIZEOF]; cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, uuid_str); op->id = crm_strdup(uuid_str); } g_hash_table_replace(remote_op_list, op->id, op); op->state = st_query; op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, "src"); if(op->originator == NULL) { /* Local request */ op->originator = crm_strdup(stonith_our_uname); } op->client_id = crm_strdup(client); op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, (int*)&(op->call_options)); return op; } void initiate_remote_stonith_op(stonith_client_t *client, xmlNode *request) { xmlNode *query = NULL; remote_fencing_op_t *op = NULL; crm_log_xml_debug(request, "RemoteOp"); op = create_remote_stonith_op(client->id, request, FALSE); - op->op_timer = g_timeout_add(1000*op->base_timeout, remote_op_timeout, op); + op->op_timer = g_timeout_add(1200*op->base_timeout, remote_op_timeout, op); op->query_timer = g_timeout_add(100*op->base_timeout, remote_op_query_timeout, op); query = stonith_create_op(0, op->id, STONITH_OP_QUERY, NULL, 0); crm_xml_add(query, F_STONITH_REMOTE, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op->action); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); - crm_xml_add_int(query, F_STONITH_TIMEOUT, 100*op->base_timeout); + crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); crm_info("Initiating remote operation %s for %s: %s", op->action, op->target, op->id); CRM_CHECK(op->action, return); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } static void call_remote_stonith(remote_fencing_op_t *op, st_query_result_t *peer) { xmlNode *query = stonith_create_op(0, op->id, STONITH_OP_FENCE, NULL, 0);; crm_xml_add(query, F_STONITH_REMOTE, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op->action); crm_xml_add_int(query, F_STONITH_TIMEOUT, 900*op->base_timeout); op->state = st_exec; while(peer == NULL && op->query_results) { peer = g_list_nth_data(op->query_results, 0); op->query_results = g_list_remove(op->query_results, peer); if(peer && peer->devices < 1) { free_remote_query(peer); peer = NULL; } } if(peer) { crm_info("Requesting that %s perform op %s %s", peer->host, op->action, op->target); send_cluster_message(peer->host, crm_msg_stonith_ng, query, FALSE); } else if(op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of terminating %s", op->target); remote_op_timeout(op); } else { crm_info("Waiting for additional peers capable of terminating %s", op->target); } free_remote_query(peer); free_xml(query); } static gint sort_peers(gconstpointer a, gconstpointer b) { const st_query_result_t *peer_a = a; const st_query_result_t *peer_b = a; /* TODO: Factor in priority? */ if(peer_a->devices > peer_b->devices) { return -1; } else if(peer_a->devices > peer_b->devices) { return 1; } return 0; } int process_remote_stonith_query(xmlNode *msg) { int devices = 0; const char *id = NULL; remote_fencing_op_t *op = NULL; st_query_result_t *result = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR); crm_log_xml_debug(msg, "QueryResult"); CRM_CHECK(dev != NULL, return st_err_internal); id = crm_element_value(dev, F_STONITH_REMOTE); CRM_CHECK(id != NULL, return st_err_internal); dev = get_xpath_object("//@st-available-devices", msg, LOG_ERR); CRM_CHECK(dev != NULL, return st_err_internal); crm_element_value_int(dev, "st-available-devices", &devices); op = g_hash_table_lookup(remote_op_list, id); if(op == NULL) { crm_debug("Unknown or expired remote op: %s", id); return st_err_unknown_operation; } op->replies++; crm_malloc0(result, sizeof(st_query_result_t)); result->host = crm_element_value_copy(msg, F_ORIG); result->devices = devices; /* TODO: Implement options * A) If we have anyone that can do the job * B) If we have someone that can do the job and some percent of the known peers * C) If all known peers have responded * * Implement A first */ /* Track A */ if(result->devices > 0) { gboolean do_queue = FALSE; gboolean do_exec = FALSE; if(op->call_options & st_opt_allow_suicide) { crm_info("Allowing %s to potentialy fence itself", op->target); } else if(safe_str_eq(result->host, op->target)) { crm_info("Ignoring reply from %s, hosts are not permitted to commit suicide", op->target); free_remote_query(result); return 0; } switch(op->state) { case st_query: if( op->call_options & st_opt_all_replies ) { do_queue = TRUE; } else { do_exec = TRUE; } break; case st_exec: do_queue = TRUE; break; case st_failed: do_exec = TRUE; break; case st_done: crm_info("Discarding query result from %s (%d deices): Operation is in state %d", result->host, result->devices, op->state); break; } if(do_exec) { call_remote_stonith(op, result); } else if(do_queue) { crm_info("Queuing query result from %s (%d devices): %s", result->host, result->devices, op->state==st_query?"Waiting for remaining replies":"Operation is pending"); op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); } else { free_remote_query(result); } } else { free_remote_query(result); } return 0; } int process_remote_stonith_exec(xmlNode *msg) { int rc = 0; const char *id = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_REMOTE, msg, LOG_ERR); crm_log_xml_info(msg, "ExecResult"); CRM_CHECK(dev != NULL, return st_err_internal); id = crm_element_value(dev, F_STONITH_REMOTE); CRM_CHECK(id != NULL, return st_err_internal); dev = get_xpath_object("//@"F_STONITH_RC, msg, LOG_ERR); CRM_CHECK(dev != NULL, return st_err_internal); if(remote_op_list) { op = g_hash_table_lookup(remote_op_list, id); } if(op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Unknown or expired remote op: %s", id); return st_err_unknown_operation; } crm_element_value_int(dev, F_STONITH_RC, &rc); if(rc == stonith_ok || op->state != st_exec) { op->state = st_done; remote_op_done(op, msg, rc); } else if(rc < stonith_ok && op->state == st_exec) { call_remote_stonith(op, NULL); } return rc; } int stonith_fence_history(xmlNode *msg, xmlNode **output) { int rc = 0; const char *target = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_TRACE); if(dev) { target = crm_element_value(dev, F_STONITH_TARGET); } *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); if (remote_op_list) { GHashTableIter iter; remote_fencing_op_t *op = NULL; g_hash_table_iter_init(&iter, remote_op_list); while(g_hash_table_iter_next(&iter, NULL, (void**)&op)) { xmlNode *entry = NULL; if (target && strcmp(op->target, target) != 0) { continue; } rc = 0; entry = create_xml_node(*output, STONITH_OP_EXEC); crm_xml_add(entry, F_STONITH_TARGET, op->target); crm_xml_add(entry, F_STONITH_ACTION, op->action); crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); crm_xml_add_int(entry, F_STONITH_DATE, op->completed); crm_xml_add_int(entry, F_STONITH_STATE, op->state); } } return rc; }