diff --git a/fencing/commands.c b/fencing/commands.c index 9bb397768a..fa9de2fd73 100644 --- a/fencing/commands.c +++ b/fencing/commands.c @@ -1,1591 +1,1610 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; static int active_children = 0; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; int timeout; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; char *action; char *device; char *mode; GListPtr device_list; GListPtr device_next; void (*done)(GPid pid, int rc, const char *output, gpointer user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; } async_command_t; static xmlNode * stonith_construct_async_reply(async_command_t *cmd, const char *output, xmlNode *data, int rc); static int get_action_timeout(stonith_device_t *device, const char *action, int default_timeout) { char buffer[512] = { 0, }; char *value = NULL; CRM_CHECK(action != NULL, return default_timeout); if (!device->params) { return default_timeout; } snprintf(buffer, sizeof(buffer) - 1, "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (!value) { return default_timeout; } return atoi(value); } static void free_async_command(async_command_t *cmd) { if (!cmd) { return; } cmd_list = g_list_remove(cmd_list, cmd); g_list_free(cmd->device_list); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } static async_command_t *create_async_command(xmlNode *msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@"F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->mode = crm_element_value_copy(op, F_STONITH_MODE); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int stonith_manual_ack(xmlNode *msg, remote_fencing_op_t *op) { async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); if(cmd == NULL) { return -EINVAL; } cmd->device = strdup("manual_ack"); cmd->remote_op_id = strdup(op->id); crm_notice("Injecting manual confirmation that %s is safely off/down", crm_element_value(dev, F_STONITH_TARGET)); st_child_done(0, 0, NULL, cmd); return pcmk_ok; } static gboolean stonith_device_execute(stonith_device_t *device) { int rc = 0; int exec_rc = 0; async_command_t *cmd = NULL; stonith_action_t *action = NULL; CRM_CHECK(device != NULL, return FALSE); if(device->active_pid) { crm_trace("%s is still active with pid %u", device->id, device->active_pid); return TRUE; } if(device->pending_ops) { GList *first = device->pending_ops; device->pending_ops = g_list_remove_link(device->pending_ops, first); cmd = first->data; g_list_free_1(first); } if(cmd == NULL) { crm_trace("Nothing further to do for %s", device->id); return TRUE; } action = stonith_action_create(device->agent, cmd->action, cmd->victim, cmd->timeout, device->params, device->aliases); exec_rc = stonith_action_execute_async(action, (void *) cmd, st_child_done); if(exec_rc > 0) { crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%dms", cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"", device->id, exec_rc, cmd->timeout); device->active_pid = exec_rc; } else { crm_warn("Operation %s%s%s on %s failed (%d/%d)", cmd->action, cmd->victim?" for node ":"", cmd->victim?cmd->victim:"", device->id, exec_rc, rc); st_child_done(0, rc<0?rc:exec_rc, NULL, cmd); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static void schedule_stonith_command(async_command_t *cmd, stonith_device_t *device) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling %s on %s for remote peer %s with op id (%s) (timeout=%dms)", cmd->action, device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling %s on %s for %s (timeout=%dms)", cmd->action, device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); } void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for(gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action); st_child_done(0, -ENODEV, NULL, cmd); free_async_command(cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); free(device->namespace); free(device->agent); free(device->id); free(device); } static GHashTable *build_port_aliases(const char *hostmap, GListPtr *targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if(hostmap == NULL) { return aliases; } max = strlen(hostmap); for(; lpc <= max; lpc++) { switch(hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if(lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if(name) { char *value = NULL; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if(targets) { *targets = g_list_append(*targets, strdup(value)); } value=NULL; name=NULL; added++; } else if(lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc-last, hostmap+last); } last = lpc + 1; break; } if(hostmap[lpc] == 0) { break; } } if(added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } static void parse_host_line(const char *line, GListPtr *output) { int lpc = 0; int max = 0; int last = 0; if(line) { max = strlen(line); } else { return; } /* Check for any complaints about additional parameters that the device doesn't understand */ if(strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping: %s", line); return; } crm_trace("Processing: %s", line); /* Skip initial whitespace */ for(lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { last = lpc+1; } /* Now the actual content */ for(lpc = 0; lpc <= max; lpc++) { gboolean a_space = isspace(line[lpc]); if(a_space && lpc < max && isspace(line[lpc+1])) { /* fast-forward to the end of the spaces */ } else if(a_space || line[lpc] == ',' || line[lpc] == 0) { int rc = 1; char *entry = NULL; if(lpc != last) { entry = calloc(1, 1 + lpc - last); rc = sscanf(line+last, "%[a-zA-Z0-9_-.]", entry); } if(entry == NULL) { /* Skip */ } else if(rc != 1) { crm_warn("Could not parse (%d %d): %s", last, lpc, line+last); } else if(safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) { crm_trace("Adding '%s'", entry); *output = g_list_append(*output, entry); entry = NULL; } free(entry); last = lpc + 1; } } } static GListPtr parse_host_list(const char *hosts) { int lpc = 0; int max = 0; int last = 0; GListPtr output = NULL; if(hosts == NULL) { return output; } max = strlen(hosts); for(lpc = 0; lpc <= max; lpc++) { if(hosts[lpc] == '\n' || hosts[lpc] == 0) { char *line = NULL; line = calloc(1, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", hosts+last); parse_host_line(line, &output); free(line); last = lpc + 1; } } return output; } static stonith_device_t *build_device_from_xml(xmlNode *msg) { xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; device = calloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = crm_element_value_copy(dev, "agent"); device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2list(dev); device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static const char * target_list_type(stonith_device_t *dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if(check_type == NULL) { if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) { check_type = "static-list"; } else { check_type = "dynamic-list"; } } return check_type; } static void update_dynamic_list(stonith_device_t *dev) { time_t now = time(NULL); /* Host/alias must be in the list output to be eligable to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if(dev->targets_age < 0) { crm_trace("Port list queries disabled for %s", dev->id); } else if(dev->targets == NULL || dev->targets_age + 60 < now) { stonith_action_t *action = NULL; char *output = NULL; int rc = pcmk_ok; int exec_rc = pcmk_ok; if(dev->active_pid != 0) { crm_notice("Port list query can not execute because device is busy, using cache: %s", dev->targets ? "YES" : "NO"); return; } action = stonith_action_create(dev->agent, "list", NULL, 10, dev->params, NULL); exec_rc = stonith_action_execute(action, &rc, &output); if(rc != 0 && dev->active_pid == 0) { /* This device probably only supports a single * connection, which appears to already be in use, * likely involved in a montior or (less likely) * metadata operation. * * Avoid disabling port list queries in the hope that * the op would succeed next time */ crm_info("Couldn't query ports for %s. Call failed with rc=%d and active_pid=%d: %s", dev->agent, rc, dev->active_pid, output); } else if(exec_rc < 0 || rc != 0) { /* If we successfully got the targets earlier, don't disable. */ if (dev->targets) { return; } crm_notice("Disabling port list queries for %s (%d/%d): %s", dev->id, exec_rc, rc, output); dev->targets_age = -1; /* Fall back to status */ g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status")); g_list_free_full(dev->targets, free); dev->targets = NULL; } else { crm_info("Refreshing port list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = parse_host_list(output); dev->targets_age = now; } free(output); } } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t *device) { char *key = NULL; char *value = NULL; GHashTableIter gIter; stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup || safe_str_neq(dup->agent, device->agent)) { return NULL; } g_hash_table_iter_init(&gIter, device->params); while (g_hash_table_iter_next(&gIter, (void **) &key, (void **) &value)) { char *other_value = g_hash_table_lookup(dup->params, key); if (!other_value || safe_str_neq(other_value, value)) { return NULL; } } return dup; } -int stonith_device_register(xmlNode *msg, const char **desc) +int stonith_device_register(xmlNode *msg, const char **desc, gboolean from_cib) { const char *value = NULL; stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if(value) { device->targets = parse_host_list(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (safe_str_eq(value, "dynamic-list")) { /* set the dynamic list during the register to guarantee we have * targets cached */ update_dynamic_list(device); } if ((dup = device_has_duplicate(device))) { crm_notice("Device '%s' already existed in device list (%d active devices)", device->id, g_hash_table_size(device_list)); free_device(device); device = dup; } else { g_hash_table_replace(device_list, device->id, device); crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); } if(desc) { *desc = device->id; } + + if (from_cib) { + device->cib_registered = TRUE; + } else { + device->api_registered = TRUE; + } + return pcmk_ok; } -int stonith_device_remove(const char *id) +int stonith_device_remove(const char *id, gboolean from_cib) { - if(g_hash_table_remove(device_list, id)) { - crm_info("Removed '%s' from the device list (%d active devices)", + stonith_device_t *device = g_hash_table_lookup(device_list, id); + + if (!device) { + crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); + return pcmk_ok; + } + + if (from_cib) { + device->cib_registered = FALSE; } else { - crm_info("Device '%s' not found (%d active devices)", + device->api_registered = FALSE; + } + + if (!device->cib_registered && !device->api_registered) { + g_hash_table_remove(device_list, id); + crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } return pcmk_ok; } static int count_active_levels(stonith_topology_t *tp) { int lpc = 0; int count = 0; for(lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if(tp->levels[lpc] != NULL) { count++; } } return count; } void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for(lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if(tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->node); free(tp); } int stonith_level_register(xmlNode *msg, char **desc) { int id = 0; int rc = pcmk_ok; xmlNode *child = NULL; xmlNode *level = get_xpath_object("//"F_STONITH_LEVEL, msg, LOG_ERR); const char *node = crm_element_value(level, F_STONITH_TARGET); stonith_topology_t *tp = g_hash_table_lookup(topology, node); crm_element_value_int(level, XML_ATTR_ID, &id); if(desc) { *desc = g_strdup_printf("%s[%d]", node, id); } if(id <= 0 || id >= ST_LEVEL_MAX) { return -EINVAL; } if(tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->node = strdup(node); g_hash_table_replace(topology, tp->node, tp); crm_trace("Added %s to the topology (%d active entries)", node, g_hash_table_size(topology)); } if(tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry (%d active entries)", node, id, count_active_levels(tp)); } for (child = __xml_first_child(level); child != NULL; child = __xml_next(child)) { const char *device = ID(child); crm_trace("Adding device '%s' for %s (%d)", device, node, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } crm_info("Node %s has %d active fencing levels", node, count_active_levels(tp)); return rc; } int stonith_level_remove(xmlNode *msg, char **desc) { int id = 0; xmlNode *level = get_xpath_object("//"F_STONITH_LEVEL, msg, LOG_ERR); const char *node = crm_element_value(level, F_STONITH_TARGET); stonith_topology_t *tp = g_hash_table_lookup(topology, node); if(desc) { *desc = g_strdup_printf("%s[%d]", node, id); } crm_element_value_int(level, XML_ATTR_ID, &id); if(tp == NULL) { crm_info("Node %s not found (%d active entries)", node, g_hash_table_size(topology)); return pcmk_ok; } else if(id < 0 || id >= ST_LEVEL_MAX) { return -EINVAL; } if(id == 0 && g_hash_table_remove(topology, node)) { crm_info("Removed all %s related entries from the topology (%d active entries)", node, g_hash_table_size(topology)); } else if(id > 0 && tp->levels[id] != NULL) { g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; crm_info("Removed entry '%d' from %s's topology (%d active entries remaining)", id, node, count_active_levels(tp)); } return pcmk_ok; } static gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for(lpc = 0; lpc < max; lpc ++) { const char *value = g_list_nth_data(list, lpc); if(safe_str_eq(item, value)) { return TRUE; } } return FALSE; } static int stonith_device_action(xmlNode *msg, char **output) { int rc = pcmk_ok; xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if(id) { crm_trace("Looking for '%s'", id); device = g_hash_table_lookup(device_list, id); } if(device) { cmd = create_async_command(msg); if(cmd == NULL) { free_device(device); return -EPROTO; } schedule_stonith_command(cmd, device); rc = -EINPROGRESS; } else { crm_info("Device %s not found", id?id:""); rc = -ENODEV; } return rc; } static gboolean can_fence_host_with_device(stonith_device_t *dev, const char *host) { gboolean can = FALSE; const char *alias = host; const char *check_type = NULL; if(dev == NULL) { return FALSE; } else if(host == NULL) { return TRUE; } if(g_hash_table_lookup(dev->aliases, host)) { alias = g_hash_table_lookup(dev->aliases, host); } check_type = target_list_type(dev); if(safe_str_eq(check_type, "none")) { can = TRUE; } else if(safe_str_eq(check_type, "static-list")) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if(string_in_list(dev->targets, host)) { can = TRUE; } else if(g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if(safe_str_eq(check_type, "dynamic-list")) { update_dynamic_list(dev); if(string_in_list(dev->targets, alias)) { can = TRUE; } } else if(safe_str_eq(check_type, "status")) { int rc = 0; int exec_rc = 0; stonith_action_t *action = NULL; /* Run the status operation for the device/target combination * Will cause problems if the device doesn't return 2 for down'd nodes or * (for virtual nodes) if the device doesn't return 1 for guests that * have been moved to another host */ action = stonith_action_create(dev->agent, "status", host, 5, dev->params, dev->aliases); exec_rc = stonith_action_execute(action, &rc, NULL); if(exec_rc != 0) { crm_err("Could not invoke %s: rc=%d", dev->id, exec_rc); } else if(rc == 1 /* unkown */) { crm_trace("Host %s is not known by %s", host, dev->id); } else if(rc == 0 /* active */ || rc == 2 /* inactive */) { can = TRUE; } else { crm_notice("Unkown result when testing if %s can fence %s: rc=%d", dev->id, host, rc); } } else { crm_err("Unknown check type: %s", check_type); } if(safe_str_eq(host, alias)) { crm_info("%s can%s fence %s: %s", dev->id, can?"":" not", host, check_type); } else { crm_info("%s can%s fence %s (aka. '%s'): %s", dev->id, can?"":" not", host, alias, check_type); } return can; } struct device_search_s { const char *host; GListPtr capable; }; static void search_devices( gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; if(can_fence_host_with_device(dev, search->host)) { search->capable = g_list_append(search->capable, value); } } static int stonith_query(xmlNode *msg, xmlNode **list) { struct device_search_s search; int available_devices = 0; const char *action = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_ACTION, msg, LOG_DEBUG_3); search.host = NULL; search.capable = NULL; if(dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); search.host = crm_element_value(dev, F_STONITH_TARGET); if(device && safe_str_eq(device, "manual_ack")) { /* No query necessary */ if(list) { *list = NULL; } return pcmk_ok; } action = crm_element_value(dev, F_STONITH_ACTION); } crm_log_xml_debug(msg, "Query"); g_hash_table_foreach(device_list, search_devices, &search); available_devices = g_list_length(search.capable); if(search.host) { crm_debug("Found %d matching devices for '%s'", available_devices, search.host); } else { crm_debug("%d devices installed", available_devices); } /* Pack the results into data */ if(list) { GListPtr lpc = NULL; *list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(*list, F_STONITH_TARGET, search.host); crm_xml_add_int(*list, "st-available-devices", available_devices); for(lpc = search.capable; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = (stonith_device_t*)lpc->data; int action_specific_timeout = get_action_timeout(device, action, 0); dev = create_xml_node(*list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); if (action_specific_timeout) { crm_xml_add_int(dev, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } if(search.host == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } } g_list_free(search.capable); return available_devices; } #define ST_LOG_OUTPUT_MAX 512 static void log_operation(async_command_t *cmd, int rc, int pid, const char *next, const char *output) { if(rc == 0) { next = NULL; } if(cmd->victim != NULL) { do_crm_log(rc==0?LOG_NOTICE:LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, rc, pcmk_strerror(rc), next?". Trying: ":"", next?next:""); } else { do_crm_log_unlikely(rc==0?LOG_DEBUG:LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->device, rc, pcmk_strerror(rc), next?". Trying: ":"", next?next:""); } if(output) { /* Logging the whole string confuses syslog when the string is xml */ char *local_copy = strdup(output); int lpc = 0, last = 0, more = strlen(local_copy); /* Give the log output some reasonable boundary */ more = more > ST_LOG_OUTPUT_MAX ? ST_LOG_OUTPUT_MAX : more; for(lpc = 0; lpc < more; lpc++) { if(local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; do_crm_log(rc==0?LOG_INFO:LOG_WARNING, "%s: %s", cmd->device, local_copy+last); last = lpc+1; } } crm_debug("%s: %s (total %d bytes)", cmd->device, local_copy+last, more); free(local_copy); } } static void stonith_send_async_reply(async_command_t *cmd, const char *output, int rc, GPid pid) { xmlNode *reply = NULL; gboolean bcast = FALSE; reply = stonith_construct_async_reply(cmd, output, NULL, rc); if(safe_str_eq(cmd->action, "metadata")) { /* Too verbose to log */ crm_trace("Metadata query for %s", cmd->device); output = NULL; } else if(crm_str_eq(cmd->action, "monitor", TRUE) || crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) { crm_trace("Never broadcast %s replies", cmd->action); } else if(!stand_alone && safe_str_eq(cmd->origin, cmd->victim)) { crm_trace("Broadcast %s reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output); crm_log_xml_trace(reply, "Reply"); if(bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if(cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call)?"":"a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if(stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); } free_xml(reply); } static void cancel_stonith_command(async_command_t *cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled %s on %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } #define READ_MAX 500 static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data) { stonith_device_t *device = NULL; async_command_t *cmd = user_data; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(cmd != NULL, return); active_children--; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if(device) { device->active_pid = 0; if (rc == pcmk_ok && (safe_str_eq(cmd->action, "list") || safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } crm_trace("Operation %s on %s completed with rc=%d (%d remaining)", cmd->action, cmd->device, rc, g_list_length(cmd->device_next)); if(rc != 0 && cmd->device_next) { stonith_device_t *dev = cmd->device_next->data; log_operation(cmd, rc, pid, dev->id, output); cmd->device_next = cmd->device_next->next; schedule_stonith_command(cmd, dev); /* Prevent cmd from being freed */ cmd = NULL; goto done; } if(rc > 0) { rc = -pcmk_err_generic; } stonith_send_async_reply(cmd, output, rc, pid); if(rc != 0) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if(cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if(safe_str_eq(cmd->client, cmd_other->client) || safe_str_neq(cmd->victim, cmd_other->victim) || safe_str_neq(cmd->action, cmd_other->action) || safe_str_neq(cmd->device, cmd_other->device)) { continue; } crm_notice("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); stonith_send_async_reply(cmd_other, output, rc, pid); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = a; if(dev_a->priority > dev_b->priority) { return -1; } else if(dev_a->priority < dev_b->priority) { return 1; } return 0; } static int stonith_fence(xmlNode *msg) { int options = 0; const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, msg, LOG_ERR); if(cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if(device_id) { device = g_hash_table_lookup(device_list, device_id); if(device == NULL) { crm_err("Requested device '%s' is not available", device_id); } } else { struct device_search_s search; search.capable = NULL; search.host = crm_element_value(dev, F_STONITH_TARGET); crm_element_value_int(msg, F_STONITH_CALLOPTS, &options); if(options & st_opt_cs_nodeid) { int nodeid = crm_atoi(search.host, NULL); crm_node_t *node = crm_get_peer(nodeid, NULL); if(node) { search.host = node->uname; } } g_hash_table_foreach(device_list, search_devices, &search); crm_info("Found %d matching devices for '%s'", g_list_length(search.capable), search.host); if(g_list_length(search.capable) > 0) { /* Order based on priority */ search.capable = g_list_sort(search.capable, sort_device_priority); device = search.capable->data; if(g_list_length(search.capable) > 1) { cmd->device_list = search.capable; cmd->device_next = cmd->device_list->next; } else { g_list_free(search.capable); } } } if(device) { schedule_stonith_command(cmd, device); return -EINPROGRESS; } free_async_command(cmd); return -EHOSTUNREACH; } xmlNode *stonith_construct_reply(xmlNode *request, char *output, xmlNode *data, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); for(lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if(data != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } static xmlNode * stonith_construct_async_reply(async_command_t *cmd, const char *output, xmlNode *data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if(data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } /*! * \internal * \brief Determine if we need to use an alternate node to * fence the target. If so return that node's uname * * \retval NULL, no alternate host * \retval uname, uname of alternate host to use */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; if(g_hash_table_lookup(topology, target) && safe_str_eq(target, stonith_our_uname)) { GHashTableIter gIter; crm_node_t *entry = NULL; int membership = crm_proc_plugin | crm_proc_heartbeat | crm_proc_cpg; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if(entry->uname && (entry->processes & membership) && safe_str_neq(entry->uname, target)) { alternate_host = entry->uname; break; } } if(alternate_host == NULL) { crm_err("No alternate host available to handle complex self fencing request"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static int handle_request(stonith_client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; gboolean always_reply = FALSE; xmlNode *reply = NULL; xmlNode *data = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if(is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if(crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); crm_ipcs_send(client->channel, id, reply, FALSE); client->request_id = 0; free_xml(reply); return 0; } else if(crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { rc = stonith_device_action(request, &output); } else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if(crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } rc = stonith_query(request, &data); always_reply = TRUE; if(!data) { return 0; } } else if(crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if(flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->flags |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if(flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->flags |= get_stonith_flag(flag_name); } if(flags & crm_ipc_client_response) { crm_ipcs_send_ack(client->channel, id, "ack", __FUNCTION__, __LINE__); client->request_id = 0; } return 0; } else if(crm_str_eq(op, STONITH_OP_RELAY, TRUE)) { xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s", stonith_our_uname, client ? client->name : remote_peer, crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if(initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if(crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { if(remote_peer || stand_alone) { rc = stonith_fence(request); } else if(call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@"F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if(client) { int tolerance = 0; crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'", client->name, client->id, action, target, device?device:"(any)"); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if(stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device?device:"(any)"); } alternate_host = check_alternate_host(target); if(alternate_host) { crm_notice("Forwarding complex self fencing request to peer %s", alternate_host); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if(initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) { rc = stonith_fence_history(request, &data); always_reply = TRUE; } else if(crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { const char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); - rc = stonith_device_register(request, &id); + rc = stonith_device_register(request, &id, FALSE); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(device_list)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if(crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { xmlNode *dev = get_xpath_object("//"F_STONITH_DEVICE, request, LOG_ERR); const char *id = crm_element_value(dev, XML_ATTR_ID); xmlNode *notify_data = create_xml_node(NULL, op); - rc = stonith_device_remove(id); + rc = stonith_device_remove(id, FALSE); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(device_list)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if(crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) { char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_level_register(request, &id); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if(crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) { char *id = NULL; xmlNode *notify_data = create_xml_node(NULL, op); rc = stonith_level_remove(request, &id); crm_xml_add(notify_data, F_STONITH_DEVICE, id); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(call_options, op, rc, notify_data); free_xml(notify_data); } else if(crm_str_eq(op, STONITH_OP_CONFIRM, TRUE)) { async_command_t *cmd = create_async_command(request); xmlNode *reply = stonith_construct_async_reply(cmd, NULL, NULL, 0); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_notice("Broadcasting manual fencing confirmation for node %s", cmd->victim); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); free_async_command(cmd); free_xml(reply); } else { crm_err("Unknown %s from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } done: if (rc == -EINPROGRESS) { /* Nothing (yet) */ } else if(remote_peer) { reply = stonith_construct_reply(request, output, data, rc); send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); free_xml(reply); } else if(rc <= pcmk_ok || always_reply) { reply = stonith_construct_reply(request, output, data, rc); do_local_reply(reply, client_id, call_options & st_opt_sync_call, remote_peer!=NULL); free_xml(reply); } free(output); free_xml(data); return rc; } static void handle_reply(stonith_client_t *client, xmlNode *request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if(crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { process_remote_stonith_query(request); } else if(crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { process_remote_stonith_exec(request); } else if(crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(stonith_client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; const char *op = crm_element_value(request, F_STONITH_OPERATION); if(get_xpath_object("//"T_STONITH_REPLY, request, LOG_DEBUG_3)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s from %s (%16x)", op, is_reply?" reply":"", client?client->name:remote_peer, call_options); if(is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } do_crm_log_unlikely(rc>0?LOG_DEBUG:LOG_INFO,"Processed %s%s from %s: %s (%d)", op, is_reply?" reply":"", client?client->name:remote_peer, rc>0?"":pcmk_strerror(rc), rc); } diff --git a/fencing/internal.h b/fencing/internal.h index 94824cb5bf..20ddd746de 100644 --- a/fencing/internal.h +++ b/fencing/internal.h @@ -1,166 +1,168 @@ #include /*! * \internal * \brief Check to see if target was fenced in the last few seconds. * \param tolerance, The number of seconds to look back in time * \param target, The node to search for * \param action, The action we want to match. * * \retval FALSE, not match * \retval TRUE, fencing operation took place in the last 'tolerance' number of seconds. */ gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action); typedef struct stonith_device_s { char *id; char *agent; char *namespace; GListPtr targets; time_t targets_age; gboolean has_attr_map; guint priority; guint active_pid; GHashTable *params; GHashTable *aliases; GList *pending_ops; crm_trigger_t *work; /*! A verified device is one that has contacted the * agent successfully to perform a monitor operation */ gboolean verified; + gboolean cib_registered; + gboolean api_registered; } stonith_device_t; typedef struct stonith_client_s { char *id; char *name; int pid; int request_id; char *channel_name; qb_ipcs_connection_t *channel; long long flags; } stonith_client_t; typedef struct remote_fencing_op_s { /* The unique id associated with this operation */ char *id; /*! The node this operation will fence */ char *target; /*! The fencing action to perform on the target. (reboot, on, off) */ char *action; /*! Marks if the final notifications have been sent to local stonith clients. */ gboolean notify_sent; /*! The number of query replies received */ guint replies; /*! Does this node own control of this operation */ gboolean owner; /*! After query is complete, This the high level timer that expires the entire operation */ guint op_timer_total; /*! This timer expires the current fencing request. Many fencing * requests may exist in a single operation */ guint op_timer_one; /*! This timer expires the query request sent out to determine * what nodes are contain what devices, and who those devices can fence */ guint query_timer; /*! This is the default timeout to use for each fencing device if no * custom timeout is received in the query. */ gint base_timeout; /*! This is the calculated total timeout an operation can take before * expiring. This is calculated by adding together all the timeout * values associated with the devices this fencing operation may call */ gint total_timeout; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations * will involve multiple delegates. This value represents the final delegate * that is used. */ char *delegate; /*! The point at which the remote operation completed */ time_t completed; /*! The stonith_call_options associated with this remote operation */ long long call_options; /*! The current state of the remote operation. This indicates * what phase the op is in, query, exec, done, duplicate, failed. */ enum op_state state; /*! The node that owns the remote operation */ char *originator; /*! The local client id that initiated the fencing request */ char *client_id; /*! The name of client that initiated the fencing request */ char *client_name; /*! List of the received query results for all the nodes in the cpg group */ GListPtr query_results; /*! The original request that initiated the remote stonith operation */ xmlNode *request; /*! The current topology level being executed */ guint level; /*! The device list of all the devices at the current executing topology level. */ GListPtr devices; /*! List of duplicate operations attached to this operation. Once this operation * completes, the duplicate operations will be closed out as well. */ GListPtr duplicates; } remote_fencing_op_t; typedef struct stonith_topology_s { char *node; GListPtr levels[ST_LEVEL_MAX]; } stonith_topology_t; extern long long get_stonith_flag(const char *name); extern void stonith_command(stonith_client_t * client, uint32_t id, uint32_t flags, xmlNode * op_request, const char *remote_peer); -extern int stonith_device_register(xmlNode * msg, const char **desc); +extern int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib); -extern int stonith_device_remove(const char *id); +extern int stonith_device_remove(const char *id, gboolean from_cib); extern int stonith_level_register(xmlNode * msg, char **desc); extern int stonith_level_remove(xmlNode * msg, char **desc); extern void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer); extern xmlNode *stonith_construct_reply(xmlNode * request, char *output, xmlNode * data, int rc); void do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); extern void do_stonith_notify(int options, const char *type, int result, xmlNode * data); extern remote_fencing_op_t *initiate_remote_stonith_op(stonith_client_t * client, xmlNode * request, gboolean manual_ack); extern int process_remote_stonith_exec(xmlNode * msg); extern int process_remote_stonith_query(xmlNode * msg); extern void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); extern int stonith_fence_history(xmlNode * msg, xmlNode ** output); extern void free_device(gpointer data); extern void free_topology_entry(gpointer data); extern char *stonith_our_uname; extern gboolean stand_alone; extern GHashTable *device_list; extern GHashTable *topology; extern GHashTable *client_list; diff --git a/fencing/main.c b/fencing/main.c index cbbb19145f..407d51f68e 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1,1075 +1,1075 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; GMainLoop *mainloop = NULL; GHashTable *client_list = NULL; gboolean stand_alone = FALSE; gboolean no_cib_connect = FALSE; gboolean stonith_shutdown_flag = FALSE; qb_ipcs_service_t *ipcs = NULL; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("Connecting %p for uid=%d gid=%d", c, uid, gid); if(stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } return 0; } static void st_ipc_created(qb_ipcs_connection_t *c) { stonith_client_t *new_client = NULL; #if 0 struct qb_ipcs_stats srv_stats; qb_ipcs_stats_get(s1, &srv_stats, QB_FALSE); qb_log(LOG_INFO, "Connection created (active:%d, closed:%d)", srv_stats.active_connections, srv_stats.closed_connections); #endif new_client = calloc(1, sizeof(stonith_client_t)); new_client->channel = c; new_client->pid = crm_ipcs_client_pid(c); new_client->channel_name = strdup("ipc"); CRM_CHECK(new_client->id == NULL, free(new_client->id)); new_client->id = crm_generate_uuid(); crm_trace("Created channel %p for client %s", c, new_client->id); /* make sure we can find ourselves later for sync calls * redirected to the master instance */ g_hash_table_insert(client_list, new_client->id, new_client); qb_ipcs_context_set(c, new_client); CRM_ASSERT(qb_ipcs_context_get(c) != NULL); } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t *c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; xmlNode *request = NULL; stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c); request = crm_ipcs_recv(c, data, size, &id, &flags); if (request == NULL) { crm_ipcs_send_ack(c, id, "nack", __FUNCTION__, __LINE__); return 0; } CRM_CHECK(client != NULL, goto cleanup); if(client->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if(value == NULL) { value = "unknown"; } client->name = g_strdup_printf("%s.%u", value, client->pid); } CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p/%s", client, client->name); goto cleanup); if(flags & crm_ipc_client_response) { CRM_LOG_ASSERT(client->request_id == 0); /* This means the client has two synchronous events in-flight */ client->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, client->id); crm_xml_add(request, F_STONITH_CLIENTNAME, client->name); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "Client[inbound]"); stonith_command(client, id, flags, request, NULL); cleanup: if(client == NULL || client->id == NULL) { crm_log_xml_notice(request, "Invalid client"); } free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t *c) { stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c); #if 0 qb_ipcs_stats_get(s1, &srv_stats, QB_FALSE); qb_ipcs_connection_stats_get(c, &stats, QB_FALSE); qb_log(LOG_INFO, "Connection to pid:%d destroyed (active:%d, closed:%d)", stats.client_pid, srv_stats.active_connections, srv_stats.closed_connections); qb_log(LOG_DEBUG, " Requests %"PRIu64"", stats.requests); qb_log(LOG_DEBUG, " Responses %"PRIu64"", stats.responses); qb_log(LOG_DEBUG, " Events %"PRIu64"", stats.events); qb_log(LOG_DEBUG, " Send retries %"PRIu64"", stats.send_retries); qb_log(LOG_DEBUG, " Recv retries %"PRIu64"", stats.recv_retries); qb_log(LOG_DEBUG, " FC state %d", stats.flow_control_state); qb_log(LOG_DEBUG, " FC count %"PRIu64"", stats.flow_control_count); #endif if (client == NULL) { crm_err("No client"); return 0; } crm_trace("Cleaning up after client disconnect: %p/%s/%s", client, crm_str(client->name), client->id); if(client->id != NULL) { g_hash_table_remove(client_list, client->id); } /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t *c) { stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c); /* Make sure the connection is fully cleaned up */ st_ipc_closed(c); if(client == NULL) { crm_trace("Nothing to destroy"); return; } crm_trace("Destroying %s (%p)", client->name, client); free(client->name); free(client->id); free(client); crm_trace("Done"); return; } static void stonith_peer_callback(xmlNode * msg, void* private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if(crm_str_eq(op, "poke", TRUE)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_HEARTBEAT static void stonith_peer_hb_callback(HA_Message * msg, void* private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); stonith_peer_callback(xml, private_data); free_xml(xml); } static void stonith_peer_hb_destroy(gpointer user_data) { if(stonith_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } stonith_shutdown(0); } #endif #if SUPPORT_COROSYNC static gboolean stonith_peer_ais_callback(int kind, const char *from, const char *data) { xmlNode *xml = NULL; if(kind == crm_class_cluster) { xml = string2xml(data); if(xml == NULL) { goto bail; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); return TRUE; bail: crm_err("Invalid XML: '%.120s'", data); return TRUE; } static void stonith_peer_ais_destroy(gpointer user_data) { crm_err("AIS connection terminated"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode *notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ stonith_client_t *client_obj = NULL; int local_rc = pcmk_ok; crm_trace("Sending response"); if(client_id != NULL) { client_obj = g_hash_table_lookup(client_list, client_id); } else { crm_trace("No client to sent the response to." " F_STONITH_CLIENTID not set."); } crm_trace("Sending callback to request originator"); if(client_obj == NULL) { local_rc = -1; } else { int rid = 0; if(sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer?"(originator of delegated request)":""); } else { crm_trace("Sending an event to %s %s", client_obj->name, from_peer?"(originator of delegated request)":""); } local_rc = crm_ipcs_send(client_obj->channel, rid, notify_src, !sync_reply); } if(local_rc < pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply?"":"A-", client_obj?client_obj->name:"", pcmk_strerror(local_rc)); } } long long get_stonith_flag(const char *name) { if(safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) { return 0x01; } else if(safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return 0x04; } else if(safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return 0x10; } return 0; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; stonith_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if(client->channel == NULL) { crm_trace("Skipping client with NULL channel"); return; } else if(client->name == NULL) { crm_trace("Skipping unnammed client / comamnd channel"); return; } if(client->flags & get_stonith_flag(type)) { int rc = crm_ipcs_send(client->channel, 0, update_msg, crm_ipc_server_event|crm_ipc_server_error); if(rc <= 0) { crm_warn("%s notification of client %s.%.6s failed: %s (%d)", type, client->name, client->id, pcmk_strerror(rc), rc); } else { crm_trace("Sent %s notification to client %s.%.6s", type, client->name, client->id); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { stonith_client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = g_hash_table_lookup(client_list, client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client->channel) { crm_ipcs_send(client->channel, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL, ;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if(data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); g_hash_table_foreach(client_list, stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static stonith_key_value_t *parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if(devices == NULL) { return output; } max = strlen(devices); for(lpc = 0; lpc <= max; lpc++) { if(devices[lpc] == ',' || devices[lpc] == 0) { char *line = NULL; line = calloc(1, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", devices+last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL); xmlNode *notify_data = create_xml_node(NULL, STONITH_OP_LEVEL_DEL); crm_xml_add(data, "origin", __FUNCTION__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add(data, F_STONITH_TARGET, node); rc = stonith_level_remove(data, &desc); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(0, STONITH_OP_LEVEL_DEL, rc, notify_data); free_xml(notify_data); free_xml(data); free(desc); } static void topology_register_helper(const char *node, int level, stonith_key_value_t *device_list) { int rc; char *desc = NULL; xmlNode *notify_data = create_xml_node(NULL, STONITH_OP_LEVEL_ADD); xmlNode *data = create_level_registration_xml(node, level, device_list); rc = stonith_level_register(data, &desc); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology)); do_stonith_notify(0, STONITH_OP_LEVEL_ADD, rc, notify_data); free_xml(notify_data); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = 0, lpc = 0; if(xpathObj && xpathObj->nodesetval) { max = xpathObj->nodesetval->nodeNr; } for(lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (safe_str_neq(standard, "stonith")) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); - stonith_device_remove(rsc_id); + stonith_device_remove(rsc_id, TRUE); } } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = 0, lpc = 0; if(xpathObj && xpathObj->nodesetval) { max = xpathObj->nodesetval->nodeNr; } for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); if(crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; const char *target = crm_element_value(match, XML_ATTR_STONITH_TARGET); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if(target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if(index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_cib_device(xmlXPathObjectPtr xpathObj, gboolean force) { int max = 0, lpc = 0; if(xpathObj && xpathObj->nodesetval) { max = xpathObj->nodesetval->nodeNr; } for(lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *agent = NULL; const char *standard = NULL; const char *provider = NULL; stonith_key_value_t *params = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); xmlNode *attributes; xmlNode *attr; xmlNode *data; CRM_CHECK(match != NULL, continue); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); agent = crm_element_value(match, XML_EXPR_ATTR_TYPE); provider = crm_element_value(match, XML_AGENT_ATTR_PROVIDER); if (safe_str_neq(standard, "stonith") || !agent) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); attributes = find_xml_node(match, XML_TAG_ATTR_SETS, FALSE); for (attr = __xml_first_child(attributes); attr; attr = __xml_next(attr)) { const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); } data = create_device_registration_xml(rsc_id, provider, agent, params); if(force == FALSE && crm_element_value(match, XML_DIFF_MARKER)) { - stonith_device_register(data, NULL); + stonith_device_register(data, NULL, TRUE); } else { - stonith_device_remove(rsc_id); - stonith_device_register(data, NULL); + stonith_device_remove(rsc_id, TRUE); + stonith_device_register(data, NULL, TRUE); } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force) { int max = 0, lpc = 0; if(xpathObj && xpathObj->nodesetval) { max = xpathObj->nodesetval->nodeNr; } for(lpc = 0; lpc < max; lpc++) { int index = 0; const char *target; const char *dev_list; stonith_key_value_t *devices = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_CHECK(match != NULL, continue); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); target = crm_element_value(match, XML_ATTR_STONITH_TARGET); dev_list = crm_element_value(match, XML_ATTR_STONITH_DEVICES); devices = parse_device_list(dev_list); crm_trace("Updating %s[%d] (%s) to %s", target, index, ID(match), dev_list); if(target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if(index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else if(force == FALSE && crm_element_value(match, XML_DIFF_MARKER)) { /* Addition */ topology_register_helper(target, index, devices); } else { /* Modification */ /* Remove then re-add */ topology_remove_helper(target, index); topology_register_helper(target, index, devices); } stonith_key_value_freeall(devices, 1, 1); } } /* Fencing */ static void fencing_topology_init(xmlNode * msg) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Pushing in stonith topology"); /* Grab everything */ xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj, TRUE); if(xpathObj) { xmlXPathFreeObject(xpathObj); } } static void cib_stonith_devices_init(xmlNode *msg) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_CIB_TAG_RESOURCE; crm_trace("Pushing in stonith devices"); /* Grab everything */ xpathObj = xpath_search(msg, xpath); if(xpathObj) { register_cib_device(xpathObj, TRUE); xmlXPathFreeObject(xpathObj); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { const char *xpath_add = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE; const char *xpath_del = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE; xmlXPathObjectPtr xpath_obj = NULL; /* process deletions */ xpath_obj = xpath_search(msg, xpath_del); if (xpath_obj) { remove_cib_device(xpath_obj); xmlXPathFreeObject(xpath_obj); } /* process additions */ xpath_obj = xpath_search(msg, xpath_add); if (xpath_obj) { register_cib_device(xpath_obj, FALSE); xmlXPathFreeObject(xpath_obj); } } static void update_fencing_topology(const char *event, xmlNode * msg) { const char *xpath; xmlXPathObjectPtr xpathObj = NULL; /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); if(xpathObj) { xmlXPathFreeObject(xpathObj); } /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj, FALSE); if(xpathObj) { xmlXPathFreeObject(xpathObj); } } static void update_cib_cache_cb(const char *event, xmlNode * msg) { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fencing_topology_init(msg); cib_stonith_devices_init(msg); } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", g_hash_table_size(client_list)); if(mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { stonith_cleanup(); exit(EX_OK); } } cib_t *cib = NULL; static void stonith_cleanup(void) { if(cib) { cib->cmds->signoff(cib); } if(ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); g_hash_table_destroy(client_list); free(stonith_our_uname); #if HAVE_LIBXML2 crm_xml_cleanup(); #endif } /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"stand-alone-w-cpg", 0, 0, 'c'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void setup_cib(void) { static void *cib_library = NULL; static cib_t *(*cib_new_fn)(void) = NULL; static const char *(*cib_err_fn)(int) = NULL; int rc, retries = 0; if(cib_library == NULL) { cib_library = dlopen(CIB_LIBRARY, RTLD_LAZY); } if(cib_library && cib_new_fn == NULL) { cib_new_fn = dlsym(cib_library, "cib_new"); } if(cib_library && cib_err_fn == NULL) { cib_err_fn = dlsym(cib_library, "pcmk_strerror"); } if(cib_new_fn != NULL) { cib = (*cib_new_fn)(); } if(cib == NULL) { crm_err("No connection to the CIB"); return; } do { sleep(retries); rc = cib->cmds->signon(cib, CRM_SYSTEM_CRMD, cib_command); } while(rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB service: %d %p", rc, cib_err_fn); } else if (pcmk_ok != cib->cmds->add_notify_callback( cib, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib->cmds->query(cib, NULL, NULL, cib_scope_local); add_cib_op_callback(cib, rc, FALSE, NULL, init_cib_cache_cb); crm_notice("Watching for stonith topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = st_ipc_created, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } int main(int argc, char ** argv) { int flag; int rc = 0; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t cluster; const char *actions[] = { "reboot", "off", "list", "monitor", "status" }; crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) { break; } switch(flag) { case 'V': crm_bump_log_level(argc, argv); break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': crm_help(flag, EX_OK); break; default: ++argerr; break; } } if(argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf(" This is a fake resource that details the instance attributes handled by stonithd.\n"); printf(" Options available for all stonith resources\n"); printf(" \n"); printf(" \n"); printf(" How long to wait for the STONITH action to complete per a stonith device.\n"); printf(" Overrides the stonith-timeout cluster property\n"); printf(" \n"); printf(" \n"); printf(" \n"); printf(" The priority of the stonith resource. The lower the number, the higher the priority.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTARG); printf(" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf(" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf(" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf(" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf(" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf(" How to determin which machines are controlled by the device.\n"); printf(" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST); printf(" \n"); printf(" \n"); for(lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf(" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf(" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); } printf(" \n"); printf("\n"); return 0; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); client_list = g_hash_table_new(crm_str_hash, g_str_equal); if(stand_alone == FALSE) { #if SUPPORT_HEARTBEAT cluster.hb_dispatch = stonith_peer_hb_callback; cluster.destroy = stonith_peer_hb_destroy; #endif if(is_openais_cluster()) { #if SUPPORT_COROSYNC cluster.destroy = stonith_peer_ais_destroy; cluster.cs_dispatch = stonith_peer_ais_callback; #endif } if(crm_cluster_connect(&cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); exit(100); } else { stonith_our_uname = cluster.uname; } stonith_our_uname = cluster.uname; if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } crm_set_status_callback(&st_peer_update_callback); device_list = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_device); topology = g_hash_table_new_full( crm_str_hash, g_str_equal, NULL, free_topology_entry); ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, &ipc_callbacks); #if SUPPORT_STONITH_CONFIG if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { standalone_cfg_commit(); } #endif if(ipcs != NULL) { /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); } else { crm_err("Couldnt start all communication channels, exiting."); } stonith_cleanup(); #if SUPPORT_HEARTBEAT if(cluster.hb_conn) { cluster.hb_conn->llc_ops->delete(cluster.hb_conn); } #endif crm_info("Done"); qb_log_fini(); return rc; } diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index c38f280989..69fc3bad9b 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -1,1168 +1,1167 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_TIMEB_H # include #endif GHashTable *rsc_list = NULL; GHashTable *client_list = NULL; typedef struct lrmd_cmd_s { int timeout; int interval; int start_delay; int call_id; int exec_rc; int lrmd_op_status; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; char *client_id; char *origin; char *rsc_id; char *action; char *output; char *userdata_str; #ifdef HAVE_SYS_TIMEB_H /* Timestamp of when op ran */ struct timeb t_run; /* Timestamp of when op was queued */ struct timeb t_queue; /* Timestamp of last rc change */ struct timeb t_rcchange; #endif int first_notify_sent; int last_notify_rc; int last_notify_op_status; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t *rsc, const char *client_id); static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode * msg, lrmd_client_t * client) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); return cmd; } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } free(cmd->origin); free(cmd->action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); #endif mainloop_set_trigger(rsc->work); return FALSE; } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static void send_reply(lrmd_client_t * client, int rc, uint32_t id, int call_id) { int send_rc = 0; xmlNode *reply = NULL; reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); send_rc = crm_ipcs_send(client->channel, id, reply, FALSE); free_xml(reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; lrmd_client_t *client = value; if (client == NULL) { crm_err("Asked to send event to NULL client"); return; } else if (client->channel == NULL) { crm_trace("Asked to send event to disconnected client"); return; } else if (client->name == NULL) { crm_trace("Asked to send event to client with no name"); return; } if (crm_ipcs_send(client->channel, 0, update_msg, TRUE) <= 0) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } } #ifdef HAVE_SYS_TIMEB_H static int time_diff_ms(struct timeb *now, struct timeb *old) { int sec = difftime(now->time, old->time); int ms = now->millitm - old->millitm; if (old->time == 0) { return 0; } return (sec * 1000) + ms; } #endif static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { #ifdef HAVE_SYS_TIMEB_H struct timeb now = { 0, }; #endif xmlNode *notify = NULL; /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if (cmd->last_notify_rc == cmd->exec_rc && cmd->last_notify_op_status == cmd->lrmd_op_status) { /* only send changes */ return; } } cmd->first_notify_sent = 1; cmd->last_notify_rc = cmd->exec_rc; cmd->last_notify_op_status = cmd->lrmd_op_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); #ifdef HAVE_SYS_TIMEB_H ftime(&now); crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time); crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time); crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, time_diff_ms(&now, &cmd->t_run)); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, time_diff_ms(&cmd->t_run, &cmd->t_queue)); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2field((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { lrmd_client_t *client = g_hash_table_lookup(client_list, cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else { g_hash_table_foreach(client_list, send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); g_hash_table_foreach(client_list, send_client_notify, notify); free_xml(notify); } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc?rsc->active:NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } send_cmd_complete_notify(cmd); /* crmd expects lrmd to automatically cancel recurring ops after rsc stops */ if (rsc && safe_str_eq(cmd->action, "stop")) { cancel_all_recurring(rsc, NULL); } if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd->lrmd_op_status = 0; memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); free(cmd->output); cmd->output = NULL; } } static int lsb2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_EXECRA_UNKNOWN_ERROR; } /* status has different return codes that everything else. */ if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) { if (rc > PCMK_LSB_NOT_RUNNING) { return PCMK_EXECRA_UNKNOWN_ERROR; } return rc; } switch (rc) { case PCMK_LSB_STATUS_OK: return PCMK_EXECRA_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_EXECRA_NOT_INSTALLED; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_EXECRA_NOT_RUNNING; default: return PCMK_EXECRA_UNKNOWN_ERROR; } return PCMK_EXECRA_UNKNOWN_ERROR; } static int ocf2uniform_rc(int rc) { if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) { return PCMK_EXECRA_UNKNOWN_ERROR; } return rc; } static int stonith2uniform_rc(const char *action, int rc) { if (rc == -ENODEV) { if (safe_str_eq(action, "stop")) { rc = PCMK_EXECRA_OK; } else if (safe_str_eq(action, "start")) { rc = PCMK_EXECRA_NOT_INSTALLED; } else { rc = PCMK_EXECRA_NOT_RUNNING; } } else if (rc != 0) { rc = PCMK_EXECRA_UNKNOWN_ERROR; } return rc; } static int get_uniform_rc(const char *standard, const char *action, int rc) { if (safe_str_eq(standard, "ocf")) { return ocf2uniform_rc(rc); } else if (safe_str_eq(standard, "stonith")) { return stonith2uniform_rc(action, rc); } else if (safe_str_eq(standard, "systemd")) { return rc; } else if (safe_str_eq(standard, "upstart")) { return rc; } else { return lsb2uniform_rc(action, rc); } } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; if (!cmd) { crm_err("LRMD action (%s) completed does not match any known operations.", action->id); return; } #ifdef HAVE_SYS_TIMEB_H if (cmd->exec_rc != action->rc) { ftime(&cmd->t_rcchange); } #endif cmd->exec_rc = get_uniform_rc(action->standard, cmd->action, action->rc); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (action->stdout_data) { cmd->output = strdup(action->stdout_data); } cmd_finalize(cmd, rsc); } static void stonith_action_complete(lrmd_cmd_t *cmd, int rc) { int recurring = cmd->interval; lrmd_rsc_t *rsc = NULL; cmd->exec_rc = get_uniform_rc("stonith", cmd->action, rc); rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) { recurring = 0; /* do nothing */ } else if (rc) { /* Attempt to map return codes to op status if possible */ switch (rc) { case -EPROTONOSUPPORT: cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED; break; case -ETIME: cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; break; default: cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } } else { /* command successful */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; if (safe_str_eq(cmd->action, "start") && rsc) { rsc->stonith_started = 1; } } if (recurring && rsc) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t *data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (safe_str_eq(rsc->class, "stonith")) { if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("STONITH connection failed, finalizing %d pending operations.", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } static int lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; int do_monitor = 0; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { cmd->exec_rc = get_uniform_rc("stonith", cmd->action, -ENOTCONN); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; cmd_finalize(cmd, rsc); return -EUNATCH; } if (safe_str_eq(cmd->action, "start")) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* Stonith automatically registers devices from the CIB when changes occur, * but to avoid a possible race condition between stonith receiving the CIB update * and the lrmd requesting that resource, the lrmd still registers the device as well. * Stonith knows how to handle duplicate device registrations correctly. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); if (rc == 0) { do_monitor = 1; } } else if (safe_str_eq(cmd->action, "stop")) { - /* Device removal now happens when the device is removed from the cib */ - /* rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id); */ + rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id); rsc->stonith_started = 0; } else if (safe_str_eq(cmd->action, "monitor")) { if (cmd->interval) { do_monitor = 1; } else { rc = rsc->stonith_started ? 0 : -ENODEV; } } if (!do_monitor) { goto cleanup_stonith_exec; } rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000 ); rc = stonith_api->cmds->register_callback( stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); /* don't cleanup yet, we will find out the result of the monitor later */ if (rc > 0) { rsc->active = cmd; return rc; } else if (rc == 0) { rc = -1; } cleanup_stonith_exec: stonith_action_complete(cmd, rc); return rc; } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (safe_str_eq(action, "monitor") && (safe_str_eq(rsc->class, "lsb") || safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) { return "status"; } return action; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); if (cmd->params) { params_copy = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (params_copy != NULL) { g_hash_table_foreach(cmd->params, dup_attr, params_copy); } } action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval, cmd->timeout, params_copy); if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; /* 'cmd' may not be valid after this point * * Upstart and systemd both synchronously determine monitor/status * results and call action_complete (which may free 'cmd') if necessary */ if (services_action_async(action, action_complete)) { return TRUE; } cmd->exec_rc = action->rc; cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_run); } #endif if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } if (safe_str_eq(rsc->class, "stonith")) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = safe_str_eq(rsc->class, "stonith"); for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, cmd->action, cmd->interval); } } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(lrmd_client_t * client, uint32_t id, xmlNode * request) { xmlNode *reply = create_xml_node(NULL, "reply"); crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_LRMD_CLIENTID, client->id); crm_ipcs_send(client->channel, id, reply, FALSE); free_xml(reply); return pcmk_ok; } static int process_lrmd_rsc_register(lrmd_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && safe_str_eq(rsc->class, dup->class) && safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) { crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Added '%s' to the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); return rc; } static void process_lrmd_get_rsc_info(lrmd_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int send_rc = 0; int call_id = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (!rsc_id) { rc = -ENODEV; goto get_rsc_done; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; goto get_rsc_done; } get_rsc_done: reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } send_rc = crm_ipcs_send(client->channel, id, reply, FALSE); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } free_xml(reply); } static int process_lrmd_rsc_unregister(lrmd_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation still in progress: %p", rsc->active); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(lrmd_client_t * client, uint32_t id, xmlNode * request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); schedule_lrmd_cmd(rsc, cmd); return cmd->call_id; } static int cancel_op(const char *rsc_id, const char *action, int interval) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then its either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (safe_str_eq(rsc->class, "stonith")) { /* The service library does not handle stonith operations. * We have to handle recurring stonith opereations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t *rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval == 0) { continue; } if (client_id && safe_str_neq(cmd->client_id, client_id)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(lrmd_client_t * client, uint32_t id, xmlNode * request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); int interval = 0; crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval); } void process_lrmd_message(lrmd_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; int exit = 0; crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { rc = process_lrmd_signon(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) { process_lrmd_get_rsc_info(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) { rc = process_lrmd_rsc_exec(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) { rc = process_lrmd_rsc_cancel(client, id, request); do_reply = 1; } else if (crm_str_eq(op, CRM_OP_QUIT, TRUE)) { do_reply = 1; exit = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown %s from %s", op, client->name); crm_log_xml_warn(request, "UnknownOp"); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d, exit=%d", op, client->id, rc, do_reply, do_notify, exit); if (do_reply) { send_reply(client, rc, id, call_id); } if (do_notify) { send_generic_notify(rc, request); } if (exit) { lrmd_shutdown(0); } }