diff --git a/fencing/commands.c b/fencing/commands.c index 830e71b5d7..b1991217a3 100644 --- a/fencing/commands.c +++ b/fencing/commands.c @@ -1,2735 +1,2735 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if SUPPORT_CIBSECRETS # include #endif #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GListPtr capable; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data); static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; /* milliseconds */ int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; uint32_t victim_nodeid; char *action; char *device; char *mode; GListPtr device_list; GListPtr device_next; void *internal_user_data; void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; } async_command_t; static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc); static gboolean is_action_required(const char *action, stonith_device_t *device) { return device && device->automatic_unfencing && safe_str_eq(action, "on"); } static int get_action_delay_max(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_max_ms = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX); if (value) { delay_max_ms = crm_get_msec(value); } return delay_max_ms; } static int get_action_delay_base(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_base_ms = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE); if (value) { delay_base_ms = crm_get_msec(value); } return delay_base_ms; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the STONITH register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the LRMD uses it to * ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by stonithd when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(stonith_device_t * device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (safe_str_eq(action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = "off"; } /* If the device config specified an action-specific timeout, use it */ - snprintf(buffer, sizeof(buffer) - 1, "pcmk_%s_timeout", action); + snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { return atoi(value); } } return default_timeout; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->mode); free(cmd->op); free(cmd); } static async_command_t * create_async_command(xmlNode * msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->mode = crm_element_value_copy(op, F_STONITH_MODE); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd->done_cb = st_child_done; cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, STONITH_ATTR_ACTION_LIMIT); if (value) { action_limit = crm_parse_int(value, "1"); if (action_limit == 0) { /* pcmk_action_limit should not be 0. Enforce it to be 1. */ action_limit = 1; } } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : ""); return TRUE; } if (device->pending_ops) { GList *first = device->pending_ops; cmd = first->data; if (cmd && cmd->delay_id) { crm_trace ("Operation %s%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, cmd->start_delay); return TRUE; } device->pending_ops = g_list_remove_link(device->pending_ops, first); g_list_free_1(first); } if (cmd == NULL) { crm_trace("Nothing further to do for %s", device->id); return TRUE; } if(safe_str_eq(device->agent, STONITH_WATCHDOG_AGENT)) { if(safe_str_eq(cmd->action, "reboot")) { pcmk_panic(__FUNCTION__); return TRUE; } else if(safe_str_eq(cmd->action, "off")) { pcmk_panic(__FUNCTION__); return TRUE; } else { crm_info("Faking success for %s watchdog operation", cmd->action); cmd->done_cb(0, 0, NULL, cmd); return TRUE; } } #if SUPPORT_CIBSECRETS if (replace_secret_params(device->id, device->params) < 0) { /* replacing secrets failed! */ if (safe_str_eq(cmd->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", device->id); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", device->id); exec_rc = PCMK_OCF_NOT_CONFIGURED; cmd->done_cb(0, exec_rc, NULL, cmd); return TRUE; } } #endif action_str = cmd->action; if (safe_str_eq(cmd->action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) { crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent); action_str = "off"; } action = stonith_action_create(device->agent, action_str, cmd->victim, cmd->victim_nodeid, cmd->timeout, device->params, device->aliases); /* for async exec, exec_rc is pid if positive and error code if negative/zero */ exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb); if (exec_rc > 0) { crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, exec_rc, cmd->timeout); cmd->active_on = device; } else { crm_warn("Operation %s%s%s on %s failed: %s (%d)", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, pcmk_strerror(exec_rc), exec_rc); cmd->done_cb(0, exec_rc, NULL, cmd); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = NULL; cmd->delay_id = 0; device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && cmd->victim) { crm_node_t *node = crm_get_peer(0, cmd->victim); cmd->victim_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling %s on %s for remote peer %s with op id (%s) (timeout=%ds)", cmd->action, device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling %s on %s for %s (timeout=%ds)", cmd->action, device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn("Base-delay (%dms) is larger than max-delay (%dms) " "for %s on %s - limiting to max-delay", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { cmd->start_delay = ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; crm_notice("Delaying %s on %s for %lldms (timeout=%ds, base=%dms, " "max=%dms)", cmd->action, device->id, cmd->start_delay, cmd->timeout, delay_base, delay_max); cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action); cmd->done_cb(0, -ENODEV, NULL, cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); free(device->on_target_actions); free(device->agent); free(device->id); free(device); } static GHashTable * build_port_aliases(const char *hostmap, GListPtr * targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = crm_strcase_table_new(); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } static void parse_host_line(const char *line, int max, GListPtr * output) { int lpc = 0; int last = 0; if (max <= 0) { return; } /* Check for any complaints about additional parameters that the device doesn't understand */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping: %s", line); return; } crm_trace("Processing %d bytes: [%s]", max, line); /* Skip initial whitespace */ for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { last = lpc + 1; } /* Now the actual content */ for (lpc = 0; lpc <= max; lpc++) { gboolean a_space = isspace(line[lpc]); if (a_space && lpc < max && isspace(line[lpc + 1])) { /* fast-forward to the end of the spaces */ } else if (a_space || line[lpc] == ',' || line[lpc] == ';' || line[lpc] == 0) { int rc = 1; char *entry = NULL; if (lpc != last) { entry = calloc(1, 1 + lpc - last); rc = sscanf(line + last, "%[a-zA-Z0-9_-.]", entry); } if (entry == NULL) { /* Skip */ } else if (rc != 1) { crm_warn("Could not parse (%d %d): %s", last, lpc, line + last); } else if (safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) { crm_trace("Adding '%s'", entry); *output = g_list_append(*output, entry); entry = NULL; } free(entry); last = lpc + 1; } } } static GListPtr parse_host_list(const char *hosts) { int lpc = 0; int max = 0; int last = 0; GListPtr output = NULL; if (hosts == NULL) { return output; } max = strlen(hosts); for (lpc = 0; lpc <= max; lpc++) { if (hosts[lpc] == '\n' || hosts[lpc] == 0) { char *line = NULL; int len = lpc - last; if(len > 1) { line = malloc(1 + len); } if(line) { snprintf(line, 1 + len, "%s", hosts + last); line[len] = 0; /* Because it might be '\n' */ parse_host_line(line, len, &output); free(line); } last = lpc + 1; } } crm_trace("Parsed %d entries from '%s'", g_list_length(output), hosts); return output; } GHashTable *metadata_cache = NULL; static xmlNode * get_agent_metadata(const char *agent) { xmlNode *xml = NULL; char *buffer = NULL; if(metadata_cache == NULL) { metadata_cache = crm_str_table_new(); } buffer = g_hash_table_lookup(metadata_cache, agent); if(safe_str_eq(agent, STONITH_WATCHDOG_AGENT)) { return NULL; } else if(buffer == NULL) { stonith_t *st = stonith_api_new(); int rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return NULL; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } xml = string2xml(buffer); return xml; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } #define MAX_ACTION_LEN 256 static char * add_action(char *actions, const char *action) { int offset = 0; if (actions == NULL) { actions = calloc(1, MAX_ACTION_LEN); } else { offset = strlen(actions); } if (offset > 0) { offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " "); } offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action); return actions; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *on_target = NULL; const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; on_target = crm_element_value(match, "on_target"); action = crm_element_value(match, "name"); if(safe_str_eq(action, "list")) { set_bit(device->flags, st_device_supports_list); } else if(safe_str_eq(action, "status")) { set_bit(device->flags, st_device_supports_status); } else if(safe_str_eq(action, "reboot")) { set_bit(device->flags, st_device_supports_reboot); } else if (safe_str_eq(action, "on")) { /* "automatic" means the cluster will unfence node when it joins */ const char *automatic = crm_element_value(match, "automatic"); /* "required" is a deprecated synonym for "automatic" */ const char *required = crm_element_value(match, "required"); if (crm_is_true(automatic) || crm_is_true(required)) { device->automatic_unfencing = TRUE; } } if (action && crm_is_true(on_target)) { device->on_target_actions = add_action(device->on_target_actions, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, strdup(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in,out] params Device parameters */ static GHashTable * xml2device_params(const char *name, xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "reboot") == 0) { crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "off") == 0) { map_action(params, "reboot", value); } else { map_action(params, "off", value); map_action(params, "reboot", value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static stonith_device_t * build_device_from_xml(xmlNode * msg) { const char *value = NULL; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; device = calloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = crm_element_value_copy(dev, "agent"); device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if (value) { device->targets = parse_host_list(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); device->agent_metadata = get_agent_metadata(device->agent); read_action_metadata(device); value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (safe_str_eq(value, "unfencing")) { device->automatic_unfencing = TRUE; } if (is_action_required("on", device)) { crm_info("The fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions) { crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node", device->id, device->on_target_actions); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) { check_type = "static-list"; } else if(is_set(dev->flags, st_device_supports_list)){ check_type = "dynamic-list"; } else if(is_set(dev->flags, st_device_supports_status)){ check_type = "status"; } else { check_type = "none"; } } return check_type; } void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *victim, int timeout, void *internal_user_data, void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); cmd->victim = victim ? strdup(victim) : NULL; cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for (lpc = 0; lpc < max; lpc++) { const char *value = g_list_nth_data(list, lpc); if (safe_str_eq(item, value)) { return TRUE; } else { crm_trace("%d: '%s' != '%s'", lpc, item, value); } } return FALSE; } static void status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (rc == 1 /* unknown */ ) { crm_trace("Host %s is not known by %s", search->host, dev->id); } else if (rc == 0 /* active */ || rc == 2 /* inactive */ ) { crm_trace("Host %s is known by %s", search->host, dev->id); can = TRUE; } else { crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host, rc); } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); /* If we successfully got the targets earlier, don't disable. */ if (rc != 0 && !dev->targets) { crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output); /* Fall back to status */ g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status")); g_list_free_full(dev->targets, free); dev->targets = NULL; } else if (!rc) { crm_info("Refreshing port list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = parse_host_list(output); dev->targets_age = time(NULL); } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (string_in_list(dev->targets, alias)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || safe_str_neq(other_value, value)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t * device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (safe_str_neq(dup->agent, device->agent)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); dup = device_has_duplicate(device); if (dup) { crm_debug("Device '%s' already existed in device list (%d active devices)", device->id, g_hash_table_size(device_list)); free_device(device); device = dup; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting an existing entry for %s from the cib", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); } if (desc) { *desc = device->id; } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } int stonith_device_remove(const char *id, gboolean from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); if (!device) { crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); return pcmk_ok; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } else { crm_trace("Not removing '%s' from the device list (%d active devices) " "- still %s%s_registered", id, g_hash_table_size(device_list), device->cib_registered?"cib":"", device->api_registered?"api":""); } return pcmk_ok; } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(stonith_topology_t * tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } char *stonith_level_key(xmlNode *level, int mode) { if(mode == -1) { mode = stonith_level_kind(level); } switch(mode) { case 0: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); case 1: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); case 2: { const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE); if(name && value) { return crm_strdup_printf("%s=%s", name, value); } } default: return crm_strdup_printf("Unknown-%d-%s", mode, ID(level)); } } int stonith_level_kind(xmlNode * level) { int mode = 0; const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET); if(target == NULL) { mode++; target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN); } if(stand_alone == FALSE && target == NULL) { mode++; if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) { mode++; } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) { mode++; } } return mode; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = NULL; line = calloc(1, 2 + lpc - last); snprintf(line, 1 + lpc - last, "%s", devices + last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Register a STONITH level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, will be set to string representation ("TARGET[LEVEL]") * * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index */ int stonith_level_register(xmlNode *msg, char **desc) { int id = 0; xmlNode *level; int mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; /* Allow the XML here to point to the level tag directly, or wrapped in * another tag. If directly, don't search by xpath, because it might give * multiple hits (e.g. if the XML is the CIB). */ if (safe_str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL)) { level = msg; } else { level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); } CRM_CHECK(level != NULL, return -EINVAL); mode = stonith_level_kind(level); target = stonith_level_key(level, mode); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) { crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology)); free(target); crm_log_xml_err(level, "Bad topology"); return -EINVAL; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } stonith_key_value_freeall(devices, 1, 1); crm_info("Target %s has %d active fencing levels", tp->target, count_active_levels(tp)); return pcmk_ok; } int stonith_level_remove(xmlNode *msg, char **desc) { int id = 0; stonith_topology_t *tp; char *target; /* Unlike additions, removal requests should always have one level tag */ xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); CRM_CHECK(level != NULL, return -EINVAL); target = stonith_level_key(level, -1); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (id >= ST_LEVEL_MAX) { free(target); return -EINVAL; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { crm_info("Topology for %s not found (%d active entries)", target, g_hash_table_size(topology)); } else if (id == 0 && g_hash_table_remove(topology, target)) { crm_info("Removed all %s related entries from the topology (%d active entries)", target, g_hash_table_size(topology)); } else if (id > 0 && tp->levels[id] != NULL) { g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; crm_info("Removed level '%d' from topology for %s (%d active levels remaining)", id, target, count_active_levels(tp)); } free(target); return pcmk_ok; } static int stonith_device_action(xmlNode * msg, char **output) { int rc = pcmk_ok; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if (id) { crm_trace("Looking for '%s'", id); device = g_hash_table_lookup(device_list, id); } if (device && device->api_registered == FALSE) { rc = -ENODEV; } else if (device) { cmd = create_async_command(msg); if (cmd == NULL) { return -EPROTO; } schedule_stonith_command(cmd, device); rc = -EINPROGRESS; } else { crm_info("Device %s not found", id ? id : ""); rc = -ENODEV; } return rc; } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { crm_debug("Finished Search. %d devices can perform action (%s) on node %s", g_list_length(search->capable), search->action ? search->action : "", search->host ? search->host : ""); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = safe_str_eq(target, stonith_our_uname); if (device && action && device->on_target_actions && strstr(device->on_target_actions, action)) { if (!localhost_is_target) { crm_trace("%s operation with %s can only be executed for localhost not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("%s operation does not support self-fencing", action); return FALSE; } return TRUE; } static void can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = NULL; const char *host = search->host; const char *alias = NULL; CRM_LOG_ASSERT(dev != NULL); if (dev == NULL) { goto search_report_results; } else if (host == NULL) { can = TRUE; goto search_report_results; } /* Short-circuit query if this host is not allowed to perform the action */ if (safe_str_eq(search->action, "reboot")) { /* A "reboot" *might* get remapped to "off" then "on", so short-circuit * only if all three are disallowed. If only one or two are disallowed, * we'll report that with the results. We never allow suicide for * remapped "on" operations because the host is off at that point. */ if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide) && !localhost_is_eligible(dev, "off", host, search->allow_suicide) && !localhost_is_eligible(dev, "on", host, FALSE)) { goto search_report_results; } } else if (!localhost_is_eligible(dev, search->action, host, search->allow_suicide)) { goto search_report_results; } alias = g_hash_table_lookup(dev->aliases, host); if (alias == NULL) { alias = host; } check_type = target_list_type(dev); if (safe_str_eq(check_type, "none")) { can = TRUE; } else if (safe_str_eq(check_type, "static-list")) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if (string_in_list(dev->targets, host)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if (safe_str_eq(check_type, "dynamic-list")) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { crm_trace("Running %s command to see if %s can fence %s (%s)", check_type, dev?dev->id:"N/A", search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (string_in_list(dev->targets, alias)) { can = TRUE; } } else if (safe_str_eq(check_type, "status")) { crm_trace("Running %s command to see if %s can fence %s (%s)", check_type, dev?dev->id:"N/A", search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Unknown check type: %s", check_type); } if (safe_str_eq(host, alias)) { crm_notice("%s can%s fence (%s) %s: %s", dev->id, can ? "" : " not", search->action, host, check_type); } else { crm_notice("%s can%s fence (%s) %s (aka. '%s'): %s", dev->id, can ? "" : " not", search->action, host, alias, check_type); } search_report_results: search_devices_record_result(search, dev ? dev->id : NULL, can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data)) { struct device_search_s *search; int per_device_timeout = DEFAULT_QUERY_TIMEOUT; int devices_needing_async_query = 0; char *key = NULL; const char *check_type = NULL; GHashTableIter gIter; stonith_device_t *device = NULL; if (!g_hash_table_size(device_list)) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { callback(NULL, user_data); return; } g_hash_table_iter_init(&gIter, device_list); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) { check_type = target_list_type(device); if (safe_str_eq(check_type, "status") || safe_str_eq(check_type, "dynamic-list")) { devices_needing_async_query++; } } /* If we have devices that require an async event in order to know what * nodes they can fence, we have to give the events a timeout. The total * query timeout is divided among those events. */ if (devices_needing_async_query) { per_device_timeout = timeout / devices_needing_async_query; if (!per_device_timeout) { crm_err("STONITH timeout %ds is too low; using %ds, but consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); per_device_timeout = DEFAULT_QUERY_TIMEOUT; } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) { crm_notice("STONITH timeout %ds is low for the current configuration;" " consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); } } search->host = host ? strdup(host) : NULL; search->action = action ? strdup(action) : NULL; search->per_device_timeout = per_device_timeout; /* We are guaranteed this many replies. Even if a device gets * unregistered some how during the async search, we will get * the correct number of replies. */ search->replies_needed = g_hash_table_size(device_list); search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; /* kick off the search */ crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s", search->replies_needed, search->action ? search->action : "", search->host ? search->host : ""); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device */ static void add_action_specific_attributes(xmlNode *xml, const char *action, stonith_device_t *device) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); if (is_action_required(action, device)) { crm_trace("Action %s is required on %s", action, device->id); crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); } action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action %s has timeout %dms on %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action %s has maximum random delay %dms on %s", action, delay_max, device->id); crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000); } delay_base = get_action_delay_base(device, action); if (delay_base > 0) { crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action %s has maximum random delay %dms on %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action %s has a static delay of %dms on %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action %s has a minimum delay of %dms and a randomly chosen " "maximum delay of %dms on %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action %s on %s is disallowed for local host", action, device->id); crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); crm_xml_add(child, XML_ATTR_ID, action); add_action_specific_attributes(child, action, device); add_disallowed(child, action, device, target, allow_suicide); } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GListPtr lpc = NULL; /* Pack the results into XML */ list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); /* If the originating stonithd wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (is_not_set(device->flags, st_device_supports_reboot) && safe_str_eq(query->action, "reboot")) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = "off"; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device); if (safe_str_eq(query->action, "reboot")) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older stonithd versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "off", device, query->target, is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "on", device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching devices for '%s'", available_devices, query->target); } else { crm_debug("%d devices installed", available_devices); } if (list != NULL) { crm_log_xml_trace(list, "Add query results"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id); free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } static void stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options) { struct st_query_data *query = NULL; const char *action = NULL; const char *target = NULL; int timeout = 0; xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_DEBUG_3); crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout); if (dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); if (device && safe_str_eq(device, "manual_ack")) { /* No query or reply necessary */ return; } } crm_log_xml_debug(msg, "Query"); query = calloc(1, sizeof(struct st_query_data)); query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok); query->remote_peer = remote_peer ? strdup(remote_peer) : NULL; query->client_id = client_id ? strdup(client_id) : NULL; query->target = target ? strdup(target) : NULL; query->action = action ? strdup(action) : NULL; query->call_options = call_options; get_capable_devices(target, action, timeout, is_set(call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb); } #define ST_LOG_OUTPUT_MAX 512 static void log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output) { if (rc == 0) { next = NULL; } if (cmd->victim != NULL) { do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, rc, pcmk_strerror(rc), next ? ". Trying: " : "", next ? next : ""); } else { do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->device, rc, pcmk_strerror(rc), next ? ". Trying: " : "", next ? next : ""); } if (output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid); crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output); free(prefix); } } static void stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid) { xmlNode *reply = NULL; gboolean bcast = FALSE; reply = stonith_construct_async_reply(cmd, output, NULL, rc); if (safe_str_eq(cmd->action, "metadata")) { /* Too verbose to log */ crm_trace("Metadata query for %s", cmd->device); output = NULL; } else if (crm_str_eq(cmd->action, "monitor", TRUE) || crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) { crm_trace("Never broadcast %s replies", cmd->action); } else if (!stand_alone && safe_str_eq(cmd->origin, cmd->victim) && safe_str_neq(cmd->action, "on")) { crm_trace("Broadcast %s reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output); crm_log_xml_trace(reply, "Reply"); if (bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if (cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); } free_xml(reply); } void unfence_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t * cmd = user_data; stonith_device_t *dev = g_hash_table_lookup(device_list, cmd->device); log_operation(cmd, rc, pid, NULL, output); cmd->active_on = NULL; if(dev) { mainloop_set_trigger(dev->work); } else { crm_trace("Device %s does not exist", cmd->device); } if(rc != 0) { crm_exit(DAEMON_RESPAWN_STOP); } } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled %s on %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data) { stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; async_command_t *cmd = user_data; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(cmd != NULL, return); cmd->active_on = NULL; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if (device) { if (rc == pcmk_ok && (safe_str_eq(cmd->action, "list") || safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)", cmd->action, cmd->device, rc, g_list_length(cmd->device_next)); if (rc == 0) { GListPtr iter; /* see if there are any required devices left to execute for this op */ for (iter = cmd->device_next; iter != NULL; iter = iter->next) { next_device = g_hash_table_lookup(device_list, iter->data); if (next_device != NULL && is_action_required(cmd->action, next_device)) { cmd->device_next = iter->next; break; } next_device = NULL; } } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->device_next->data); cmd->device_next = cmd->device_next->next; } /* this operation requires more fencing, hooray! */ if (next_device) { log_operation(cmd, rc, pid, cmd->device, output); schedule_stonith_command(cmd, next_device); /* Prevent cmd from being freed */ cmd = NULL; goto done; } stonith_send_async_reply(cmd, output, rc, pid); if (rc != 0) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if (cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (safe_str_eq(cmd->client, cmd_other->client) || safe_str_neq(cmd->victim, cmd_other->victim) || safe_str_neq(cmd->action, cmd_other->action) || safe_str_neq(cmd->device, cmd_other->device)) { continue; } /* Duplicate merging will do the right thing for either type of remapped * reboot. If the executing stonithd remapped an unsupported reboot to * off, then cmd->action will be reboot and will be merged with any * other reboot requests. If the originating stonithd remapped a * topology reboot to off then on, we will get here once with * cmd->action "off" and once with "on", and they will be merged * separately with similar requests. */ crm_notice ("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); stonith_send_async_reply(cmd_other, output, rc, pid); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim); if (g_list_length(devices) > 0) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); if (device) { cmd->device_list = devices; cmd->device_next = devices->next; devices = NULL; /* list owned by cmd now */ } } /* we have a device, schedule it for fencing. */ if (device) { schedule_stonith_command(cmd, device); /* in progress */ return; } /* no device found! */ stonith_send_async_reply(cmd, NULL, -ENODEV, 0); free_async_command(cmd); g_list_free_full(devices, free); } static int stonith_fence(xmlNode * msg) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); if (cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); return -ENODEV; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); char *nodename = NULL; if (cmd->options & st_opt_cs_nodeid) { int nodeid = crm_atoi(host, NULL); nodename = stonith_get_peer_name(nodeid); if (nodename) { host = nodename; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb); free(nodename); } return -EINPROGRESS; } xmlNode * stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); for (lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } static xmlNode * stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if (data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } /*! * \internal * \brief Determine if we need to use an alternate node to * fence the target. If so return that node's uname * * \retval NULL, no alternate host * \retval uname, uname of alternate host to use */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target); if (find_topology_for_host(target) && safe_str_eq(target, stonith_our_uname)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if (fencing_peer_active(entry) && safe_str_neq(entry->uname, target)) { alternate_host = entry->uname; break; } } if (alternate_host == NULL) { crm_err("No alternate host available to handle complex self fencing request"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id) { if (remote_peer) { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL); } } static int handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; xmlNode *data = NULL; xmlNode *reply = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); crm_ipcs_send(client, id, reply, flags); client->request_id = 0; free_xml(reply); return 0; } else if (crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { rc = stonith_device_action(request, &output); } else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } stonith_query(request, remote_peer, client_id, call_options); return 0; } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } if (flags & crm_ipc_client_response) { crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); } return 0; } else if (crm_str_eq(op, STONITH_OP_RELAY, TRUE)) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s", stonith_our_uname, client ? client->name : remote_peer, crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { if (remote_peer || stand_alone) { rc = stonith_fence(request); } else if (call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); crm_notice("Received manual confirmation that %s is fenced", target); rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (client) { int tolerance = 0; crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'", client->name, client->id, action, target, device ? device : "(any)"); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device ? device : "(any)"); } alternate_host = check_alternate_host(target); if (alternate_host && client) { const char *client_id = NULL; crm_notice("Forwarding complex self fencing request to peer %s", alternate_host); if (client->id) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } /* Create a record of it, otherwise call_id will be 0 if we need to notify of failures */ create_remote_stonith_op(client_id, request, FALSE); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) { rc = stonith_fence_history(request, &data); } else if (crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { const char *device_id = NULL; rc = stonith_device_register(request, &device_id, FALSE); do_stonith_notify_device(call_options, op, rc, device_id); } else if (crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); const char *device_id = crm_element_value(dev, XML_ATTR_ID); rc = stonith_device_remove(device_id, FALSE); do_stonith_notify_device(call_options, op, rc, device_id); } else if (crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) { char *device_id = NULL; rc = stonith_level_register(request, &device_id); do_stonith_notify_level(call_options, op, rc, device_id); free(device_id); } else if (crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) { char *device_id = NULL; rc = stonith_level_remove(request, &device_id); do_stonith_notify_level(call_options, op, rc, device_id); } else if (crm_str_eq(op, STONITH_OP_CONFIRM, TRUE)) { async_command_t *cmd = create_async_command(request); xmlNode *reply = stonith_construct_async_reply(cmd, NULL, NULL, 0); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_notice("Broadcasting manual fencing confirmation for node %s", cmd->victim); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); free_async_command(cmd); free_xml(reply); } else if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { int node_id = 0; const char *name = NULL; crm_element_value_int(request, XML_ATTR_ID, &node_id); name = crm_element_value(request, XML_ATTR_UNAME); reap_crm_member(node_id, name); return pcmk_ok; } else { crm_err("Unknown %s from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } done: /* Always reply unless the request is in process still. * If in progress, a reply will happen async after the request * processing is finished */ if (rc != -EINPROGRESS) { crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, id, is_set(call_options, st_opt_sync_call), call_options, crm_element_value(request, F_STONITH_CALLOPTS)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } reply = stonith_construct_reply(request, output, data, rc); stonith_send_reply(reply, call_options, remote_peer, client_id); } free(output); free_xml(data); free_xml(reply); return rc; } static void handle_reply(crm_client_t * client, xmlNode * request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { process_remote_stonith_query(request); } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { process_remote_stonith_exec(request); } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; /* Copy op for reporting. The original might get freed by handle_reply() * before we use it in crm_debug(): * handle_reply() * |- process_remote_stonith_exec() * |-- remote_op_done() * |--- handle_local_reply_and_notify() * |---- crm_xml_add(...F_STONITH_OPERATION...) * |--- free_xml(op->request) */ char *op = crm_element_value_copy(request, F_STONITH_OPERATION); if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_DEBUG_3)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "", id, client ? client->name : remote_peer, call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } crm_debug("Processed %s%s from %s: %s (%d)", op, is_reply ? " reply" : "", client ? client->name : remote_peer, rc > 0 ? "" : pcmk_strerror(rc), rc); free(op); } diff --git a/lib/common/logging.c b/lib/common/logging.c index 454836758a..deb3941348 100644 --- a/lib/common/logging.c +++ b/lib/common/logging.c @@ -1,1269 +1,1269 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include unsigned int crm_log_priority = LOG_NOTICE; unsigned int crm_log_level = LOG_INFO; static gboolean crm_tracing_enabled(void); unsigned int crm_trace_nonlog = 0; bool crm_is_daemon = 0; #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER GLogFunc glib_log_default; static void crm_glib_handler(const gchar * log_domain, GLogLevelFlags flags, const gchar * message, gpointer user_data) { int log_level = LOG_WARNING; GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK); static struct qb_log_callsite *glib_cs = NULL; if (glib_cs == NULL) { glib_cs = qb_log_callsite_get(__FUNCTION__, __FILE__, "glib-handler", LOG_DEBUG, __LINE__, crm_trace_nonlog); } switch (msg_level) { case G_LOG_LEVEL_CRITICAL: log_level = LOG_CRIT; if (crm_is_callsite_active(glib_cs, LOG_DEBUG, 0) == FALSE) { /* log and record how we got here */ crm_abort(__FILE__, __FUNCTION__, __LINE__, message, TRUE, TRUE); } break; case G_LOG_LEVEL_ERROR: log_level = LOG_ERR; break; case G_LOG_LEVEL_MESSAGE: log_level = LOG_NOTICE; break; case G_LOG_LEVEL_INFO: log_level = LOG_INFO; break; case G_LOG_LEVEL_DEBUG: log_level = LOG_DEBUG; break; case G_LOG_LEVEL_WARNING: case G_LOG_FLAG_RECURSION: case G_LOG_FLAG_FATAL: case G_LOG_LEVEL_MASK: log_level = LOG_WARNING; break; } do_crm_log(log_level, "%s: %s", log_domain, message); } #endif #ifndef NAME_MAX # define NAME_MAX 256 #endif static void crm_trigger_blackbox(int nsig) { if(nsig == SIGTRAP) { /* Turn it on if it wasn't already */ crm_enable_blackbox(nsig); } crm_write_blackbox(nsig, NULL); } const char * daemon_option(const char *option) { char env_name[NAME_MAX]; const char *value = NULL; snprintf(env_name, NAME_MAX, "PCMK_%s", option); value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } snprintf(env_name, NAME_MAX, "HA_%s", option); value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } crm_trace("Nothing found for %s", option); return NULL; } void set_daemon_option(const char *option, const char *value) { char env_name[NAME_MAX]; snprintf(env_name, NAME_MAX, "PCMK_%s", option); if (value) { crm_trace("Setting %s to %s", env_name, value); setenv(env_name, value, 1); } else { crm_trace("Unsetting %s", env_name); unsetenv(env_name); } snprintf(env_name, NAME_MAX, "HA_%s", option); if (value) { crm_trace("Setting %s to %s", env_name, value); setenv(env_name, value, 1); } else { crm_trace("Unsetting %s", env_name); unsetenv(env_name); } } gboolean daemon_option_enabled(const char *daemon, const char *option) { const char *value = daemon_option(option); if (value != NULL && crm_is_true(value)) { return TRUE; } else if (value != NULL && strstr(value, daemon)) { return TRUE; } return FALSE; } void crm_log_deinit(void) { #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER g_log_set_default_handler(glib_log_default, NULL); #endif } #define FMT_MAX 256 static void set_format_string(int method, const char *daemon) { int offset = 0; char fmt[FMT_MAX]; if (method > QB_LOG_STDERR) { /* When logging to a file */ struct utsname res; if (uname(&res) == 0) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%lu] %s %10s: ", (unsigned long) getpid(), res.nodename, daemon); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%lu] %10s: ", (unsigned long) getpid(), daemon); } } if (method == QB_LOG_SYSLOG) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%b"); crm_extended_logging(method, QB_FALSE); } else if (crm_tracing_enabled()) { offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%-12f:%%5l %%g) %%-7p: %%n:\t%%b"); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%n:\t%%b"); } CRM_LOG_ASSERT(offset > 0); qb_log_format_set(method, fmt); } gboolean crm_add_logfile(const char *filename) { bool is_default = false; static int default_fd = -1; static gboolean have_logfile = FALSE; /* @COMPAT This should be CRM_LOG_DIR "/pacemaker.log". We aren't changing * it yet because it will be a significant user-visible change to publicize. */ const char *default_logfile = "/var/log/pacemaker.log"; struct stat parent; int fd = 0, rc = 0; FILE *logfile = NULL; char *parent_dir = NULL; char *filename_cp; if (filename == NULL && have_logfile == FALSE) { filename = default_logfile; } if (filename == NULL) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, "none")) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, "/dev/null")) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, default_logfile)) { is_default = TRUE; } if(is_default && default_fd >= 0) { return TRUE; /* Nothing to do */ } /* Check the parent directory */ filename_cp = strdup(filename); parent_dir = dirname(filename_cp); rc = stat(parent_dir, &parent); if (rc != 0) { crm_err("Directory '%s' does not exist: logging to '%s' is disabled", parent_dir, filename); free(filename_cp); return FALSE; } free(filename_cp); errno = 0; logfile = fopen(filename, "a"); if(logfile == NULL) { crm_err("%s (%d): Logging to '%s' as uid=%u, gid=%u is disabled", pcmk_strerror(errno), errno, filename, geteuid(), getegid()); return FALSE; } /* Check/Set permissions if we're root */ if (geteuid() == 0) { struct stat st; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; gboolean fix = FALSE; int logfd = fileno(logfile); rc = fstat(logfd, &st); if (rc < 0) { crm_perror(LOG_WARNING, "Cannot stat %s", filename); fclose(logfile); return FALSE; } if(crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) == 0) { if (st.st_gid != pcmk_gid) { /* Wrong group */ fix = TRUE; } else if ((st.st_mode & S_IRWXG) != (S_IRGRP | S_IWGRP)) { /* Not read/writable by the correct group */ fix = TRUE; } } if (fix) { rc = fchown(logfd, pcmk_uid, pcmk_gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d", filename, CRM_DAEMON_USER, pcmk_gid); } rc = fchmod(logfd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (rc < 0) { crm_warn("Cannot change the mode of %s to rw-rw----", filename); } fprintf(logfile, "Set r/w permissions for uid=%d, gid=%d on %s\n", pcmk_uid, pcmk_gid, filename); if (fflush(logfile) < 0 || fsync(logfd) < 0) { crm_err("Couldn't write out logfile: %s", filename); } } } /* Close and reopen with libqb */ fclose(logfile); fd = qb_log_file_open(filename); if (fd < 0) { crm_perror(LOG_WARNING, "Couldn't send additional logging to %s", filename); return FALSE; } if(is_default) { default_fd = fd; } else if(default_fd >= 0) { crm_notice("Switching to %s", filename); qb_log_ctl(default_fd, QB_LOG_CONF_ENABLED, QB_FALSE); } crm_notice("Additional logging available in %s", filename); qb_log_ctl(fd, QB_LOG_CONF_ENABLED, QB_TRUE); /* qb_log_ctl(fd, QB_LOG_CONF_FILE_SYNC, 1); Turn on synchronous writes */ /* Enable callsites */ crm_update_callsites(); have_logfile = TRUE; return TRUE; } static int blackbox_trigger = 0; static char *blackbox_file_prefix = NULL; static void blackbox_logger(int32_t t, struct qb_log_callsite *cs, time_t timestamp, const char *msg) { if(cs && cs->priority < LOG_ERR) { crm_write_blackbox(SIGTRAP, cs); /* Bypass the over-dumping logic */ } else { crm_write_blackbox(0, cs); } } static void crm_control_blackbox(int nsig, bool enable) { int lpc = 0; if (blackbox_file_prefix == NULL) { pid_t pid = getpid(); blackbox_file_prefix = malloc(NAME_MAX); snprintf(blackbox_file_prefix, NAME_MAX, "%s/%s-%lu", CRM_BLACKBOX_DIR, crm_system_name, (unsigned long) pid); } if (enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, 5 * 1024 * 1024); /* Any size change drops existing entries */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); /* Setting the size seems to disable it */ /* Enable synchronous logging */ for (lpc = QB_LOG_BLACKBOX; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_FILE_SYNC, QB_TRUE); } crm_notice("Initiated blackbox recorder: %s", blackbox_file_prefix); /* Save to disk on abnormal termination */ crm_signal(SIGSEGV, crm_trigger_blackbox); crm_signal(SIGABRT, crm_trigger_blackbox); crm_signal(SIGILL, crm_trigger_blackbox); crm_signal(SIGBUS, crm_trigger_blackbox); crm_signal(SIGFPE, crm_trigger_blackbox); crm_update_callsites(); blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL); qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE); crm_trace("Trigger: %d is %d %d", blackbox_trigger, qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); crm_update_callsites(); } else if (!enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); /* Disable synchronous logging again when the blackbox is disabled */ for (lpc = QB_LOG_BLACKBOX; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_FILE_SYNC, QB_FALSE); } } } void crm_enable_blackbox(int nsig) { crm_control_blackbox(nsig, TRUE); } void crm_disable_blackbox(int nsig) { crm_control_blackbox(nsig, FALSE); } void crm_write_blackbox(int nsig, struct qb_log_callsite *cs) { static int counter = 1; static time_t last = 0; char buffer[NAME_MAX]; time_t now = time(NULL); if (blackbox_file_prefix == NULL) { return; } switch (nsig) { case 0: case SIGTRAP: /* The graceful case - such as assertion failure or user request */ if (nsig == 0 && now == last) { /* Prevent over-dumping */ return; } snprintf(buffer, NAME_MAX, "%s.%d", blackbox_file_prefix, counter++); if (nsig == SIGTRAP) { crm_notice("Blackbox dump requested, please see %s for contents", buffer); } else if (cs) { syslog(LOG_NOTICE, "Problem detected at %s:%d (%s), please see %s for additional details", cs->function, cs->lineno, cs->filename, buffer); } else { crm_notice("Problem detected, please see %s for additional details", buffer); } last = now; qb_log_blackbox_write_to_file(buffer); /* Flush the existing contents * A size change would also work */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); break; default: /* Do as little as possible, just try to get what we have out * We logged the filename when the blackbox was enabled */ crm_signal(nsig, SIG_DFL); qb_log_blackbox_write_to_file(blackbox_file_prefix); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); raise(nsig); break; } } gboolean crm_log_cli_init(const char *entity) { return crm_log_init(entity, LOG_ERR, FALSE, FALSE, 0, NULL, TRUE); } static const char * crm_quark_to_string(uint32_t tag) { const char *text = g_quark_to_string(tag); if (text) { return text; } return ""; } static void crm_log_filter_source(int source, const char *trace_files, const char *trace_fns, const char *trace_fmts, const char *trace_tags, const char *trace_blackbox, struct qb_log_callsite *cs) { if (qb_log_ctl(source, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { return; } else if (cs->tags != crm_trace_nonlog && source == QB_LOG_BLACKBOX) { /* Blackbox gets everything if enabled */ qb_bit_set(cs->targets, source); } else if (source == blackbox_trigger && blackbox_trigger > 0) { /* Should this log message result in the blackbox being dumped */ if (cs->priority <= LOG_ERR) { qb_bit_set(cs->targets, source); } else if (trace_blackbox) { char *key = crm_strdup_printf("%s:%d", cs->function, cs->lineno); if (strstr(trace_blackbox, key) != NULL) { qb_bit_set(cs->targets, source); } free(key); } } else if (source == QB_LOG_SYSLOG) { /* No tracing to syslog */ if (cs->priority <= crm_log_priority && cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } /* Log file tracing options... */ } else if (cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } else if (trace_files && strstr(trace_files, cs->filename) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_fns && strstr(trace_fns, cs->function) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_fmts && strstr(trace_fmts, cs->format) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_tags && cs->tags != 0 && cs->tags != crm_trace_nonlog && g_quark_to_string(cs->tags) != NULL) { qb_bit_set(cs->targets, source); } } static void crm_log_filter(struct qb_log_callsite *cs) { int lpc = 0; static int need_init = 1; static const char *trace_fns = NULL; static const char *trace_tags = NULL; static const char *trace_fmts = NULL; static const char *trace_files = NULL; static const char *trace_blackbox = NULL; if (need_init) { need_init = 0; trace_fns = getenv("PCMK_trace_functions"); trace_fmts = getenv("PCMK_trace_formats"); trace_tags = getenv("PCMK_trace_tags"); trace_files = getenv("PCMK_trace_files"); trace_blackbox = getenv("PCMK_trace_blackbox"); if (trace_tags != NULL) { uint32_t tag; char token[500]; const char *offset = NULL; const char *next = trace_tags; do { offset = next; next = strchrnul(offset, ','); - snprintf(token, 499, "%.*s", (int)(next - offset), offset); + snprintf(token, sizeof(token), "%.*s", (int)(next - offset), offset); tag = g_quark_from_string(token); crm_info("Created GQuark %u from token '%s' in '%s'", tag, token, trace_tags); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } } cs->targets = 0; /* Reset then find targets to enable */ for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { crm_log_filter_source(lpc, trace_files, trace_fns, trace_fmts, trace_tags, trace_blackbox, cs); } } gboolean crm_is_callsite_active(struct qb_log_callsite *cs, uint8_t level, uint32_t tags) { gboolean refilter = FALSE; if (cs == NULL) { return FALSE; } if (cs->priority != level) { cs->priority = level; refilter = TRUE; } if (cs->tags != tags) { cs->tags = tags; refilter = TRUE; } if (refilter) { crm_log_filter(cs); } if (cs->targets == 0) { return FALSE; } return TRUE; } void crm_update_callsites(void) { static gboolean log = TRUE; if (log) { log = FALSE; crm_debug ("Enabling callsites based on priority=%d, files=%s, functions=%s, formats=%s, tags=%s", crm_log_level, getenv("PCMK_trace_files"), getenv("PCMK_trace_functions"), getenv("PCMK_trace_formats"), getenv("PCMK_trace_tags")); } qb_log_filter_fn_set(crm_log_filter); } static gboolean crm_tracing_enabled(void) { if (crm_log_level >= LOG_TRACE) { return TRUE; } else if (getenv("PCMK_trace_files") || getenv("PCMK_trace_functions") || getenv("PCMK_trace_formats") || getenv("PCMK_trace_tags")) { return TRUE; } return FALSE; } static int crm_priority2int(const char *name) { struct syslog_names { const char *name; int priority; }; static struct syslog_names p_names[] = { {"emerg", LOG_EMERG}, {"alert", LOG_ALERT}, {"crit", LOG_CRIT}, {"error", LOG_ERR}, {"warning", LOG_WARNING}, {"notice", LOG_NOTICE}, {"info", LOG_INFO}, {"debug", LOG_DEBUG}, {NULL, -1} }; int lpc; for (lpc = 0; name != NULL && p_names[lpc].name != NULL; lpc++) { if (crm_str_eq(p_names[lpc].name, name, TRUE)) { return p_names[lpc].priority; } } return crm_log_priority; } static void crm_identity(const char *entity, int argc, char **argv) { if(crm_system_name != NULL) { /* Nothing to do */ } else if (entity) { free(crm_system_name); crm_system_name = strdup(entity); } else if (argc > 0 && argv != NULL) { char *mutable = strdup(argv[0]); char *modified = basename(mutable); if (strstr(modified, "lt-") == modified) { modified += 3; } free(crm_system_name); crm_system_name = strdup(modified); free(mutable); } else if (crm_system_name == NULL) { crm_system_name = strdup("Unknown"); } setenv("PCMK_service", crm_system_name, 1); } void crm_log_preinit(const char *entity, int argc, char **argv) { /* Configure libqb logging with nothing turned on */ int lpc = 0; int32_t qb_facility = 0; static bool have_logging = FALSE; if(have_logging == FALSE) { have_logging = TRUE; crm_xml_init(); /* Sets buffer allocation strategy */ if (crm_trace_nonlog == 0) { crm_trace_nonlog = g_quark_from_static_string("Pacemaker non-logging tracepoint"); } umask(S_IWGRP | S_IWOTH | S_IROTH); /* Redirect messages from glib functions to our handler */ #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL); #endif /* and for good measure... - this enum is a bit field (!) */ g_log_set_always_fatal((GLogLevelFlags) 0); /*value out of range */ /* Who do we log as */ crm_identity(entity, argc, argv); qb_facility = qb_log_facility2int("local0"); qb_log_init(crm_system_name, qb_facility, LOG_ERR); crm_log_level = LOG_CRIT; /* Nuke any syslog activity until it's asked for */ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); /* Set format strings and disable threading * Pacemaker and threads do not mix well (due to the amount of forking) */ qb_log_tags_stringify_fn_set(crm_quark_to_string); for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_THREADED, QB_FALSE); set_format_string(lpc, crm_system_name); } } } gboolean crm_log_init(const char *entity, uint8_t level, gboolean daemon, gboolean to_stderr, int argc, char **argv, gboolean quiet) { const char *syslog_priority = NULL; const char *logfile = daemon_option("logfile"); const char *facility = daemon_option("logfacility"); const char *f_copy = facility; crm_is_daemon = daemon; crm_log_preinit(entity, argc, argv); if(level > crm_log_level) { crm_log_level = level; } /* Should we log to syslog */ if (facility == NULL) { if(crm_is_daemon) { facility = "daemon"; } else { facility = "none"; } set_daemon_option("logfacility", facility); } if (safe_str_eq(facility, "none")) { quiet = TRUE; } else { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, qb_log_facility2int(facility)); } if (daemon_option_enabled(crm_system_name, "debug")) { /* Override the default setting */ crm_log_level = LOG_DEBUG; } /* What lower threshold do we have for sending to syslog */ syslog_priority = daemon_option("logpriority"); if(syslog_priority) { int priority = crm_priority2int(syslog_priority); crm_log_priority = priority; qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", priority); } else { qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_NOTICE); } if (!quiet) { /* Nuke any syslog activity */ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); } /* Should we log to stderr */ if (daemon_option_enabled(crm_system_name, "stderr")) { /* Override the default setting */ to_stderr = TRUE; } crm_enable_stderr(to_stderr); /* Should we log to a file */ if (safe_str_eq("none", logfile)) { /* No soup^Hlogs for you! */ } else if(crm_is_daemon) { /* The daemons always get a log file, unless explicitly set to configured 'none' */ crm_add_logfile(logfile); } else if(logfile) { crm_add_logfile(logfile); } if (crm_is_daemon && daemon_option_enabled(crm_system_name, "blackbox")) { crm_enable_blackbox(0); } /* Summary */ crm_trace("Quiet: %d, facility %s", quiet, f_copy); daemon_option("logfile"); daemon_option("logfacility"); crm_update_callsites(); /* Ok, now we can start logging... */ if (quiet == FALSE && crm_is_daemon == FALSE) { crm_log_args(argc, argv); } if (crm_is_daemon) { const char *user = getenv("USER"); if (user != NULL && safe_str_neq(user, "root") && safe_str_neq(user, CRM_DAEMON_USER)) { crm_trace("Not switching to corefile directory for %s", user); crm_is_daemon = FALSE; } } if (crm_is_daemon) { int user = getuid(); const char *base = CRM_CORE_DIR; struct passwd *pwent = getpwuid(user); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get name for uid: %d", user); } else if (safe_str_neq(pwent->pw_name, "root") && safe_str_neq(pwent->pw_name, CRM_DAEMON_USER)) { crm_trace("Don't change active directory for regular user: %s", pwent->pw_name); } else if (chdir(base) < 0) { crm_perror(LOG_INFO, "Cannot change active directory to %s", base); } else { crm_info("Changed active directory to %s", base); #if 0 { char path[512]; snprintf(path, 512, "%s-%lu", crm_system_name, (unsigned long) getpid()); mkdir(path, 0750); chdir(path); crm_info("Changed active directory to %s/%s/%s", base, pwent->pw_name, path); } #endif } /* Original meanings from signal(7) * * Signal Value Action Comment * SIGTRAP 5 Core Trace/breakpoint trap * SIGUSR1 30,10,16 Term User-defined signal 1 * SIGUSR2 31,12,17 Term User-defined signal 2 * * Our usage is as similar as possible */ mainloop_add_signal(SIGUSR1, crm_enable_blackbox); mainloop_add_signal(SIGUSR2, crm_disable_blackbox); mainloop_add_signal(SIGTRAP, crm_trigger_blackbox); } return TRUE; } /* returns the old value */ unsigned int set_crm_log_level(unsigned int level) { unsigned int old = crm_log_level; crm_log_level = level; crm_update_callsites(); crm_trace("New log level: %d", level); return old; } void crm_enable_stderr(int enable) { if (enable && qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); crm_update_callsites(); } else if (enable == FALSE) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); } } void crm_bump_log_level(int argc, char **argv) { static int args = TRUE; int level = crm_log_level; if (args && argc > 1) { crm_log_args(argc, argv); } if (qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) { set_crm_log_level(level + 1); } /* Enable after potentially logging the argstring, not before */ crm_enable_stderr(TRUE); } unsigned int get_crm_log_level(void) { return crm_log_level; } #define ARGS_FMT "Invoked: %s" void crm_log_args(int argc, char **argv) { int lpc = 0; int len = 0; int existing_len = 0; int line = __LINE__; static int logged = 0; char *arg_string = NULL; if (argc == 0 || argv == NULL || logged) { return; } logged = 1; for (; lpc < argc; lpc++) { if (argv[lpc] == NULL) { break; } len = 2 + strlen(argv[lpc]); /* +1 space, +1 EOS */ arg_string = realloc_safe(arg_string, len + existing_len); existing_len += sprintf(arg_string + existing_len, "%s ", argv[lpc]); } qb_log_from_external_source(__func__, __FILE__, ARGS_FMT, LOG_NOTICE, line, 0, arg_string); free(arg_string); } const char * pcmk_errorname(int rc) { int error = ABS(rc); switch (error) { case E2BIG: return "E2BIG"; case EACCES: return "EACCES"; case EADDRINUSE: return "EADDRINUSE"; case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; case EAFNOSUPPORT: return "EAFNOSUPPORT"; case EAGAIN: return "EAGAIN"; case EALREADY: return "EALREADY"; case EBADF: return "EBADF"; case EBADMSG: return "EBADMSG"; case EBUSY: return "EBUSY"; case ECANCELED: return "ECANCELED"; case ECHILD: return "ECHILD"; case ECOMM: return "ECOMM"; case ECONNABORTED: return "ECONNABORTED"; case ECONNREFUSED: return "ECONNREFUSED"; case ECONNRESET: return "ECONNRESET"; /* case EDEADLK: return "EDEADLK"; */ case EDESTADDRREQ: return "EDESTADDRREQ"; case EDOM: return "EDOM"; case EDQUOT: return "EDQUOT"; case EEXIST: return "EEXIST"; case EFAULT: return "EFAULT"; case EFBIG: return "EFBIG"; case EHOSTDOWN: return "EHOSTDOWN"; case EHOSTUNREACH: return "EHOSTUNREACH"; case EIDRM: return "EIDRM"; case EILSEQ: return "EILSEQ"; case EINPROGRESS: return "EINPROGRESS"; case EINTR: return "EINTR"; case EINVAL: return "EINVAL"; case EIO: return "EIO"; case EISCONN: return "EISCONN"; case EISDIR: return "EISDIR"; case ELIBACC: return "ELIBACC"; case ELOOP: return "ELOOP"; case EMFILE: return "EMFILE"; case EMLINK: return "EMLINK"; case EMSGSIZE: return "EMSGSIZE"; case EMULTIHOP: return "EMULTIHOP"; case ENAMETOOLONG: return "ENAMETOOLONG"; case ENETDOWN: return "ENETDOWN"; case ENETRESET: return "ENETRESET"; case ENETUNREACH: return "ENETUNREACH"; case ENFILE: return "ENFILE"; case ENOBUFS: return "ENOBUFS"; case ENODATA: return "ENODATA"; case ENODEV: return "ENODEV"; case ENOENT: return "ENOENT"; case ENOEXEC: return "ENOEXEC"; case ENOKEY: return "ENOKEY"; case ENOLCK: return "ENOLCK"; case ENOLINK: return "ENOLINK"; case ENOMEM: return "ENOMEM"; case ENOMSG: return "ENOMSG"; case ENOPROTOOPT: return "ENOPROTOOPT"; case ENOSPC: return "ENOSPC"; case ENOSR: return "ENOSR"; case ENOSTR: return "ENOSTR"; case ENOSYS: return "ENOSYS"; case ENOTBLK: return "ENOTBLK"; case ENOTCONN: return "ENOTCONN"; case ENOTDIR: return "ENOTDIR"; case ENOTEMPTY: return "ENOTEMPTY"; case ENOTSOCK: return "ENOTSOCK"; /* case ENOTSUP: return "ENOTSUP"; */ case ENOTTY: return "ENOTTY"; case ENOTUNIQ: return "ENOTUNIQ"; case ENXIO: return "ENXIO"; case EOPNOTSUPP: return "EOPNOTSUPP"; case EOVERFLOW: return "EOVERFLOW"; case EPERM: return "EPERM"; case EPFNOSUPPORT: return "EPFNOSUPPORT"; case EPIPE: return "EPIPE"; case EPROTO: return "EPROTO"; case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; case EPROTOTYPE: return "EPROTOTYPE"; case ERANGE: return "ERANGE"; case EREMOTE: return "EREMOTE"; case EREMOTEIO: return "EREMOTEIO"; case EROFS: return "EROFS"; case ESHUTDOWN: return "ESHUTDOWN"; case ESPIPE: return "ESPIPE"; case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; case ESRCH: return "ESRCH"; case ESTALE: return "ESTALE"; case ETIME: return "ETIME"; case ETIMEDOUT: return "ETIMEDOUT"; case ETXTBSY: return "ETXTBSY"; case EUNATCH: return "EUNATCH"; case EUSERS: return "EUSERS"; /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ case EXDEV: return "EXDEV"; #ifdef EBADE /* Not available on OSX */ case EBADE: return "EBADE"; case EBADFD: return "EBADFD"; case EBADSLT: return "EBADSLT"; case EDEADLOCK: return "EDEADLOCK"; case EBADR: return "EBADR"; case EBADRQC: return "EBADRQC"; case ECHRNG: return "ECHRNG"; #ifdef EISNAM /* Not available on Illumos/Solaris */ case EISNAM: return "EISNAM"; case EKEYEXPIRED: return "EKEYEXPIRED"; case EKEYREJECTED: return "EKEYREJECTED"; case EKEYREVOKED: return "EKEYREVOKED"; #endif case EL2HLT: return "EL2HLT"; case EL2NSYNC: return "EL2NSYNC"; case EL3HLT: return "EL3HLT"; case EL3RST: return "EL3RST"; case ELIBBAD: return "ELIBBAD"; case ELIBMAX: return "ELIBMAX"; case ELIBSCN: return "ELIBSCN"; case ELIBEXEC: return "ELIBEXEC"; #ifdef ENOMEDIUM /* Not available on Illumos/Solaris */ case ENOMEDIUM: return "ENOMEDIUM"; case EMEDIUMTYPE: return "EMEDIUMTYPE"; #endif case ENONET: return "ENONET"; case ENOPKG: return "ENOPKG"; case EREMCHG: return "EREMCHG"; case ERESTART: return "ERESTART"; case ESTRPIPE: return "ESTRPIPE"; #ifdef EUCLEAN /* Not available on Illumos/Solaris */ case EUCLEAN: return "EUCLEAN"; #endif case EXFULL: return "EXFULL"; #endif case pcmk_err_generic: return "pcmk_err_generic"; case pcmk_err_no_quorum: return "pcmk_err_no_quorum"; case pcmk_err_schema_validation: return "pcmk_err_schema_validation"; case pcmk_err_transform_failed: return "pcmk_err_transform_failed"; case pcmk_err_old_data: return "pcmk_err_old_data"; case pcmk_err_diff_failed: return "pcmk_err_diff_failed"; case pcmk_err_diff_resync: return "pcmk_err_diff_resync"; case pcmk_err_cib_modified: return "pcmk_err_cib_modified"; case pcmk_err_cib_backup: return "pcmk_err_cib_backup"; case pcmk_err_cib_save: return "pcmk_err_cib_save"; case pcmk_err_cib_corrupt: return "pcmk_err_cib_corrupt"; } return "Unknown"; } const char * pcmk_strerror(int rc) { int error = abs(rc); if (error == 0) { return "OK"; } else if (error < PCMK_ERROR_OFFSET) { return strerror(error); } switch (error) { case pcmk_err_generic: return "Generic Pacemaker error"; case pcmk_err_no_quorum: return "Operation requires quorum"; case pcmk_err_schema_validation: return "Update does not conform to the configured schema"; case pcmk_err_transform_failed: return "Schema transform failed"; case pcmk_err_old_data: return "Update was older than existing configuration"; case pcmk_err_diff_failed: return "Application of an update diff failed"; case pcmk_err_diff_resync: return "Application of an update diff failed, requesting a full refresh"; case pcmk_err_cib_modified: return "The on-disk configuration was manually modified"; case pcmk_err_cib_backup: return "Could not archive the previous configuration"; case pcmk_err_cib_save: return "Could not save the new configuration to disk"; case pcmk_err_cib_corrupt: return "Could not parse on-disk configuration"; case pcmk_err_schema_unchanged: return "Schema is already the latest available"; /* The following cases will only be hit on systems for which they are non-standard */ /* coverity[dead_error_condition] False positive on non-Linux */ case ENOTUNIQ: return "Name not unique on network"; /* coverity[dead_error_condition] False positive on non-Linux */ case ECOMM: return "Communication error on send"; /* coverity[dead_error_condition] False positive on non-Linux */ case ELIBACC: return "Can not access a needed shared library"; /* coverity[dead_error_condition] False positive on non-Linux */ case EREMOTEIO: return "Remote I/O error"; /* coverity[dead_error_condition] False positive on non-Linux */ case EUNATCH: return "Protocol driver not attached"; /* coverity[dead_error_condition] False positive on non-Linux */ case ENOKEY: return "Required key not available"; } crm_err("Unknown error code: %d", rc); return "Unknown error"; } const char * bz2_strerror(int rc) { /* http://www.bzip.org/1.0.3/html/err-handling.html */ switch (rc) { case BZ_OK: case BZ_RUN_OK: case BZ_FLUSH_OK: case BZ_FINISH_OK: case BZ_STREAM_END: return "Ok"; case BZ_CONFIG_ERROR: return "libbz2 has been improperly compiled on your platform"; case BZ_SEQUENCE_ERROR: return "library functions called in the wrong order"; case BZ_PARAM_ERROR: return "parameter is out of range or otherwise incorrect"; case BZ_MEM_ERROR: return "memory allocation failed"; case BZ_DATA_ERROR: return "data integrity error is detected during decompression"; case BZ_DATA_ERROR_MAGIC: return "the compressed stream does not start with the correct magic bytes"; case BZ_IO_ERROR: return "error reading or writing in the compressed file"; case BZ_UNEXPECTED_EOF: return "compressed file finishes before the logical end of stream is detected"; case BZ_OUTBUFF_FULL: return "output data will not fit into the buffer provided"; } return "Unknown error"; } void crm_log_output_fn(const char *file, const char *function, int line, int level, const char *prefix, const char *output) { const char *next = NULL; const char *offset = NULL; if (output == NULL) { level = LOG_DEBUG; output = "-- empty --"; } next = output; do { offset = next; next = strchrnul(offset, '\n'); do_crm_log_alias(level, file, function, line, "%s [ %.*s ]", prefix, (int)(next - offset), offset); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } char * crm_strdup_printf (char const *format, ...) { va_list ap; int len = 0; char *string = NULL; va_start(ap, format); len = vasprintf (&string, format, ap); CRM_ASSERT(len > 0); va_end(ap); return string; } diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 3b84621f30..72794a13a8 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2677 +1,2676 @@ /* * Copyright (c) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* Add it for compiling on OSX */ #include #include #include #include #include #ifdef HAVE_STONITH_STONITH_H # include # define LHA_STONITH_LIBRARY "libstonith.so.1" static void *lha_agents_lib = NULL; #endif #include CRM_TRACE_INIT_DATA(stonith); struct stonith_action_s { /*! user defined data */ char *agent; char *action; char *victim; char *args; int timeout; int async; void *userdata; void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); /*! internal async track data */ int fd_stdout; int fd_stderr; int last_timeout_signo; /*! internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; guint timer_sigterm; guint timer_sigkill; int max_retries; /* device output data */ GPid pid; int rc; char *output; char *error; }; typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); #if HAVE_STONITH_STONITH_H static const char META_TEMPLATE[] = "\n" "\n" "\n" " 1.0\n" " \n" "%s\n" " \n" " %s\n" "%s\n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " 2.0\n" " \n" "\n"; #endif bool stonith_dispatch(stonith_t * st); int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata); void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int internal_stonith_action_execute(stonith_action_t * action); static void log_action(stonith_action_t *action, pid_t pid); static void log_action(stonith_action_t *action, pid_t pid) { if (action->output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid); crm_log_output(LOG_TRACE, prefix, action->output); free(prefix); } if (action->error) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); crm_log_output(LOG_WARNING, prefix, action->error); free(prefix); } } static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = create_xml_node(NULL, "notify"); native = stonith->private; native->ipc = NULL; native->source = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT); g_list_foreach(native->notify_list, stonith_send_notification, &blob); free_xml(blob.xml); } xmlNode * create_device_registration_xml(const char *id, const char *namespace, const char *agent, stonith_key_value_t * params, const char *rsc_provides) { xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H namespace = get_stonith_provider(agent, namespace); if (safe_str_eq(namespace, "heartbeat")) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, XML_ATTR_ID, id); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, "agent", agent); crm_xml_add(data, "namespace", namespace); if (rsc_provides) { crm_xml_add(data, "rsc_provides", rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t * st, int call_options, const char *id, const char *namespace, const char *agent, stonith_key_value_t * params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, namespace, agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, XML_ATTR_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level) { int rc = 0; xmlNode *data = NULL; CRM_CHECK(node || pattern || (attr && value), return -EINVAL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { return stonith_api_remove_level_full(st, options, node, NULL, NULL, NULL, level); } /*! * \internal * \brief Create XML for stonithd topology level registration request * * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to register * \param[in] device_list List of devices in level * * \return Newly allocated XML tree on success, NULL otherwise * * \note The caller should set only one of node, pattern or attr/value. */ xmlNode * create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { int len = 0; char *list = NULL; xmlNode *data; CRM_CHECK(node || pattern || (attr && value), return NULL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); CRM_CHECK(data, return NULL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } for (; device_list; device_list = device_list->next) { int adding = strlen(device_list->value); if(list) { adding++; /* +1 space */ } crm_trace("Adding %s (%dc) at offset %d", device_list->value, adding, len); list = realloc_safe(list, len + adding + 1); /* +1 EOS */ if (list == NULL) { crm_perror(LOG_CRIT, "Could not create device list"); free_xml(data); return NULL; } sprintf(list + len, "%s%s", len?",":"", device_list->value); len += adding; } crm_xml_add(data, XML_ATTR_STONITH_DEVICES, list); free(list); return data; } static int stonith_api_register_level_full(stonith_t * st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, pattern, attr, value, level, device_list); CRM_CHECK(data != NULL, return -EINVAL); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, stonith_key_value_t * device_list) { return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL, level, device_list); } static void append_arg(const char *key, const char *value, char **args) { int len = 3; /* =, \n, \0 */ int last = 0; CRM_CHECK(key != NULL, return); CRM_CHECK(value != NULL, return); if (strstr(key, "pcmk_")) { return; } else if (strstr(key, CRM_META)) { return; } else if (safe_str_eq(key, "crm_feature_set")) { return; } len += strlen(key); len += strlen(value); if (*args != NULL) { last = strlen(*args); } *args = realloc_safe(*args, last + len); crm_trace("Appending: %s=%s", key, value); sprintf((*args) + last, "%s=%s\n", key, value); } static void append_config_arg(gpointer key, gpointer value, gpointer user_data) { /* stonithd will filter action out when it registers the device, * but ignore it here just in case any other library callers * fail to do so. */ if (safe_str_neq(key, STONITH_ATTR_ACTION_OP)) { append_arg(key, value, user_data); return; } } static void append_host_specific_args(const char *victim, const char *map, GHashTable * params, char **arg_list) { char *name = NULL; int last = 0, lpc = 0, max = 0; if (map == NULL) { /* The best default there is for now... */ crm_debug("Using default arg map: port=uname"); append_arg("port", victim, arg_list); return; } max = strlen(map); crm_debug("Processing arg map: %s", map); for (; lpc < max + 1; lpc++) { if (isalpha(map[lpc])) { /* keep going */ } else if (map[lpc] == '=' || map[lpc] == ':') { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, map + last, lpc - last); crm_debug("Got name: %s", name); last = lpc + 1; } else if (map[lpc] == 0 || map[lpc] == ',' || isspace(map[lpc])) { char *param = NULL; const char *value = NULL; param = calloc(1, 1 + lpc - last); memcpy(param, map + last, lpc - last); last = lpc + 1; crm_debug("Got key: %s", param); if (name == NULL) { crm_err("Misparsed '%s', found '%s' without a name", map, param); free(param); continue; } if (safe_str_eq(param, "uname")) { value = victim; } else { char *key = crm_meta_name(param); value = g_hash_table_lookup(params, key); free(key); } if (value) { crm_debug("Setting '%s'='%s' (%s) for %s", name, value, param, victim); append_arg(name, value, arg_list); } else { crm_err("No node attribute '%s' for '%s'", name, victim); } free(name); name = NULL; free(param); if (map[lpc] == 0) { break; } } else if (isspace(map[lpc])) { last = lpc; } } free(name); } static char * make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, GHashTable * port_map) { char buffer[512]; char *arg_list = NULL; const char *value = NULL; CRM_CHECK(action != NULL, return NULL); - buffer[511] = 0; - snprintf(buffer, 511, "pcmk_%s_action", action); + snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action); if (device_args) { value = g_hash_table_lookup(device_args, buffer); } if (value == NULL && device_args) { /* @COMPAT deprecated since 1.1.6 */ - snprintf(buffer, 511, "pcmk_%s_cmd", action); + snprintf(buffer, sizeof(buffer), "pcmk_%s_cmd", action); value = g_hash_table_lookup(device_args, buffer); } if (value == NULL && device_args && safe_str_eq(action, "off")) { /* @COMPAT deprecated since 1.1.8 */ value = g_hash_table_lookup(device_args, "pcmk_poweroff_action"); } if (value) { crm_info("Substituting action '%s' for requested operation '%s'", value, action); action = value; } append_arg(STONITH_ATTR_ACTION_OP, action, &arg_list); if (victim && device_args) { const char *alias = victim; const char *param = g_hash_table_lookup(device_args, STONITH_ATTR_HOSTARG); if (port_map && g_hash_table_lookup(port_map, victim)) { alias = g_hash_table_lookup(port_map, victim); } /* Always supply the node's name too: * https://fedorahosted.org/cluster/wiki/FenceAgentAPI */ append_arg("nodename", victim, &arg_list); if (victim_nodeid) { char nodeid_str[33] = { 0, }; if (snprintf(nodeid_str, 33, "%u", (unsigned int)victim_nodeid)) { crm_info("For stonith action (%s) for victim %s, adding nodeid (%s) to parameters", action, victim, nodeid_str); append_arg("nodeid", nodeid_str, &arg_list); } } /* Check if we need to supply the victim in any other form */ if(safe_str_eq(agent, "fence_legacy")) { value = agent; } else if (param == NULL) { const char *map = g_hash_table_lookup(device_args, STONITH_ATTR_ARGMAP); if (map == NULL) { param = "port"; value = g_hash_table_lookup(device_args, param); } else { /* Legacy handling */ append_host_specific_args(alias, map, device_args, &arg_list); value = map; /* Nothing more to do */ } } else if (safe_str_eq(param, "none")) { value = param; /* Nothing more to do */ } else { value = g_hash_table_lookup(device_args, param); } /* Don't overwrite explictly set values for $param */ if (value == NULL || safe_str_eq(value, "dynamic")) { crm_debug("Performing %s action for node '%s' as '%s=%s'", action, victim, param, alias); append_arg(param, alias, &arg_list); } } if (device_args) { g_hash_table_foreach(device_args, append_config_arg, &arg_list); } return arg_list; } static gboolean st_child_term(gpointer data) { int rc = 0; stonith_action_t *track = data; crm_info("Child %d timed out, sending SIGTERM", track->pid); track->timer_sigterm = 0; track->last_timeout_signo = SIGTERM; rc = kill(-track->pid, SIGTERM); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid); } return FALSE; } static gboolean st_child_kill(gpointer data) { int rc = 0; stonith_action_t *track = data; crm_info("Child %d timed out, sending SIGKILL", track->pid); track->timer_sigkill = 0; track->last_timeout_signo = SIGKILL; rc = kill(-track->pid, SIGKILL); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid); } return FALSE; } static void stonith_action_clear_tracking_data(stonith_action_t * action) { if (action->timer_sigterm > 0) { g_source_remove(action->timer_sigterm); action->timer_sigterm = 0; } if (action->timer_sigkill > 0) { g_source_remove(action->timer_sigkill); action->timer_sigkill = 0; } if (action->fd_stdout) { close(action->fd_stdout); action->fd_stdout = 0; } if (action->fd_stderr) { close(action->fd_stderr); action->fd_stderr = 0; } free(action->output); action->output = NULL; free(action->error); action->error = NULL; action->rc = 0; action->pid = 0; action->last_timeout_signo = 0; } static void stonith_action_destroy(stonith_action_t * action) { stonith_action_clear_tracking_data(action); free(action->agent); free(action->args); free(action->action); free(action->victim); free(action); } #define FAILURE_MAX_RETRIES 2 stonith_action_t * stonith_action_create(const char *agent, const char *_action, const char *victim, uint32_t victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map) { stonith_action_t *action; action = calloc(1, sizeof(stonith_action_t)); crm_debug("Initiating action %s for agent %s (target=%s)", _action, agent, victim); action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map); action->agent = strdup(agent); action->action = strdup(_action); if (victim) { action->victim = strdup(victim); } action->timeout = action->remaining_timeout = timeout; action->max_retries = FAILURE_MAX_RETRIES; if (device_args) { char buffer[512]; const char *value = NULL; - snprintf(buffer, 511, "pcmk_%s_retries", _action); + snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action); value = g_hash_table_lookup(device_args, buffer); if (value) { action->max_retries = atoi(value); } } return action; } #define READ_MAX 500 static char * read_output(int fd) { char buffer[READ_MAX]; char *output = NULL; int len = 0; int more = 0; if (!fd) { return NULL; } do { errno = 0; memset(&buffer, 0, READ_MAX); more = read(fd, buffer, READ_MAX - 1); if (more > 0) { buffer[more] = 0; /* Make sure it's nul-terminated for logging * 'more' is always less than our buffer size */ output = realloc_safe(output, len + more + 1); snprintf(output + len, more + 1, "%s", buffer); len += more; } } while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR)); return output; } static gboolean update_remaining_timeout(stonith_action_t * action) { int diff = time(NULL) - action->initial_start_time; if (action->tries >= action->max_retries) { crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", action->agent, action->action, action->max_retries); action->remaining_timeout = 0; } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) { /* only set remaining timeout period if there is 30% * or greater of the original timeout period left */ action->remaining_timeout = action->timeout - diff; } else { action->remaining_timeout = 0; } return action->remaining_timeout ? TRUE : FALSE; } static void stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { stonith_action_t *action = mainloop_child_userdata(p); if (action->timer_sigterm > 0) { g_source_remove(action->timer_sigterm); action->timer_sigterm = 0; } if (action->timer_sigkill > 0) { g_source_remove(action->timer_sigkill); action->timer_sigkill = 0; } action->output = read_output(action->fd_stdout); action->error = read_output(action->fd_stderr); if (action->last_timeout_signo) { action->rc = -ETIME; crm_notice("Child process %d performing action '%s' timed out with signal %d", pid, action->action, action->last_timeout_signo); } else if (signo) { action->rc = -ECONNABORTED; crm_notice("Child process %d performing action '%s' timed out with signal %d", pid, action->action, signo); } else { crm_debug("Child process %d performing action '%s' exited with rc %d", pid, action->action, exitcode); if (exitcode > 0) { /* Try to provide a useful error code based on the fence agent's * error output. */ if (action->error == NULL) { exitcode = -ENODATA; } else if (strstr(action->error, "imed out")) { /* Some agents have their own internal timeouts */ exitcode = -ETIMEDOUT; } else if (strstr(action->error, "Unrecognised action")) { exitcode = -EOPNOTSUPP; } else { exitcode = -pcmk_err_generic; } } action->rc = exitcode; } log_action(action, pid); if (action->rc != pcmk_ok && update_remaining_timeout(action)) { int rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { return; } } if (action->done_cb) { action->done_cb(pid, action->rc, action->output, action->userdata); } stonith_action_destroy(action); } static int internal_stonith_action_execute(stonith_action_t * action) { int pid, status = 0, len, rc = -EPROTO; int ret; int total = 0; int p_read_fd, p_write_fd; /* parent read/write file descriptors */ int c_read_fd, c_write_fd; /* child read/write file descriptors */ int c_stderr_fd, p_stderr_fd; /* parent/child side file descriptors for stderr */ int fd1[2]; int fd2[2]; int fd3[2]; int is_retry = 0; /* clear any previous tracking data */ stonith_action_clear_tracking_data(action); if (!action->tries) { action->initial_start_time = time(NULL); } action->tries++; if (action->tries > 1) { crm_info("Attempt %d to execute %s (%s). remaining timeout is %d", action->tries, action->agent, action->action, action->remaining_timeout); is_retry = 1; } c_read_fd = c_write_fd = p_read_fd = p_write_fd = c_stderr_fd = p_stderr_fd = -1; if (action->args == NULL || action->agent == NULL) goto fail; len = strlen(action->args); if (pipe(fd1)) goto fail; p_read_fd = fd1[0]; c_write_fd = fd1[1]; if (pipe(fd2)) goto fail; c_read_fd = fd2[0]; p_write_fd = fd2[1]; if (pipe(fd3)) goto fail; p_stderr_fd = fd3[0]; c_stderr_fd = fd3[1]; crm_debug("forking"); pid = fork(); if (pid < 0) { rc = -ECHILD; goto fail; } if (!pid) { /* child */ setpgid(0, 0); close(1); /* coverity[leaked_handle] False positive */ if (dup(c_write_fd) < 0) goto fail; close(2); /* coverity[leaked_handle] False positive */ if (dup(c_stderr_fd) < 0) goto fail; close(0); /* coverity[leaked_handle] False positive */ if (dup(c_read_fd) < 0) goto fail; /* keep c_stderr_fd open so parent can report all errors. */ /* keep c_write_fd open so hostlist can be sent to parent. */ close(c_read_fd); close(p_read_fd); close(p_write_fd); close(p_stderr_fd); /* keep retries from executing out of control */ if (is_retry) { sleep(1); } execlp(action->agent, action->agent, NULL); exit(EXIT_FAILURE); } /* parent */ action->pid = pid; ret = fcntl(p_read_fd, F_SETFL, fcntl(p_read_fd, F_GETFL, 0) | O_NONBLOCK); if (ret < 0) { crm_perror(LOG_NOTICE, "Could not change the output of %s to be non-blocking", action->agent); } ret = fcntl(p_stderr_fd, F_SETFL, fcntl(p_stderr_fd, F_GETFL, 0) | O_NONBLOCK); if (ret < 0) { crm_perror(LOG_NOTICE, "Could not change the stderr of %s to be non-blocking", action->agent); } do { crm_debug("sending args"); ret = write(p_write_fd, action->args + total, len - total); if (ret > 0) { total += ret; } } while (errno == EINTR && total < len); if (total != len) { crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len); if (ret >= 0) { rc = -ECOMM; } goto fail; } close(p_write_fd); p_write_fd = -1; /* async */ if (action->async) { action->fd_stdout = p_read_fd; action->fd_stderr = p_stderr_fd; mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done); crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid, action->remaining_timeout); action->last_timeout_signo = 0; if (action->remaining_timeout) { action->timer_sigterm = g_timeout_add(1000 * action->remaining_timeout, st_child_term, action); action->timer_sigkill = g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action); } else { crm_err("No timeout set for stonith operation %s with device %s", action->action, action->agent); } close(c_write_fd); close(c_read_fd); close(c_stderr_fd); return 0; } else { /* sync */ int timeout = action->remaining_timeout + 1; pid_t p = 0; while (action->remaining_timeout < 0 || timeout > 0) { p = waitpid(pid, &status, WNOHANG); if (p > 0) { break; } sleep(1); timeout--; } if (timeout == 0) { int killrc = kill(-pid, SIGKILL); if (killrc && errno != ESRCH) { crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno); } /* * From sigprocmask(2): * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. * * This makes it safe to skip WNOHANG here */ p = waitpid(pid, &status, 0); } if (p <= 0) { crm_perror(LOG_ERR, "waitpid(%d)", pid); } else if (p != pid) { crm_err("Waited for %d, got %d", pid, p); } action->output = read_output(p_read_fd); action->error = read_output(p_stderr_fd); action->rc = -ECONNABORTED; log_action(action, pid); rc = action->rc; if (timeout == 0) { action->rc = -ETIME; } else if (WIFEXITED(status)) { crm_debug("result = %d", WEXITSTATUS(status)); action->rc = -WEXITSTATUS(status); rc = 0; } else if (WIFSIGNALED(status)) { crm_err("call %s for %s exited due to signal %d", action->action, action->agent, WTERMSIG(status)); } else { crm_err("call %s for %s returned unexpected status %#x", action->action, action->agent, status); } } fail: if (p_read_fd >= 0) { close(p_read_fd); } if (p_write_fd >= 0) { close(p_write_fd); } if (p_stderr_fd >= 0) { close(p_stderr_fd); } if (c_read_fd >= 0) { close(c_read_fd); } if (c_write_fd >= 0) { close(c_write_fd); } if (c_stderr_fd >= 0) { close(c_stderr_fd); } return rc; } GPid stonith_action_execute_async(stonith_action_t * action, void *userdata, void (*done) (GPid pid, int rc, const char *output, gpointer user_data)) { int rc = 0; if (!action) { return -1; } action->userdata = userdata; action->done_cb = done; action->async = 1; rc = internal_stonith_action_execute(action); return rc < 0 ? rc : action->pid; } int stonith_action_execute(stonith_action_t * action, int *agent_result, char **output) { int rc = 0; if (!action) { return -1; } do { rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { /* success! */ break; } /* keep retrying while we have time left */ } while (update_remaining_timeout(action)); if (rc) { /* error */ return rc; } if (agent_result) { *agent_result = action->rc; } if (output) { *output = action->output; action->output = NULL; /* handed it off, do not free */ } stonith_action_destroy(action); return rc; } static int stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace, stonith_key_value_t ** devices, int timeout) { int count = 0; if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } /* Include Heartbeat agents */ if (namespace == NULL || safe_str_eq("heartbeat", namespace)) { #if HAVE_STONITH_STONITH_H static gboolean need_init = TRUE; char **entry = NULL; char **type_list = NULL; static char **(*type_list_fn) (void) = NULL; static void (*type_free_fn) (char **) = NULL; if (need_init) { need_init = FALSE; type_list_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE); type_free_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist", FALSE); } if (type_list_fn) { type_list = (*type_list_fn) (); } for (entry = type_list; entry != NULL && *entry; ++entry) { crm_trace("Added: %s", *entry); *devices = stonith_key_value_add(*devices, NULL, *entry); count++; } if (type_list && type_free_fn) { (*type_free_fn) (type_list); } #else if (namespace != NULL) { return -EINVAL; /* Heartbeat agents not supported */ } #endif } /* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */ if (namespace == NULL || safe_str_eq("redhat", namespace)) { struct dirent **namelist; int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX + 1]; while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (0 != strncmp(RH_STONITH_PREFIX, namelist[file_num]->d_name, strlen(RH_STONITH_PREFIX))) { free(namelist[file_num]); continue; } snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { *devices = stonith_key_value_add(*devices, NULL, namelist[file_num]->d_name); count++; } free(namelist[file_num]); } free(namelist); } } return count; } #if HAVE_STONITH_STONITH_H static inline char * strdup_null(const char *val) { if (val) { return strdup(val); } return NULL; } static void stonith_plugin(int priority, const char *fmt, ...) __attribute__((__format__ (__printf__, 2, 3))); static void stonith_plugin(int priority, const char *format, ...) { int err = errno; va_list ap; int len = 0; char *string = NULL; va_start(ap, format); len = vasprintf (&string, format, ap); CRM_ASSERT(len > 0); do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", string); free(string); errno = err; } #endif static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { int rc = 0; char *buffer = NULL; const char *provider = get_stonith_provider(agent, namespace); crm_trace("looking up %s/%s metadata", agent, provider); /* By having this in a library, we can access it from stonith_admin * when neither lrmd or stonith-ng are running * Important for the crm shell's validations... */ if (safe_str_eq(provider, "redhat")) { stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); int exec_rc = stonith_action_execute(action, &rc, &buffer); xmlNode *xml = NULL; xmlNode *actions = NULL; xmlXPathObject *xpathObj = NULL; if (exec_rc < 0 || rc != 0 || buffer == NULL) { crm_warn("Could not obtain metadata for %s", agent); crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); free(buffer); /* Just in case */ return -EINVAL; } xml = string2xml(buffer); if(xml == NULL) { crm_warn("Metadata for %s is invalid", agent); free(buffer); return -EINVAL; } xpathObj = xpath_search(xml, "//actions"); if (numXpathResults(xpathObj) > 0) { actions = getXpathResult(xpathObj, 0); } freeXpathObject(xpathObj); /* Now fudge the metadata so that the start/stop actions appear */ xpathObj = xpath_search(xml, "//action[@name='stop']"); if (numXpathResults(xpathObj) <= 0) { xmlNode *tmp = NULL; tmp = create_xml_node(actions, "action"); crm_xml_add(tmp, "name", "stop"); crm_xml_add(tmp, "timeout", "20s"); tmp = create_xml_node(actions, "action"); crm_xml_add(tmp, "name", "start"); crm_xml_add(tmp, "timeout", "20s"); } freeXpathObject(xpathObj); /* Now fudge the metadata so that the port isn't required in the configuration */ xpathObj = xpath_search(xml, "//parameter[@name='port']"); if (numXpathResults(xpathObj) > 0) { /* We'll fill this in */ xmlNode *tmp = getXpathResult(xpathObj, 0); crm_xml_add(tmp, "required", "0"); } freeXpathObject(xpathObj); free(buffer); buffer = dump_xml_formatted_with_text(xml); free_xml(xml); if (!buffer) { return -EINVAL; } } else { #if !HAVE_STONITH_STONITH_H return -EINVAL; /* Heartbeat agents not supported */ #else int bufferlen = 0; static const char *no_parameter_info = ""; Stonith *stonith_obj = NULL; static gboolean need_init = TRUE; static Stonith *(*st_new_fn) (const char *) = NULL; static const char *(*st_info_fn) (Stonith *, int) = NULL; static void (*st_del_fn) (Stonith *) = NULL; static void (*st_log_fn) (Stonith *, PILLogFun) = NULL; if (need_init) { need_init = FALSE; st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", FALSE); st_log_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log", FALSE); st_info_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info", FALSE); } if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) { char *xml_meta_longdesc = NULL; char *xml_meta_shortdesc = NULL; char *meta_param = NULL; char *meta_longdesc = NULL; char *meta_shortdesc = NULL; stonith_obj = (*st_new_fn) (agent); if (stonith_obj) { (*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin); meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR)); if (meta_longdesc == NULL) { crm_warn("no long description in %s's metadata.", agent); meta_longdesc = strdup(no_parameter_info); } meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID)); if (meta_shortdesc == NULL) { crm_warn("no short description in %s's metadata.", agent); meta_shortdesc = strdup(no_parameter_info); } meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML)); if (meta_param == NULL) { crm_warn("no list of parameters in %s's metadata.", agent); meta_param = strdup(no_parameter_info); } (*st_del_fn) (stonith_obj); } else { return -EINVAL; /* Heartbeat agents not supported */ } xml_meta_longdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); xml_meta_shortdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); bufferlen = strlen(META_TEMPLATE) + strlen(agent) + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) + strlen(meta_param) + 1; buffer = calloc(1, bufferlen); snprintf(buffer, bufferlen - 1, META_TEMPLATE, agent, xml_meta_longdesc, xml_meta_shortdesc, meta_param); xmlFree(xml_meta_longdesc); xmlFree(xml_meta_shortdesc); free(meta_shortdesc); free(meta_longdesc); free(meta_param); } #endif } if (output) { *output = buffer; } else { free(buffer); } return rc; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObjectPtr xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, target); crm_xml_add(data, F_STONITH_ACTION, "off"); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = xpath_search(output, "//@agent"); if (xpathObj) { max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { xmlChar *match_path = xmlGetNodePath(match); crm_info("%s[%d] = %s", "//@agent", lpc, match_path); free(match_path); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); } } freeXpathObject(xpathObj); } free_xml(output); free_xml(data); return max; } static int stonith_api_call(stonith_t * stonith, int call_options, const char *id, const char *action, const char *victim, int timeout, xmlNode ** output) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, F_STONITH_DEVICE, id); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add(data, F_STONITH_TARGET, victim); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout); free_xml(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, "st_output"); if (list_str) { *list_info = strdup(list_str); } } if (output) { free_xml(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL); } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { return stonith_api_fence(stonith, call_options | st_opt_manual_ack, target, "off", 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = create_xml_node(NULL, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); } rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options | st_opt_sync_call, timeout); free_xml(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_ERR); for (op = __xml_first_child(reply); op != NULL; op = __xml_next(op)) { stonith_history_t *kvp; kvp = calloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, F_STONITH_TARGET); kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); crm_element_value_int(op, F_STONITH_DATE, &kvp->completed); crm_element_value_int(op, F_STONITH_STATE, &kvp->state); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } return rc; } gboolean is_redhat_agent(const char *agent) { int rc = 0; struct stat prop; char buffer[FILENAME_MAX + 1]; snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, agent); rc = stat(buffer, &prop); if (rc >= 0 && S_ISREG(prop.st_mode)) { return TRUE; } return FALSE; } const char * get_stonith_provider(const char *agent, const char *provider) { /* This function sucks */ if (is_redhat_agent(agent)) { return "redhat"; #if HAVE_STONITH_STONITH_H } else { Stonith *stonith_obj = NULL; static gboolean need_init = TRUE; static Stonith *(*st_new_fn) (const char *) = NULL; static void (*st_del_fn) (Stonith *) = NULL; if (need_init) { need_init = FALSE; st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", FALSE); } if (lha_agents_lib && st_new_fn && st_del_fn) { stonith_obj = (*st_new_fn) (agent); if (stonith_obj) { (*st_del_fn) (stonith_obj); return "heartbeat"; } } #endif } if (safe_str_eq(provider, "internal")) { return provider; } else { crm_err("No such device: %s", agent); return NULL; } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_STONITH_OPERATION, op); crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_STONITH_CALLDATA, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->private; crm_debug("Signing out of the STONITH Service"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = stonith->private; static struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; crm_trace("Connecting command channel"); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *stonith_fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_perror(LOG_ERR, "Connection to STONITH manager failed"); rc = -ENOTCONN; } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the Stonith API"); rc = -ENOTCONN; } if (rc == pcmk_ok) { xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "stonith_command"); crm_xml_add(hello, F_TYPE, T_STONITH_NG); crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_STONITH_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID); if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); } if (rc == pcmk_ok) { #if HAVE_MSGFROMIPC_TIMEOUT stonith->call_timeout = MAX_IPC_DELAY; #endif crm_debug("Connection to STONITH successful"); return pcmk_ok; } crm_debug("Connection to STONITH failed: %s", pcmk_strerror(rc)); stonith->cmds->disconnect(stonith); return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__); stonith_private_t *native = stonith->private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); if (enabled) { crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } free_xml(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; crm_debug("Removing callback for %s events", event); private = stonith->private; new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = calloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->private; callback = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static void invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = rc; data.userdata = userdata; callback(st, &data); } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->private != NULL, return -EINVAL); private = stonith->private; if (call_id == 0) { private->op_callback = callback; } else if (call_id < 0) { if (!(options & st_opt_report_only_success)) { crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); invoke_callback(stonith, call_id, call_id, user_data, callback); } else { crm_warn("STONITH call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = calloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } g_hash_table_insert(private->stonith_op_callback_table, GINT_TO_POINTER(call_id), blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { g_hash_table_remove(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); } return pcmk_ok; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc) { stonith_private_t *private = NULL; stonith_callback_client_t *blob = NULL; stonith_callback_client_t local_blob; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->private != NULL, return); private = stonith->private; local_blob.id = NULL; local_blob.callback = NULL; local_blob.user_data = NULL; local_blob.only_success = FALSE; if (msg != NULL) { crm_element_value_int(msg, F_STONITH_RC, &rc); crm_element_value_int(msg, F_STONITH_CALLID, &call_id); } CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result")); blob = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); if (blob != NULL) { local_blob = *blob; blob = NULL; stonith_api_del_callback(stonith, call_id, FALSE); } else { crm_trace("No callback found for call %d", call_id); local_blob.callback = NULL; } if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback); } else if (private->op_callback == NULL && rc != pcmk_ok) { crm_warn("STONITH command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed STONITH Update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_callback(stonith, call_id, rc, NULL, private->op_callback); } crm_trace("OP callback activated."); } /* */ static stonith_event_t * xml_to_event(xmlNode * msg) { stonith_event_t *event = calloc(1, sizeof(stonith_event_t)); const char *ntype = crm_element_value(msg, F_SUBTYPE); char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG); crm_log_xml_trace(msg, "stonith_notify"); crm_element_value_int(msg, F_STONITH_RC, &(event->result)); if (safe_str_eq(ntype, T_STONITH_NOTIFY_FENCE)) { event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION); if (data) { event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN); event->action = crm_element_value_copy(data, F_STONITH_ACTION); event->target = crm_element_value_copy(data, F_STONITH_TARGET); event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE); event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID); event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME); event->device = crm_element_value_copy(data, F_STONITH_DEVICE); } else { crm_err("No data for %s event", ntype); crm_log_xml_notice(msg, "BadEvent"); } } free(data_addr); return event; } static void event_free(stonith_event_t * event) { free(event->id); free(event->type); free(event->message); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (safe_str_neq(entry->event, event)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = stonith->private; if (stonith->state == stonith_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } if (call_options & st_opt_sync_call) { ipc_flags |= crm_ipc_client_response; } stonith->call_id++; /* prevent call_id from being negative (or zero) and conflicting * with the stonith_errors enum * use 2 because we use it as (stonith->call_id - 1) below */ if (stonith->call_id < 1) { stonith->call_id = 1; } CRM_CHECK(native->token != NULL,; ); op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to STONITH service, Timeout: %ds", op, timeout); rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); free_xml(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); CRM_CHECK(stonith->call_id != 0, return -EPROTO); free_xml(op_reply); return stonith->call_id; } rc = pcmk_ok; crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); if (reply_id == stonith->call_id) { crm_trace("Synchronous reply %d received", reply_id); if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) { rc = -ENOMSG; } if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); free_xml(op_reply); rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); free_xml(op_reply); rc = -ENOMSG; } done: if (crm_ipc_connected(native->ipc) == FALSE) { crm_err("STONITH disconnected"); stonith->state = stonith_disconnected; } free_xml(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (crm_ipc_connected(private->ipc) == FALSE) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->private; blob.stonith = st; blob.xml = string2xml(buffer); if (blob.xml == NULL) { crm_warn("Received a NULL msg from STONITH service: %s.", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, F_TYPE); crm_trace("Activating %s callbacks...", type); if (safe_str_eq(type, T_STONITH_NG)) { stonith_perform_callback(st, blob.xml, 0, 0); } else if (safe_str_eq(type, T_STONITH_NOTIFY)) { g_list_foreach(private->notify_list, stonith_send_notification, &blob); } else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout); crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } free_xml(blob.xml); return 1; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Disconnecting %p first", stonith); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { crm_trace("Destroying %p", stonith); if(stonith) { stonith->cmds->free(stonith); } } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); private = calloc(1, sizeof(stonith_private_t)); new_stonith->private = private; private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, stonith_destroy_op_callback); private->notify_list = NULL; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; /* *INDENT-ON* */ return new_stonith; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = calloc(1, sizeof(stonith_key_value_t)); if (key) { p->key = strdup(key); } if (value) { p->value = strdup(value); } end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __FUNCTION__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { char *name = NULL; const char *action = "reboot"; int rc = -EPROTO; stonith_t *st = NULL; enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide; api_log_open(); st = stonith_api_new(); if (st) { rc = st->cmds->connect(st, "stonith-api", NULL); if(rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } } if (uname != NULL) { name = strdup(uname); } else if (nodeid > 0) { opts |= st_opt_cs_nodeid; name = crm_itoa(nodeid); } if (off) { action = "off"; } if (rc == pcmk_ok) { rc = st->cmds->fence(st, opts, name, action, timeout, 0); if(rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s ", nodeid, uname, action); } } if (st) { st->cmds->disconnect(st); stonith_api_delete(st); } free(name); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = 0; char *name = NULL; time_t when = 0; stonith_t *st = NULL; stonith_history_t *history, *hp = NULL; enum stonith_call_options opts = st_opt_sync_call; st = stonith_api_new(); if (st) { rc = st->cmds->connect(st, "stonith-api", NULL); if(rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } } if (uname != NULL) { name = strdup(uname); } else if (nodeid > 0) { opts |= st_opt_cs_nodeid; name = crm_itoa(nodeid); } if (st && rc == pcmk_ok) { int entries = 0; int progress = 0; int completed = 0; rc = st->cmds->history(st, opts, name, &history, 120); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } if (st) { st->cmds->disconnect(st); stonith_api_delete(st); } if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } free(name); return when; } #if HAVE_STONITH_STONITH_H # include const char *i_hate_pils(int rc); const char * i_hate_pils(int rc) { return PIL_strerror(rc); } #endif diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c index 1609759e01..242fd50887 100644 --- a/mcp/pacemaker.c +++ b/mcp/pacemaker.c @@ -1,1159 +1,1157 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean pcmk_quorate = FALSE; gboolean fatal_error = FALSE; GMainLoop *mainloop = NULL; #define PCMK_PROCESS_CHECK_INTERVAL 5 const char *local_name = NULL; uint32_t local_nodeid = 0; crm_trigger_t *shutdown_trigger = NULL; const char *pid_file = "/var/run/pacemaker.pid"; typedef struct pcmk_child_s { int pid; long flag; int start_seq; int respawn_count; gboolean respawn; const char *name; const char *uid; const char *command; gboolean active_before_startup; } pcmk_child_t; /* Index into the array below */ #define pcmk_child_crmd 4 #define pcmk_child_mgmtd 8 /* *INDENT-OFF* */ static pcmk_child_t pcmk_children[] = { { 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL }, { 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL }, { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" }, { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" }, { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" }, { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" }, { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL }, { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" }, { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" }, { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" }, }; /* *INDENT-ON* */ static gboolean start_child(pcmk_child_t * child); static gboolean check_active_before_startup_processes(gpointer user_data); void update_process_clients(crm_client_t *client); void update_process_peers(void); void enable_crmd_as_root(gboolean enable) { if (enable) { pcmk_children[pcmk_child_crmd].uid = NULL; } else { pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER; } } void enable_mgmtd(gboolean enable) { if (enable) { pcmk_children[pcmk_child_mgmtd].start_seq = 7; } else { pcmk_children[pcmk_child_mgmtd].start_seq = 0; } } static uint32_t get_process_list(void) { int lpc = 0; uint32_t procs = crm_get_cluster_proc(); for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if (pcmk_children[lpc].pid != 0) { procs |= pcmk_children[lpc].flag; } } return procs; } static void pcmk_process_exit(pcmk_child_t * child) { child->pid = 0; child->active_before_startup = FALSE; /* Broadcast the fact that one of our processes died ASAP * * Try to get some logging of the cause out first though * because we're probably about to get fenced * * Potentially do this only if respawn_count > N * to allow for local recovery */ update_node_processes(local_nodeid, NULL, get_process_list()); child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Child respawn count exceeded by %s", child->name); child->respawn = FALSE; } if (shutdown_trigger) { mainloop_set_trigger(shutdown_trigger); update_node_processes(local_nodeid, NULL, get_process_list()); } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) { crm_err("Rebooting system because of %s", child->name); pcmk_panic(__FUNCTION__); } else if (child->respawn) { crm_notice("Respawning failed child process: %s", child->name); start_child(child); } } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo && signo == SIGKILL) { crm_warn("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core); } else if (signo) { crm_err("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core); } else { switch(exitcode) { case pcmk_ok: crm_info("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); break; case DAEMON_RESPAWN_STOP: crm_warn("The %s process (%d) can no longer be respawned, shutting the cluster down.", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case pcmk_err_panic: do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to reset", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_panic(__FUNCTION__); pcmk_shutdown(SIGTERM); break; default: crm_err("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); break; } } pcmk_process_exit(child); } static gboolean stop_child(pcmk_child_t * child, int signal) { if (signal == 0) { signal = SIGTERM; } if (child->command == NULL) { crm_debug("Nothing to do for child \"%s\"", child->name); return TRUE; } if (child->pid <= 0) { crm_trace("Client %s not running", child->name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping %s "CRM_XS" sent signal %d to process %d", child->name, signal, child->pid); } else { crm_perror(LOG_ERR, "Could not stop %s (process %d) with signal %d", child->name, child->pid, signal); } return TRUE; } static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; static gboolean start_child(pcmk_child_t * child) { int lpc = 0; uid_t uid = 0; gid_t gid = 0; struct rlimit oflimits; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *devnull = "/dev/null"; const char *env_valgrind = getenv("PCMK_valgrind_enabled"); const char *env_callgrind = getenv("PCMK_callgrind_enabled"); enum cluster_type_e stack = get_cluster_type(); child->active_before_startup = FALSE; if (child->command == NULL) { crm_info("Nothing to do for child \"%s\"", child->name); return TRUE; } if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = TRUE; } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { crm_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } if (child->uid) { if (crm_user_lookup(child->uid, &uid, &gid) < 0) { crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); return FALSE; } crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); } child->pid = fork(); CRM_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); crm_info("Forked child %d for process %s%s", child->pid, child->name, use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); update_node_processes(local_nodeid, NULL, get_process_list()); return TRUE; } else { /* Start a new session */ (void)setsid(); /* Setup the two alternate arg arrays */ opts_vgrind[0] = strdup(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = strdup("--tool=callgrind"); opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = strdup(child->command); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = strdup(child->command); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = strdup(child->command); if(gid) { if(stack == pcmk_cluster_corosync) { /* Drop root privileges completely * * We can do this because we set uidgid.gid.${gid}=1 * via CMAP which allows these processes to connect to * corosync */ if (setgid(gid) < 0) { crm_perror(LOG_ERR, "Could not set group to %d", gid); } /* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */ } else if (initgroups(child->uid, gid) < 0) { crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno); } } if (uid && setuid(uid) < 0) { crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid); } /* Close all open file descriptors */ getrlimit(RLIMIT_NOFILE, &oflimits); for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) { close(lpc); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { (void)execvp(child->command, opts_default); } crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command); crm_exit(DAEMON_RESPAWN_STOP); } return TRUE; /* never reached */ } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Child %s not terminating in a timely manner, forcing", child->name); stop_child(child, SIGSEGV); } return FALSE; } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); int lpc = 0; if (phase == 0) { crm_notice("Shutting down Pacemaker"); phase = max; /* Add a second, more frequent, check to speed up shutdown */ g_timeout_add_seconds(5, check_active_before_startup_processes, NULL); } for (; phase > 0; phase--) { /* Don't stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { pcmk_child_t *child = &(pcmk_children[lpc]); if (phase != child->start_seq) { continue; } if (child->pid) { time_t now = time(NULL); if (child->respawn) { next_log = now + 30; child->respawn = FALSE; stop_child(child, SIGTERM); if (phase < pcmk_children[pcmk_child_crmd].start_seq) { g_timeout_add(180000 /* 3m */ , escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; crm_notice("Still waiting for %s to terminate " CRM_XS " pid=%d seq=%d", child->name, child->pid, child->start_seq); } return TRUE; } /* cleanup */ crm_debug("%s confirmed stopped", child->name); child->pid = 0; } } /* send_cluster_id(); */ crm_notice("Shutdown complete"); { const char *delay = daemon_option("shutdown_delay"); if(delay) { sync(); sleep(crm_get_msec(delay) / 1000); } } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Attempting to inhibit respawning after fatal error"); crm_exit(DAEMON_RESPAWN_STOP); } return TRUE; } static void pcmk_ignore(int nsig) { crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); } static void pcmk_sigquit(int nsig) { pcmk_panic(__FUNCTION__); } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } static int32_t pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void pcmk_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } /* Exit code means? */ static int32_t pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; const char *task = NULL; crm_client_t *c = crm_client_get(qbc); xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags); crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } task = crm_element_value(msg, F_CRM_TASK); if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) { /* Time to quit */ crm_notice("Shutting down in response to ticket %s (%s)", crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN)); pcmk_shutdown(15); } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { /* Send to everyone */ struct iovec *iov; int id = 0; const char *name = NULL; crm_element_value_int(msg, XML_ATTR_ID, &id); name = crm_element_value(msg, XML_ATTR_UNAME); crm_notice("Instructing peers to remove references to node %s/%u", name, id); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = dump_xml_unformatted(msg); iov->iov_len = 1 + strlen(iov->iov_base); send_cpg_iov(iov); } else { update_process_clients(c); } free_xml(msg); return 0; } /* Error code means? */ static int32_t pcmk_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void pcmk_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pcmk_ipc_closed(c); } struct qb_ipcs_service_handlers mcp_ipc_callbacks = { .connection_accept = pcmk_ipc_accept, .connection_created = pcmk_ipc_created, .msg_process = pcmk_ipc_dispatch, .connection_closed = pcmk_ipc_closed, .connection_destroyed = pcmk_ipc_destroy }; /*! * \internal * \brief Send an XML message with process list of all known peers to client(s) * * \param[in] client Send message to this client, or all clients if NULL */ void update_process_clients(crm_client_t *client) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *update = create_xml_node(NULL, "nodes"); if (is_corosync_cluster()) { crm_xml_add_int(update, "quorate", pcmk_quorate); } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = create_xml_node(update, "node"); crm_xml_add_int(xml, "id", node->id); crm_xml_add(xml, "uname", node->uname); crm_xml_add(xml, "state", node->state); crm_xml_add_int(xml, "processes", node->processes); } if(client) { crm_trace("Sending process list to client %s", client->id); crm_ipcs_send(client, 0, update, crm_ipc_server_event); } else { crm_trace("Sending process list to %d clients", crm_hash_table_size(client_connections)); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) { crm_ipcs_send(client, 0, update, crm_ipc_server_event); } } free_xml(update); } /*! * \internal * \brief Send a CPG message with local node's process list to all peers */ void update_process_peers(void) { /* Do nothing for corosync-2 based clusters */ char buffer[1024]; struct iovec *iov; int rc = 0; - memset(buffer, 0, SIZEOF(buffer)); - if (local_name) { - rc = snprintf(buffer, SIZEOF(buffer) - 1, "", + rc = snprintf(buffer, SIZEOF(buffer), "", local_name, get_process_list()); } else { - rc = snprintf(buffer, SIZEOF(buffer) - 1, "", get_process_list()); + rc = snprintf(buffer, SIZEOF(buffer), "", get_process_list()); } crm_trace("Sending %s", buffer); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = strdup(buffer); iov->iov_len = rc + 1; send_cpg_iov(iov); } /*! * \internal * \brief Update a node's process list, notifying clients and peers if needed * * \param[in] id Node ID of affected node * \param[in] uname Uname of affected node * \param[in] procs Affected node's process list mask * * \return TRUE if the process list changed, FALSE otherwise */ gboolean update_node_processes(uint32_t id, const char *uname, uint32_t procs) { gboolean changed = FALSE; crm_node_t *node = crm_get_peer(id, uname); if (procs != 0) { if (procs != node->processes) { crm_debug("Node %s now has process list: %.32x (was %.32x)", node->uname, procs, node->processes); node->processes = procs; changed = TRUE; /* If local node's processes have changed, notify clients/peers */ if (id == local_nodeid) { update_process_clients(NULL); update_process_peers(); } } else { crm_trace("Node %s still has process list: %.32x", node->uname, procs); } } return changed; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"}, {"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"}, {"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"}, {"standby", 0, 0, 's', "\tStart node in standby state"}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ static void mcp_chown(const char *path, uid_t uid, gid_t gid) { int rc = chown(path, uid, gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", path, CRM_DAEMON_USER, gid, pcmk_strerror(errno)); } } static gboolean check_active_before_startup_processes(gpointer user_data) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); gboolean keep_tracking = FALSE; for (start_seq = 1; start_seq < max; start_seq++) { for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].active_before_startup == FALSE) { /* we are already tracking it as a child process. */ continue; } else if (start_seq != pcmk_children[lpc].start_seq) { continue; } else { const char *name = pcmk_children[lpc].name; if (pcmk_children[lpc].flag == crm_proc_stonith_ng) { name = "stonithd"; } if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) { crm_notice("Process %s terminated (pid=%d)", name, pcmk_children[lpc].pid); pcmk_process_exit(&(pcmk_children[lpc])); continue; } } /* at least one of the processes found at startup * is still going, so keep this recurring timer around */ keep_tracking = TRUE; } } return keep_tracking; } static bool find_and_track_existing_processes(void) { DIR *dp; struct dirent *entry; int start_tracker = 0; char entry_name[64]; dp = opendir("/proc"); if (!dp) { /* no proc directory to search through */ crm_notice("Can not read /proc directory to track existing components"); return FALSE; } while ((entry = readdir(dp)) != NULL) { int pid; int max = SIZEOF(pcmk_children); int i; if (crm_procfs_process_info(entry, entry_name, &pid) < 0) { continue; } for (i = 0; i < max; i++) { const char *name = pcmk_children[i].name; if (pcmk_children[i].start_seq == 0) { continue; } if (pcmk_children[i].flag == crm_proc_stonith_ng) { name = "stonithd"; } if (safe_str_eq(entry_name, name) && (crm_pid_active(pid, NULL) == 1)) { crm_notice("Tracking existing %s process (pid=%d)", name, pid); pcmk_children[i].pid = pid; pcmk_children[i].active_before_startup = TRUE; start_tracker = 1; break; } } } if (start_tracker) { g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes, NULL); } closedir(dp); return start_tracker; } static void init_children_processes(void) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); /* start any children that have not been detected */ for (start_seq = 1; start_seq < max; start_seq++) { /* don't start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].pid) { /* we are already tracking it */ continue; } if (start_seq == pcmk_children[lpc].start_seq) { start_child(&(pcmk_children[lpc])); } } } /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ setenv("PCMK_respawned", "true", 1); } static void mcp_cpg_destroy(gpointer user_data) { crm_err("Connection destroyed"); crm_exit(ENOTCONN); } /*! * \internal * \brief Process a CPG message (process list or manual peer cache removal) * * \param[in] handle CPG connection (ignored) * \param[in] groupName CPG group name (ignored) * \param[in] nodeid ID of affected node * \param[in] pid Process ID (ignored) * \param[in] msg CPG XML message * \param[in] msg_len Length of msg in bytes (ignored) */ static void mcp_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = string2xml(msg); const char *task = crm_element_value(xml, F_CRM_TASK); crm_trace("Received CPG message (%s): %.200s", (task? task : "process list"), (char*)msg); if (task == NULL) { if (nodeid == local_nodeid) { crm_info("Ignoring process list sent by peer for local node"); } else { uint32_t procs = 0; const char *uname = crm_element_value(xml, "uname"); crm_element_value_int(xml, "proclist", (int *)&procs); if (update_node_processes(nodeid, uname, procs)) { update_process_clients(NULL); } } } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { int id = 0; const char *name = NULL; crm_element_value_int(xml, XML_ATTR_ID, &id); name = crm_element_value(xml, XML_ATTR_UNAME); reap_crm_member(id, name); } if (xml != NULL) { free_xml(xml); } } static void mcp_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Update peer cache if needed */ pcmk_cpg_membership(handle, groupName, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries); /* Always broadcast our own presence after any membership change */ update_process_peers(); } static gboolean mcp_quorum_callback(unsigned long long seq, gboolean quorate) { pcmk_quorate = quorate; return TRUE; } static void mcp_quorum_destroy(gpointer user_data) { crm_info("connection lost"); } #if SUPPORT_CMAN static gboolean mcp_cman_dispatch(unsigned long long seq, gboolean quorate) { pcmk_quorate = quorate; return TRUE; } static void mcp_cman_destroy(gpointer user_data) { crm_info("connection closed"); } #endif int main(int argc, char **argv) { int rc; int flag; int argerr = 0; int option_index = 0; gboolean shutdown = FALSE; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; struct rlimit cores; crm_ipc_t *old_instance = NULL; qb_ipcs_service_t *ipcs = NULL; const char *facility = daemon_option("logfacility"); static crm_cluster_t cluster; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n"); mainloop_add_signal(SIGHUP, pcmk_ignore); mainloop_add_signal(SIGQUIT, pcmk_sigquit); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'f': /* Legacy */ break; case 'p': pid_file = optarg; break; case 's': set_daemon_option("node_start_state", "standby"); break; case '$': case '?': crm_help(flag, EX_OK); break; case 'S': shutdown = TRUE; break; case 'F': printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); crm_exit(pcmk_ok); default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', EX_USAGE); } setenv("LC_ALL", "C", 1); setenv("HA_LOGD", "no", 1); set_daemon_option("mcp", "true"); set_daemon_option("use_logd", "off"); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); /* Restore the original facility so that mcp_read_config() does the right thing */ set_daemon_option("logfacility", facility); crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP); old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0); crm_ipc_connect(old_instance); if (shutdown) { crm_debug("Terminating previous instance"); while (crm_ipc_connected(old_instance)) { xmlNode *cmd = create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL); crm_debug("."); crm_ipc_send(old_instance, cmd, 0, 0, NULL); free_xml(cmd); sleep(2); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_exit(pcmk_ok); } else if (crm_ipc_connected(old_instance)) { crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("Pacemaker is already active, aborting startup"); crm_exit(DAEMON_RESPAWN_STOP); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); if (mcp_read_config() == FALSE) { crm_notice("Could not obtain corosync config data, exiting"); crm_exit(ENODATA); } crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); mainloop = g_main_new(FALSE); sysrq_init(); rc = getrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Cannot determine current maximum core size."); } else { if (cores.rlim_max == 0 && geteuid() == 0) { cores.rlim_max = RLIM_INFINITY; } else { crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max); } cores.rlim_cur = cores.rlim_max; rc = setrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Core file generation will remain disabled." " Core files are an important diagnositic tool," " please consider enabling them by default."); } #if 0 /* system() is not thread-safe, can't call from here * Actually, it's a pretty hacky way to try and achieve this anyway */ if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid"); } #endif } rc = pcmk_ok; if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); crm_exit(ENOKEY); } mkdir(CRM_STATE_DIR, 0750); mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store core/blackbox/pengine/cib files in */ crm_build_path(CRM_PACEMAKER_DIR, 0750); mcp_chown(CRM_PACEMAKER_DIR, pcmk_uid, pcmk_gid); /* Used to store core files in */ crm_build_path(CRM_CORE_DIR, 0750); mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid); /* Used to store blackbox dumps in */ crm_build_path(CRM_BLACKBOX_DIR, 0750); mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid); /* Used to store policy engine inputs in */ crm_build_path(PE_STATE_DIR, 0750); mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store the cluster configuration */ crm_build_path(CRM_CONFIG_DIR, 0750); mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid); /* Resource agent paths are constructed by the lrmd */ ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks); if (ipcs == NULL) { crm_err("Couldn't start IPC server"); crm_exit(EIO); } /* Allows us to block shutdown */ if (cluster_connect_cfg(&local_nodeid) == FALSE) { crm_err("Couldn't connect to Corosync's CFG service"); crm_exit(ENOPROTOOPT); } if(pcmk_locate_sbd() > 0) { setenv("PCMK_watchdog", "true", 1); } else { setenv("PCMK_watchdog", "false", 1); } find_and_track_existing_processes(); cluster.destroy = mcp_cpg_destroy; cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; cluster.cpg.cpg_confchg_fn = mcp_cpg_membership; crm_set_autoreap(FALSE); if(cluster_connect_cpg(&cluster) == FALSE) { crm_err("Couldn't connect to Corosync's CPG service"); rc = -ENOPROTOOPT; } if (rc == pcmk_ok && is_corosync_cluster()) { /* Keep the membership list up-to-date for crm_node to query */ if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) { rc = -ENOTCONN; } } #if SUPPORT_CMAN if (rc == pcmk_ok && is_cman_cluster()) { init_cman_connection(mcp_cman_dispatch, mcp_cman_destroy); } #endif if(rc == pcmk_ok) { local_name = get_local_node_name(); update_node_processes(local_nodeid, local_name, get_process_list()); mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); init_children_processes(); crm_info("Starting mainloop"); g_main_run(mainloop); } if (ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } g_main_destroy(mainloop); cluster_disconnect_cpg(&cluster); cluster_disconnect_cfg(); crm_info("Exiting %s", crm_system_name); return crm_exit(rc); } diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c index 886255d785..5473e316d8 100644 --- a/tools/crm_simulate.c +++ b/tools/crm_simulate.c @@ -1,928 +1,928 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fake_transition.h" cib_t *global_cib = NULL; GListPtr op_fail = NULL; bool action_numbers = FALSE; gboolean quiet = FALSE; gboolean print_pending = TRUE; char *temp_shadow = NULL; extern gboolean bringing_nodes_online; #define quiet_log(fmt, args...) do { \ if(quiet == FALSE) { \ printf(fmt , ##args); \ } \ } while(0) extern void cleanup_alloc_calculations(pe_working_set_t * data_set); extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now); char *use_date = NULL; static void get_date(pe_working_set_t * data_set) { int value = 0; time_t original_date = 0; crm_element_value_int(data_set->input, "execution-date", &value); original_date = value; if (use_date) { data_set->now = crm_time_new(use_date); } else if(original_date) { char *when = NULL; data_set->now = crm_time_new(NULL); crm_time_set_timet(data_set->now, &original_date); when = crm_time_as_string(data_set->now, crm_time_log_date|crm_time_log_timeofday); printf("Using the original execution date of: %s\n", when); free(when); } } static void print_cluster_status(pe_working_set_t * data_set, long options) { char *online_nodes = NULL; char *online_remote_nodes = NULL; char *online_remote_containers = NULL; char *offline_nodes = NULL; char *offline_remote_nodes = NULL; GListPtr gIter = NULL; for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_mode = NULL; char *node_name = NULL; if (is_container_remote_node(node)) { node_name = crm_strdup_printf("%s:%s", node->details->uname, node->details->remote_rsc->container->id); } else { node_name = crm_strdup_printf("%s", node->details->uname); } if (node->details->unclean) { if (node->details->online && node->details->unclean) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { node_mode = "standby"; } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->maintenance) { if (node->details->online) { node_mode = "maintenance"; } else { node_mode = "OFFLINE (maintenance)"; } } else if (node->details->online) { if (is_container_remote_node(node)) { online_remote_containers = add_list_element(online_remote_containers, node_name); } else if (is_baremetal_remote_node(node)) { online_remote_nodes = add_list_element(online_remote_nodes, node_name); } else { online_nodes = add_list_element(online_nodes, node_name); } free(node_name); continue; } else { if (is_baremetal_remote_node(node)) { offline_remote_nodes = add_list_element(offline_remote_nodes, node_name); } else if (is_container_remote_node(node)) { /* ignore offline container nodes */ } else { offline_nodes = add_list_element(offline_nodes, node_name); } free(node_name); continue; } if (is_container_remote_node(node)) { printf("ContainerNode %s: %s\n", node_name, node_mode); } else if (is_baremetal_remote_node(node)) { printf("RemoteNode %s: %s\n", node_name, node_mode); } else if (safe_str_eq(node->details->uname, node->details->id)) { printf("Node %s: %s\n", node_name, node_mode); } else { printf("Node %s (%s): %s\n", node_name, node->details->id, node_mode); } free(node_name); } if (online_nodes) { printf("Online: [%s ]\n", online_nodes); free(online_nodes); } if (offline_nodes) { printf("OFFLINE: [%s ]\n", offline_nodes); free(offline_nodes); } if (online_remote_nodes) { printf("RemoteOnline: [%s ]\n", online_remote_nodes); free(online_remote_nodes); } if (offline_remote_nodes) { printf("RemoteOFFLINE: [%s ]\n", offline_remote_nodes); free(offline_remote_nodes); } if (online_remote_containers) { printf("Containers: [%s ]\n", online_remote_containers); free(online_remote_containers); } fprintf(stdout, "\n"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) { continue; } rsc->fns->print(rsc, NULL, pe_print_printf | options, stdout); } fprintf(stdout, "\n"); } static char * create_action_name(action_t * action) { char *action_name = NULL; const char *prefix = NULL; const char *action_host = NULL; const char *task = action->task; if (action->node) { action_host = action->node->details->uname; } else if (is_not_set(action->flags, pe_action_pseudo)) { action_host = ""; } if (safe_str_eq(action->task, RSC_CANCEL)) { prefix = "Cancel "; task = "monitor"; /* TO-DO: Hack! */ } if (action->rsc && action->rsc->clone_name) { char *key = NULL; const char *name = action->rsc->clone_name; const char *interval_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); int interval = crm_parse_int(interval_s, "0"); if (safe_str_eq(action->task, RSC_NOTIFY) || safe_str_eq(action->task, RSC_NOTIFIED)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation"); CRM_ASSERT(n_type != NULL); CRM_ASSERT(n_task != NULL); key = generate_notify_key(name, n_type, n_task); } else { key = generate_op_key(name, task, interval); } if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix ? prefix : "", key, action_host); } else { action_name = crm_strdup_printf("%s%s", prefix ? prefix : "", key); } free(key); } else if (safe_str_eq(action->task, CRM_OP_FENCE)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); action_name = crm_strdup_printf("%s%s '%s' %s", prefix ? prefix : "", action->task, op, action_host); } else if (action->rsc && action_host) { action_name = crm_strdup_printf("%s%s %s", prefix ? prefix : "", action->uuid, action_host); } else if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix ? prefix : "", action->task, action_host); } else { action_name = crm_strdup_printf("%s", action->uuid); } if(action_numbers) { char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id); free(action_name); action_name = with_id; } return action_name; } static void create_dotfile(pe_working_set_t * data_set, const char *dot_file, gboolean all_actions) { GListPtr gIter = NULL; FILE *dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { crm_perror(LOG_ERR, "Could not open %s for writing", dot_file); return; } fprintf(dot_strm, " digraph \"g\" {\n"); for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; const char *style = "dashed"; const char *font = "black"; const char *color = "black"; char *action_name = create_action_name(action); crm_trace("Action %d: %s %s %p", action->id, action_name, action->uuid, action); if (is_set(action->flags, pe_action_pseudo)) { font = "orange"; } if (is_set(action->flags, pe_action_dumped)) { style = "bold"; color = "green"; } else if (action->rsc != NULL && is_not_set(action->rsc->flags, pe_rsc_managed)) { color = "red"; font = "purple"; if (all_actions == FALSE) { goto dont_write; } } else if (is_set(action->flags, pe_action_optional)) { color = "blue"; if (all_actions == FALSE) { goto dont_write; } } else { color = "red"; CRM_CHECK(is_set(action->flags, pe_action_runnable) == FALSE,; ); } set_bit(action->flags, pe_action_dumped); crm_trace("\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]", action_name, style, color, font); fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", action_name, style, color, font); dont_write: free(action_name); } for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; GListPtr gIter2 = NULL; for (gIter2 = action->actions_before; gIter2 != NULL; gIter2 = gIter2->next) { action_wrapper_t *before = (action_wrapper_t *) gIter2->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; gboolean optional = TRUE; if (before->state == pe_link_dumped) { optional = FALSE; style = "bold"; } else if (is_set(action->flags, pe_action_pseudo) && (before->type & pe_order_stonith_stop)) { continue; } else if (before->state == pe_link_dup) { continue; } else if (before->type == pe_order_none) { continue; } else if (is_set(before->action->flags, pe_action_dumped) && is_set(action->flags, pe_action_dumped) && before->type != pe_order_load) { optional = FALSE; } if (all_actions || optional == FALSE) { before_name = create_action_name(before->action); after_name = create_action_name(action); crm_trace("\"%s\" -> \"%s\" [ style = %s]", before_name, after_name, style); fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", before_name, after_name, style); free(before_name); free(after_name); } } } fprintf(dot_strm, "}\n"); if (dot_strm != NULL) { fflush(dot_strm); fclose(dot_strm); } } static void setup_input(const char *input, const char *output) { int rc = pcmk_ok; cib_t *cib_conn = NULL; xmlNode *cib_object = NULL; char *local_output = NULL; if (input == NULL) { /* Use live CIB */ cib_conn = cib_new(); rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); if (rc == pcmk_ok) { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_object, cib_scope_local | cib_sync_call); } cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); cib_conn = NULL; if (rc != pcmk_ok) { fprintf(stderr, "Live CIB query failed: %s (%d)\n", pcmk_strerror(rc), rc); crm_exit(rc); } else if (cib_object == NULL) { fprintf(stderr, "Live CIB query failed: empty result\n"); crm_exit(ENOTCONN); } } else if (safe_str_eq(input, "-")) { cib_object = filename2xml(NULL); } else { cib_object = filename2xml(input); } if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); crm_exit(ENOKEY); } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); crm_exit(pcmk_err_schema_validation); } if (output == NULL) { char *pid = crm_getpid_s(); local_output = get_shadow_file(pid); temp_shadow = strdup(local_output); output = local_output; free(pid); } rc = write_xml_file(cib_object, output, FALSE); free_xml(cib_object); cib_object = NULL; if (rc < 0) { fprintf(stderr, "Could not create '%s': %s\n", output, strerror(errno)); crm_exit(rc); } setenv("CIB_file", output, 1); free(local_output); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"quiet", 0, 0, 'Q', "\tDisplay only essentialoutput"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"-spacer-", 0, 0, '-', "\nOperations:"}, {"run", 0, 0, 'R', "\tDetermine the cluster's response to the given configuration and status"}, {"simulate", 0, 0, 'S', "Simulate the transition's execution and display the resulting cluster status"}, {"in-place", 0, 0, 'X', "Simulate the transition's execution and store the result back to the input file"}, {"show-scores", 0, 0, 's', "Show allocation scores"}, {"show-utilization", 0, 0, 'U', "Show utilization information"}, {"profile", 1, 0, 'P', "Run all tests in the named directory to create profiling data"}, {"pending", 0, 0, 'j', "\tDisplay pending state if 'record-pending' is enabled", pcmk_option_hidden}, {"-spacer-", 0, 0, '-', "\nSynthetic Cluster Events:"}, {"node-up", 1, 0, 'u', "\tBring a node online"}, {"node-down", 1, 0, 'd', "\tTake a node offline"}, {"node-fail", 1, 0, 'f', "\tMark a node as failed"}, {"op-inject", 1, 0, 'i', "\tGenerate a failure for the cluster to react to in the simulation"}, {"-spacer-", 0, 0, '-', "\t\tValue is of the form ${resource}_${task}_${interval}@${node}=${rc}."}, {"-spacer-", 0, 0, '-', "\t\tEg. memcached_monitor_20000@bart.example.com=7"}, {"-spacer-", 0, 0, '-', "\t\tFor more information on OCF return codes, refer to: http://www.clusterlabs.org/doc/en-US/Pacemaker/1.1/html/Pacemaker_Explained/s-ocf-return-codes.html"}, {"op-fail", 1, 0, 'F', "\tIf the specified task occurs during the simulation, have it fail with return code ${rc}"}, {"-spacer-", 0, 0, '-', "\t\tValue is of the form ${resource}_${task}_${interval}@${node}=${rc}."}, {"-spacer-", 0, 0, '-', "\t\tEg. memcached_stop_0@bart.example.com=1\n"}, {"-spacer-", 0, 0, '-', "\t\tThe transition will normally stop at the failed action. Save the result with --save-output and re-run with --xml-file"}, {"set-datetime", 1, 0, 't', "Set date/time"}, {"quorum", 1, 0, 'q', "\tSpecify a value for quorum"}, {"watchdog", 1, 0, 'w', "\tAssume a watchdog device is active"}, {"ticket-grant", 1, 0, 'g', "Grant a ticket"}, {"ticket-revoke", 1, 0, 'r', "Revoke a ticket"}, {"ticket-standby", 1, 0, 'b', "Make a ticket standby"}, {"ticket-activate", 1, 0, 'e', "Activate a ticket"}, {"-spacer-", 0, 0, '-', "\nOutput Options:"}, {"save-input", 1, 0, 'I', "\tSave the input configuration to the named file"}, {"save-output", 1, 0, 'O', "Save the output configuration to the named file"}, {"save-graph", 1, 0, 'G', "\tSave the transition graph (XML format) to the named file"}, {"save-dotfile", 1, 0, 'D', "Save the transition graph (DOT format) to the named file"}, {"all-actions", 0, 0, 'a', "\tDisplay all possible actions in the DOT graph - even ones not part of the transition"}, {"-spacer-", 0, 0, '-', "\nData Source:"}, {"live-check", 0, 0, 'L', "\tConnect to the CIB and use the current contents as input"}, {"xml-file", 1, 0, 'x', "\tRetrieve XML from the named file"}, {"xml-pipe", 0, 0, 'p', "\tRetrieve XML from stdin"}, {"-spacer-", 0, 0, '-', "\nExamples:\n"}, {"-spacer-", 0, 0, '-', "Pretend a recurring monitor action found memcached stopped on node fred.example.com and, during recovery, that the memcached stop action failed", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " crm_simulate -LS --op-inject memcached:0_monitor_20000@bart.example.com=7 --op-fail memcached:0_stop_0@fred.example.com=1 --save-output /tmp/memcached-test.xml", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Now see what the reaction to the stop failure would be", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " crm_simulate -S --xml-file /tmp/memcached-test.xml", pcmk_option_example}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void profile_one(const char *xml_file) { xmlNode *cib_object = NULL; pe_working_set_t data_set; printf("* Testing %s\n", xml_file); cib_object = filename2xml(xml_file); if (get_object_root(XML_CIB_TAG_STATUS, cib_object) == NULL) { create_xml_node(cib_object, XML_CIB_TAG_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return; } set_working_set_defaults(&data_set); data_set.input = cib_object; get_date(&data_set); do_calculations(&data_set, cib_object, NULL); cleanup_alloc_calculations(&data_set); } #ifndef FILENAME_MAX # define FILENAME_MAX 512 #endif static int profile_all(const char *dir) { struct dirent **namelist; int lpc = 0; int file_num = scandir(dir, &namelist, 0, alphasort); if (file_num > 0) { struct stat prop; - char buffer[FILENAME_MAX + 1]; + char buffer[FILENAME_MAX]; while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (!crm_ends_with_ext(namelist[file_num]->d_name, ".xml")) { free(namelist[file_num]); continue; } lpc++; - snprintf(buffer, FILENAME_MAX, "%s/%s", dir, namelist[file_num]->d_name); + snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { profile_one(buffer); } free(namelist[file_num]); } free(namelist); } return lpc; } static int count_resources(pe_working_set_t * data_set, resource_t * rsc) { int count = 0; GListPtr gIter = NULL; if (rsc == NULL) { gIter = data_set->resources; } else if (rsc->children) { gIter = rsc->children; } else { return is_not_set(rsc->flags, pe_rsc_orphan); } for (; gIter != NULL; gIter = gIter->next) { count += count_resources(data_set, gIter->data); } return count; } int main(int argc, char **argv) { int rc = 0; guint modified = 0; gboolean store = FALSE; gboolean process = FALSE; gboolean simulate = FALSE; gboolean all_actions = FALSE; gboolean have_stdout = FALSE; pe_working_set_t data_set; const char *xml_file = "-"; const char *quorum = NULL; const char *watchdog = NULL; const char *test_dir = NULL; const char *dot_file = NULL; const char *graph_file = NULL; const char *input_file = NULL; const char *output_file = NULL; int flag = 0; int index = 0; int argerr = 0; GListPtr node_up = NULL; GListPtr node_down = NULL; GListPtr node_fail = NULL; GListPtr op_inject = NULL; GListPtr ticket_grant = NULL; GListPtr ticket_revoke = NULL; GListPtr ticket_standby = NULL; GListPtr ticket_activate = NULL; xmlNode *input = NULL; crm_log_cli_init("crm_simulate"); crm_set_options(NULL, "datasource operation [additional options]", long_options, "Tool for simulating the cluster's response to events"); if (argc < 2) { crm_help('?', EX_USAGE); } while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': if (have_stdout == FALSE) { /* Redirect stderr to stdout so we can grep the output */ have_stdout = TRUE; close(STDERR_FILENO); dup2(STDOUT_FILENO, STDERR_FILENO); } crm_bump_log_level(argc, argv); action_numbers = TRUE; break; case '?': case '$': crm_help(flag, EX_OK); break; case 'p': xml_file = "-"; break; case 'Q': quiet = TRUE; break; case 'L': xml_file = NULL; break; case 'x': xml_file = optarg; break; case 'u': modified++; bringing_nodes_online = TRUE; node_up = g_list_append(node_up, optarg); break; case 'd': modified++; node_down = g_list_append(node_down, optarg); break; case 'f': modified++; node_fail = g_list_append(node_fail, optarg); break; case 't': use_date = strdup(optarg); break; case 'i': modified++; op_inject = g_list_append(op_inject, optarg); break; case 'F': process = TRUE; simulate = TRUE; op_fail = g_list_append(op_fail, optarg); break; case 'w': modified++; watchdog = optarg; break; case 'q': modified++; quorum = optarg; break; case 'g': modified++; ticket_grant = g_list_append(ticket_grant, optarg); break; case 'r': modified++; ticket_revoke = g_list_append(ticket_revoke, optarg); break; case 'b': modified++; ticket_standby = g_list_append(ticket_standby, optarg); break; case 'e': modified++; ticket_activate = g_list_append(ticket_activate, optarg); break; case 'a': all_actions = TRUE; break; case 's': process = TRUE; show_scores = TRUE; break; case 'U': process = TRUE; show_utilization = TRUE; break; case 'j': print_pending = TRUE; break; case 'S': process = TRUE; simulate = TRUE; break; case 'X': store = TRUE; process = TRUE; simulate = TRUE; break; case 'R': process = TRUE; break; case 'D': process = TRUE; dot_file = optarg; break; case 'G': process = TRUE; graph_file = optarg; break; case 'I': input_file = optarg; break; case 'O': output_file = optarg; break; case 'P': test_dir = optarg; break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } if (test_dir != NULL) { return profile_all(test_dir); } setup_input(xml_file, store ? xml_file : output_file); global_cib = cib_new(); global_cib->cmds->signon(global_cib, crm_system_name, cib_command); set_working_set_defaults(&data_set); if (data_set.now != NULL) { quiet_log(" + Setting effective cluster time: %s", use_date); crm_time_log(LOG_WARNING, "Set fake 'now' to", data_set.now, crm_time_log_date | crm_time_log_timeofday); } rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call | cib_scope_local); CRM_ASSERT(rc == pcmk_ok); data_set.input = input; get_date(&data_set); if(xml_file) { set_bit(data_set.flags, pe_flag_sanitized); } cluster_status(&data_set); if (quiet == FALSE) { int options = print_pending ? pe_print_pending : 0; if(is_set(data_set.flags, pe_flag_maintenance_mode)) { quiet_log("\n *** Resource management is DISABLED ***"); quiet_log("\n The cluster will not attempt to start, stop or recover services"); quiet_log("\n"); } if(data_set.disabled_resources || data_set.blocked_resources) { quiet_log("%d of %d resources DISABLED and %d BLOCKED from being started due to failures\n", data_set.disabled_resources, count_resources(&data_set, NULL), data_set.blocked_resources); } quiet_log("\nCurrent cluster status:\n"); print_cluster_status(&data_set, options); } if (modified) { quiet_log("Performing requested modifications\n"); modify_configuration(&data_set, global_cib, quorum, watchdog, node_up, node_down, node_fail, op_inject, ticket_grant, ticket_revoke, ticket_standby, ticket_activate); rc = global_cib->cmds->query(global_cib, NULL, &input, cib_sync_call); if (rc != pcmk_ok) { fprintf(stderr, "Could not connect to the CIB for input: %s\n", pcmk_strerror(rc)); goto done; } cleanup_calculations(&data_set); data_set.input = input; get_date(&data_set); if(xml_file) { set_bit(data_set.flags, pe_flag_sanitized); } cluster_status(&data_set); } if (input_file != NULL) { rc = write_xml_file(input, input_file, FALSE); if (rc < 0) { fprintf(stderr, "Could not create '%s': %s\n", input_file, strerror(errno)); goto done; } } rc = 0; if (process || simulate) { crm_time_t *local_date = NULL; if (show_scores && show_utilization) { printf("Allocation scores and utilization information:\n"); } else if (show_scores) { fprintf(stdout, "Allocation scores:\n"); } else if (show_utilization) { printf("Utilization information:\n"); } do_calculations(&data_set, input, local_date); input = NULL; /* Don't try and free it twice */ if (graph_file != NULL) { write_xml_file(data_set.graph, graph_file, FALSE); } if (dot_file != NULL) { create_dotfile(&data_set, dot_file, all_actions); } if (quiet == FALSE) { GListPtr gIter = NULL; quiet_log("%sTransition Summary:\n", show_scores || show_utilization || modified ? "\n" : ""); fflush(stdout); LogNodeActions(&data_set, TRUE); for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; LogActions(rsc, &data_set, TRUE); } } } if (simulate) { rc = run_simulation(&data_set, global_cib, op_fail, quiet); if(quiet == FALSE) { get_date(&data_set); quiet_log("\nRevised cluster status:\n"); cluster_status(&data_set); print_cluster_status(&data_set, 0); } } done: cleanup_alloc_calculations(&data_set); global_cib->cmds->signoff(global_cib); cib_delete(global_cib); free(use_date); fflush(stderr); if (temp_shadow) { unlink(temp_shadow); free(temp_shadow); } return crm_exit(rc); }