diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index b9f8f14701..eb8fe98985 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3107 +1,3108 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data); static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; /* seconds */ int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; uint32_t victim_nodeid; char *action; char *device; GList *device_list; GList *device_next; void *internal_user_data; void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc); static gboolean is_action_required(const char *action, stonith_device_t *device) { return device && device->automatic_unfencing && pcmk__str_eq(action, "on", pcmk__str_casei); } static int get_action_delay_max(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_max = 0; if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { delay_max = crm_parse_interval_spec(value) / 1000; } return delay_max; } static int get_action_delay_base(stonith_device_t *device, const char *action, const char *victim) { char *hash_value = NULL; int delay_base = 0; if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = strdup(hash_value); char *valptr = value; CRM_ASSERT(value != NULL); if (victim) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(victim, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, victim); break; } } } if (strchr(value, ':') == 0) { delay_base = crm_parse_interval_spec(value) / 1000; } free(valptr); } return delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(stonith_device_t * device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, "reboot", pcmk__str_casei) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = "off"; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { return atoi(value); } } return default_timeout; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } static async_command_t * create_async_command(xmlNode * msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; // Value -1 means disable any static/random fencing delays crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay)); cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd->done_cb = st_child_done; cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(GPid pid, gpointer user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->victim == NULL)? "" : " targeting "), ((cmd->victim == NULL)? "" : cmd->victim), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->victim == NULL)? "" : " targeting "), ((pending_op->victim == NULL)? "" : pending_op->victim), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) { if (node_does_watchdog_fencing(stonith_our_uname)) { pcmk__panic(__func__); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); cmd->done_cb(0, 0, NULL, cmd); goto done; } } #if SUPPORT_CIBSECRETS if (pcmk__substitute_secrets(device->id, device->params) != pcmk_rc_ok) { /* replacing secrets failed! */ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { /* don't fail on stop! */ crm_info("Proceeding with stop operation for %s", device->id); } else { crm_err("Considering %s unconfigured: Failed to get secrets", device->id); exec_rc = PCMK_OCF_NOT_CONFIGURED; cmd->done_cb(0, exec_rc, NULL, cmd); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->victim == NULL)? "" : " targeting "), ((cmd->victim == NULL)? "" : cmd->victim), device->id, device->agent); action_str = "off"; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith_action_create(device->agent, action_str, cmd->victim, cmd->victim_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { crm_warn("Operation '%s'%s%s using %s failed: %s " CRM_XS " rc=%d", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, pcmk_strerror(exec_rc), exec_rc); cmd->activating_on = NULL; cmd->done_cb(0, exec_rc, NULL, cmd); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = NULL; cmd->delay_id = 0; device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && cmd->victim) { crm_node_t *node = crm_get_peer(0, cmd->victim); cmd->victim_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->victim); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dont_call] We're not using rand() for security cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); cmd->done_cb(0, -ENODEV, NULL, cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); free(device->on_target_actions); free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } *metadata = string2xml(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } #define MAX_ACTION_LEN 256 static char * add_action(char *actions, const char *action) { int offset = 0; if (actions == NULL) { actions = calloc(1, MAX_ACTION_LEN); } else { offset = strlen(actions); } if (offset > 0) { offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " "); } offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action); return actions; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *on_target = NULL; const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; on_target = crm_element_value(match, "on_target"); action = crm_element_value(match, "name"); if(pcmk__str_eq(action, "list", pcmk__str_casei)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if(pcmk__str_eq(action, "status", pcmk__str_casei)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if(pcmk__str_eq(action, "reboot", pcmk__str_casei)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, "on", pcmk__str_casei)) { /* "automatic" means the cluster will unfence node when it joins */ const char *automatic = crm_element_value(match, "automatic"); /* "required" is a deprecated synonym for "automatic" */ const char *required = crm_element_value(match, "required"); if (crm_is_true(automatic) || crm_is_true(required)) { device->automatic_unfencing = TRUE; } } if (action && crm_is_true(on_target)) { device->on_target_actions = add_action(device->on_target_actions, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, strdup(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in,out] params Device parameters */ static GHashTable * xml2device_params(const char *name, xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "reboot") == 0) { crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "off") == 0) { map_action(params, "reboot", value); } else { map_action(params, "off", value); map_action(params, "reboot", value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = "static-list"; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = "dynamic-list"; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = "status"; } else { check_type = "none"; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode * msg) { const char *value; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, "agent"); CRM_CHECK(agent != NULL, return device); device = calloc(1, sizeof(stonith_device_t)); CRM_CHECK(device != NULL, {free(agent); return device;}); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) { /* Other than "static-list", dev-> targets is unnecessary. */ g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (pcmk__str_eq(value, "unfencing", pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required("on", device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, device->on_target_actions); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *victim, int timeout, void *internal_user_data, void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); cmd->victim = victim ? strdup(victim) : NULL; cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } static void status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (rc == 1 /* unknown */ ) { crm_trace("Host %s is not known by %s", search->host, dev->id); } else if (rc == 0 /* active */ || rc == 2 /* inactive */ ) { crm_trace("Host %s is known by %s", search->host, dev->id); can = TRUE; } else { crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host, rc); } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); /* If we successfully got the targets earlier, don't disable. */ if (rc != 0 && !dev->targets) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { /* If the operation fails if the user does not explicitly specify "dynamic-list", it will fall back to "status". */ crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output); g_hash_table_replace(dev->params, strdup(PCMK_STONITH_HOST_CHECK), strdup("status")); } } else if (!rc) { crm_info("Refreshing port list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(output); dev->targets_age = time(NULL); } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t * device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " "stonith-watchdog-timeout set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(stonith_our_uname)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(stonith_our_uname); g_hash_table_replace(device->params, strdup(PCMK_STONITH_HOST_LIST), strdup(stonith_our_uname)); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (desc) { *desc = device->id; } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } int stonith_device_remove(const char *id, gboolean from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return pcmk_ok; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } return pcmk_ok; } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(stonith_topology_t * tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char *stonith_level_key(xmlNode *level, int mode) { if(mode == -1) { mode = stonith_level_kind(level); } switch(mode) { case 0: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); case 1: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); case 2: { const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE); if(name && value) { return crm_strdup_printf("%s=%s", name, value); } } default: return crm_strdup_printf("Unknown-%d-%s", mode, ID(level)); } } int stonith_level_kind(xmlNode * level) { int mode = 0; const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET); if(target == NULL) { mode++; target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN); } if(stand_alone == FALSE && target == NULL) { mode++; if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) { mode++; } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) { mode++; } } return mode; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Register a STONITH level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, will be set to string representation ("TARGET[LEVEL]") * * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index */ int stonith_level_register(xmlNode *msg, char **desc) { int id = 0; xmlNode *level; int mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; /* Allow the XML here to point to the level tag directly, or wrapped in * another tag. If directly, don't search by xpath, because it might give * multiple hits (e.g. if the XML is the CIB). */ if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) { level = msg; } else { level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); } CRM_CHECK(level != NULL, return -EINVAL); mode = stonith_level_kind(level); target = stonith_level_key(level, mode); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) { crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology)); free(target); crm_log_xml_err(level, "Bad topology"); return -EINVAL; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } return pcmk_ok; } int stonith_level_remove(xmlNode *msg, char **desc) { int id = 0; stonith_topology_t *tp; char *target; /* Unlike additions, removal requests should always have one level tag */ xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); CRM_CHECK(level != NULL, return -EINVAL); target = stonith_level_key(level, -1); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (id >= ST_LEVEL_MAX) { free(target); return -EINVAL; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id > 0 && tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); return pcmk_ok; } /*! * \internal * \brief Schedule an (asynchronous) action directly on a stonith device * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg API message XML with desired action * \param[out] output Unused * * \return -EINPROGRESS on success, -errno otherwise * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = calloc(alloc_size, sizeof(char)); if (rv) { char *pos = rv; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } } return rv; } static int stonith_device_action(xmlNode * msg, char **output) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); const char *action = crm_element_value(op, F_STONITH_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); return -EPROTO; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { if (stonith_watchdog_timeout_ms <= 0) { return -ENODEV; } else { if (pcmk__str_eq(action, "list", pcmk__str_casei)) { *output = list_to_string(stonith_watchdog_targets, "\n", TRUE); return pcmk_ok; } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) { return pcmk_ok; } } } device = g_hash_table_lookup(device_list, id); if ((device == NULL) || (!device->api_registered && !strcmp(action, "monitor"))) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not found", action, id); return -ENODEV; } cmd = create_async_command(msg); if (cmd == NULL) { return -EPROTO; } schedule_stonith_command(cmd, device); return -EINPROGRESS; } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei); if (device && action && device->on_target_actions && strstr(device->on_target_actions, action)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for " "local host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } static void can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = NULL; const char *host = search->host; const char *alias = NULL; CRM_LOG_ASSERT(dev != NULL); if (dev == NULL) { goto search_report_results; } else if (host == NULL) { can = TRUE; goto search_report_results; } /* Short-circuit query if this host is not allowed to perform the action */ if (pcmk__str_eq(search->action, "reboot", pcmk__str_casei)) { /* A "reboot" *might* get remapped to "off" then "on", so short-circuit * only if all three are disallowed. If only one or two are disallowed, * we'll report that with the results. We never allow suicide for * remapped "on" operations because the host is off at that point. */ if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide) && !localhost_is_eligible(dev, "off", host, search->allow_suicide) && !localhost_is_eligible(dev, "on", host, FALSE)) { goto search_report_results; } } else if (!localhost_is_eligible(dev, search->action, host, search->allow_suicide)) { goto search_report_results; } alias = g_hash_table_lookup(dev->aliases, host); if (alias == NULL) { alias = host; } check_type = target_list_type(dev); if (pcmk__str_eq(check_type, "none", pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if (pcmk__str_in_list(host, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__func__, dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) { crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__func__, dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } if (pcmk__str_eq(host, alias, pcmk__str_casei)) { crm_notice("%s is%s eligible to fence (%s) %s: %s", dev->id, (can? "" : " not"), search->action, host, check_type); } else { crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s", dev->id, (can? "" : " not"), search->action, host, alias, check_type); } search_report_results: search_devices_record_result(search, dev ? dev->id : NULL, can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data)) { struct device_search_s *search; int per_device_timeout = DEFAULT_QUERY_TIMEOUT; int devices_needing_async_query = 0; char *key = NULL; const char *check_type = NULL; GHashTableIter gIter; stonith_device_t *device = NULL; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { callback(NULL, user_data); return; } g_hash_table_iter_init(&gIter, device_list); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) { check_type = target_list_type(device); if (pcmk__strcase_any_of(check_type, "status", "dynamic-list", NULL)) { devices_needing_async_query++; } } /* If we have devices that require an async event in order to know what * nodes they can fence, we have to give the events a timeout. The total * query timeout is divided among those events. */ if (devices_needing_async_query) { per_device_timeout = timeout / devices_needing_async_query; if (!per_device_timeout) { crm_err("Fencing timeout %ds is too low; using %ds, " "but consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); per_device_timeout = DEFAULT_QUERY_TIMEOUT; } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) { crm_notice("Fencing timeout %ds is low for the current " "configuration; consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); } } search->host = host ? strdup(host) : NULL; search->action = action ? strdup(action) : NULL; search->per_device_timeout = per_device_timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); } action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %dms using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %dms using %s", action, delay_max, device->id); crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %dms using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %dms using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %dms and a randomly chosen " "maximum delay of %dms using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); crm_xml_add(child, XML_ATTR_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GList *lpc = NULL; /* Pack the results into XML */ list = create_xml_node(NULL, __func__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = "off"; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "off", device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "on", device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } if (list != NULL) { crm_log_xml_trace(list, "Add query results"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id); free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } static void stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options) { struct st_query_data *query = NULL; const char *action = NULL; const char *target = NULL; int timeout = 0; xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER); crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout); if (dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); if (device && pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { /* No query or reply necessary */ return; } } crm_log_xml_debug(msg, "Query"); query = calloc(1, sizeof(struct st_query_data)); query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok); query->remote_peer = remote_peer ? strdup(remote_peer) : NULL; query->client_id = client_id ? strdup(client_id) : NULL; query->target = target ? strdup(target) : NULL; query->action = action ? strdup(action) : NULL; query->call_options = call_options; get_capable_devices(target, action, timeout, pcmk_is_set(call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb); } #define ST_LOG_OUTPUT_MAX 512 static void log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output, gboolean op_merged) { if (rc == 0) { next = NULL; } if (cmd->victim != NULL) { do_crm_log(((rc == 0)? LOG_NOTICE : LOG_ERR), "Operation '%s' [%d] (%scall %d from %s) targeting %s " "using %s returned %d (%s)%s%s", cmd->action, pid, (op_merged? "merged " : ""), cmd->id, cmd->client_name, cmd->victim, cmd->device, rc, pcmk_strerror(rc), (next? ", retrying with " : ""), (next ? next : "")); } else { do_crm_log_unlikely(((rc == 0)? LOG_DEBUG : LOG_NOTICE), "Operation '%s' [%d]%s using %s returned %d (%s)%s%s", cmd->action, pid, (op_merged? " (merged)" : ""), cmd->device, rc, pcmk_strerror(rc), (next? ", retrying with " : ""), (next ? next : "")); } if (output) { // Output may have multiple lines char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output); free(prefix); } } static void stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid, int options) { xmlNode *reply = NULL; gboolean bcast = FALSE; reply = stonith_construct_async_reply(cmd, output, NULL, rc); if (pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) { /* Too verbose to log */ crm_trace("Metadata query for %s", cmd->device); output = NULL; } else if (pcmk__str_any_of(cmd->action, "monitor", "list", "status", NULL)) { crm_trace("Never broadcast '%s' replies", cmd->action); } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) { crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output, (options & st_reply_opt_merged ? TRUE : FALSE)); crm_log_xml_trace(reply, "Reply"); if (options & st_reply_opt_merged) { crm_xml_add(reply, F_STONITH_MERGED, "true"); } if (bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if (cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); } free_xml(reply); } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data) { stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; async_command_t *cmd = user_data; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(cmd != NULL, return); cmd->active_on = NULL; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if (device) { if (!device->verified && (rc == pcmk_ok) && (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } crm_debug("Operation '%s' using %s returned %d (%d devices remaining)", cmd->action, cmd->device, rc, g_list_length(cmd->device_next)); if (rc == 0) { GList *iter; /* see if there are any required devices left to execute for this op */ for (iter = cmd->device_next; iter != NULL; iter = iter->next) { next_device = g_hash_table_lookup(device_list, iter->data); if (next_device != NULL && is_action_required(cmd->action, next_device)) { cmd->device_next = iter->next; break; } next_device = NULL; } } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->device_next->data); cmd->device_next = cmd->device_next->next; } /* this operation requires more fencing, hooray! */ if (next_device) { log_operation(cmd, rc, pid, next_device->id, output, FALSE); schedule_stonith_command(cmd, next_device); /* Prevent cmd from being freed */ cmd = NULL; goto done; } stonith_send_async_reply(cmd, output, rc, pid, st_reply_opt_none); if (rc != 0) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if (cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->victim, cmd_other->victim, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_casei) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } /* Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to * off, then cmd->action will be reboot and will be merged with any * other reboot requests. If the originating fencer remapped a * topology reboot to off then on, we will get here once with * cmd->action "off" and once with "on", and they will be merged * separately with similar requests. */ crm_notice("Merging fencing action '%s' targeting %s originating from " "client %s with identical fencing request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); stonith_send_async_reply(cmd_other, output, rc, pid, st_reply_opt_merged); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->victim); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); if (device) { cmd->device_list = devices; cmd->device_next = devices->next; devices = NULL; /* list owned by cmd now */ } } /* we have a device, schedule it for fencing. */ if (device) { schedule_stonith_command(cmd, device); /* in progress */ return; } /* no device found! */ stonith_send_async_reply(cmd, NULL, -ENODEV, 0, st_reply_opt_none); free_async_command(cmd); g_list_free_full(devices, free); } static int stonith_fence(xmlNode * msg) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); if (cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); return -ENODEV; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if (cmd->options & st_opt_cs_nodeid) { int nodeid; crm_node_t *node; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); if (node) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb); } return -EINPROGRESS; } xmlNode * stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __func__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result %d (initiated before we came up?)", rc); } else { const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a result reply with%s reply output (rc=%d)", (data? "" : "out"), rc); for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { add_message_xml(reply, F_STONITH_CALLDATA, data); } } return reply; } static xmlNode * stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __func__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if (data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } void set_fencing_completed(remote_fencing_op_t * op) { #ifdef CLOCK_MONOTONIC struct timespec tv; clock_gettime(CLOCK_MONOTONIC, &tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; #else op->completed = time(NULL); op->completed_nsec = 0L; #endif } /*! * \internal - * \brief Determine if we need to use an alternate node to - * fence the target. If so return that node's uname + * \brief Look for alternate node needed if local node shouldn't fence target * - * \retval NULL, no alternate host - * \retval uname, uname of alternate host to use + * \param[in] target Node that must be fenced + * + * \return Name of an alternate node that should fence \p target if any, + * or NULL otherwise */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target); if (find_topology_for_host(target) && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if (fencing_peer_active(entry) && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) { alternate_host = entry->uname; break; } } if (alternate_host == NULL) { crm_err("No alternate host available to handle request " "for self-fencing with topology"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id) { if (remote_peer) { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, client_id, pcmk_is_set(call_options, st_opt_sync_call), (remote_peer != NULL)); } } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, F_STONITH_TARGET); } relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY); op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); client_name = crm_element_value(request, F_STONITH_CLIENTNAME); /* Delete RELAY operation. */ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s' targeting %s for %s), " "replaced by op %s ('%s' targeting %s for %s)", relay_op->id, relay_op->action, relay_op->target, relay_op->client_name, op_id, relay_op->action, target, client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } static int handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; xmlNode *data = NULL; xmlNode *reply = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); /* IPC commands related to fencing configuration may be done only by * privileged users (i.e. root or hacluster), because all other users should * go through the CIB to have ACLs applied. * * If no client was given, this is a peer request, which is always allowed. */ bool allowed = (client == NULL) || pcmk_is_set(client->flags, pcmk__client_privileged); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); pcmk__ipc_send_xml(client, id, reply, flags); client->request_id = 0; free_xml(reply); return 0; } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) { rc = stonith_device_action(request, &output); } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } /* Delete the DC node RELAY operation. */ remove_relay_op(request); stonith_query(request, remote_peer, client_id, call_options); return 0; } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if (flag_name) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__client_name(client)); pcmk__set_client_flags(client, get_stonith_flag(flag_name)); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__client_name(client)); pcmk__clear_client_flags(client, get_stonith_flag(flag_name)); } pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK); return 0; } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client)), crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { if (remote_peer || stand_alone) { rc = stonith_fence(request); } else if (call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); crm_notice("Received manual confirmation that %s is fenced", target); rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (client) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__client_name(client), action, target, (device? device : "any device")); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device ? device : "(any)"); } alternate_host = check_alternate_host(target); if (alternate_host && client) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; crm_notice("Forwarding self-fencing request to peer %s" "due to topology", alternate_host); if (client->id) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } /* Create an operation for RELAY and send the ID in the RELAY message. */ /* When a QUERY response is received, delete the RELAY operation to avoid the existence of duplicate operations. */ op = create_remote_stonith_op(client_id, request, FALSE); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) { rc = stonith_fence_history(request, &data, remote_peer, call_options); if (call_options & st_opt_discard_reply) { /* we don't expect answers to the broadcast * we might have sent out */ free_xml(data); return pcmk_ok; } } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { const char *device_id = NULL; if (allowed) { rc = stonith_device_register(request, &device_id, FALSE); } else { rc = -EACCES; } do_stonith_notify_device(call_options, op, rc, device_id); } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); const char *device_id = crm_element_value(dev, XML_ATTR_ID); if (allowed) { rc = stonith_device_remove(device_id, FALSE); } else { rc = -EACCES; } do_stonith_notify_device(call_options, op, rc, device_id); } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { char *device_id = NULL; if (allowed) { rc = stonith_level_register(request, &device_id); } else { rc = -EACCES; } do_stonith_notify_level(call_options, op, rc, device_id); free(device_id); } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { char *device_id = NULL; if (allowed) { rc = stonith_level_remove(request, &device_id); } else { rc = -EACCES; } do_stonith_notify_level(call_options, op, rc, device_id); } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { int node_id = 0; const char *name = NULL; crm_element_value_int(request, XML_ATTR_ID, &node_id); name = crm_element_value(request, XML_ATTR_UNAME); reap_crm_member(node_id, name); return pcmk_ok; } else { crm_err("Unknown IPC request %s from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); } done: if (rc == -EACCES) { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", crm_str(op), pcmk__client_name(client)); } /* Always reply unless the request is in process still. * If in progress, a reply will happen async after the request * processing is finished */ if (rc != -EINPROGRESS) { crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, id, pcmk_is_set(call_options, st_opt_sync_call), call_options, crm_element_value(request, F_STONITH_CALLOPTS)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } reply = stonith_construct_reply(request, output, data, rc); stonith_send_reply(reply, call_options, remote_peer, client_id); } free(output); free_xml(data); free_xml(reply); return rc; } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { process_remote_stonith_exec(request); } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; /* Copy op for reporting. The original might get freed by handle_reply() * before we use it in crm_debug(): * handle_reply() * |- process_remote_stonith_exec() * |-- remote_op_done() * |--- handle_local_reply_and_notify() * |---- crm_xml_add(...F_STONITH_OPERATION...) * |--- free_xml(op->request) */ char *op = crm_element_value_copy(request, F_STONITH_OPERATION); if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s %u from %s %s with call options 0x%08x", op, (is_reply? " reply" : ""), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client)), call_options); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } crm_debug("Processed %s%s from %s %s: %s (rc=%d)", op, (is_reply? " reply" : ""), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client)), ((rc > 0)? "" : pcmk_strerror(rc)), rc); free(op); } diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index 77b3881cb7..b4e107d8d6 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -1,279 +1,280 @@ /* - * Copyright 2009-2020 the Pacemaker project contributors + * Copyright 2009-2021 the Pacemaker project contributors * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include // uint32_t, uint64_t #include /*! * \internal - * \brief Check to see if target was fenced in the last few seconds. - * \param tolerance, The number of seconds to look back in time - * \param target, The node to search for - * \param action, The action we want to match. + * \brief Check whether target has already been fenced recently * - * \retval FALSE, not match - * \retval TRUE, fencing operation took place in the last 'tolerance' number of seconds. + * \param[in] tolerance Number of seconds to look back in time + * \param[in] target Name of node to search for + * \param[in] action Action we want to match + * + * \return TRUE if an equivalent fencing operation took place in the last + * \p tolerance seconds, FALSE otherwise */ gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action); typedef struct stonith_device_s { char *id; char *agent; char *namespace; /*! list of actions that must execute on the target node. Used for unfencing */ char *on_target_actions; GList *targets; time_t targets_age; gboolean has_attr_map; /* should nodeid parameter for victim be included in agent arguments */ gboolean include_nodeid; /* whether the cluster should automatically unfence nodes with the device */ gboolean automatic_unfencing; guint priority; uint32_t flags; // Group of enum st_device_flags GHashTable *params; GHashTable *aliases; GList *pending_ops; mainloop_timer_t *timer; crm_trigger_t *work; xmlNode *agent_metadata; /*! A verified device is one that has contacted the * agent successfully to perform a monitor operation */ gboolean verified; gboolean cib_registered; gboolean api_registered; gboolean dirty; } stonith_device_t; /* These values are used to index certain arrays by "phase". Usually an * operation has only one "phase", so phase is always zero. However, some * reboots are remapped to "off" then "on", in which case "reboot" will be * phase 0, "off" will be phase 1 and "on" will be phase 2. */ enum st_remap_phase { st_phase_requested = 0, st_phase_off = 1, st_phase_on = 2, st_phase_max = 3 }; /* These values provide additional information for STONITH's asynchronous reply response. * The st_reply_opt_merged value indicates an operation that has been merged and completed without being executed. */ enum st_replay_option { st_reply_opt_none = 0x00000000, st_reply_opt_merged = 0x00000001, }; typedef struct remote_fencing_op_s { /* The unique id associated with this operation */ char *id; /*! The node this operation will fence */ char *target; /*! The fencing action to perform on the target. (reboot, on, off) */ char *action; /*! When was the fencing action recorded (seconds since epoch) */ time_t created; /*! Marks if the final notifications have been sent to local stonith clients. */ gboolean notify_sent; /*! The number of query replies received */ guint replies; /*! The number of query replies expected */ guint replies_expected; /*! Does this node own control of this operation */ gboolean owner; /*! After query is complete, This the high level timer that expires the entire operation */ guint op_timer_total; /*! This timer expires the current fencing request. Many fencing * requests may exist in a single operation */ guint op_timer_one; /*! This timer expires the query request sent out to determine * what nodes are contain what devices, and who those devices can fence */ guint query_timer; /*! This is the default timeout to use for each fencing device if no * custom timeout is received in the query. */ gint base_timeout; /*! This is the calculated total timeout an operation can take before * expiring. This is calculated by adding together all the timeout * values associated with the devices this fencing operation may call */ gint total_timeout; /*! Requested fencing delay. * Value -1 means disable any static/random fencing delays. */ int delay; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations * will involve multiple delegates. This value represents the final delegate * that is used. */ char *delegate; /*! The point at which the remote operation completed */ time_t completed; //! Group of enum stonith_call_options associated with this operation uint32_t call_options; /*! The current state of the remote operation. This indicates * what stage the op is in, query, exec, done, duplicate, failed. */ enum op_state state; /*! The node that owns the remote operation */ char *originator; /*! The local client id that initiated the fencing request */ char *client_id; /*! The client's call_id that initiated the fencing request */ int client_callid; /*! The name of client that initiated the fencing request */ char *client_name; /*! List of the received query results for all the nodes in the cpg group */ GList *query_results; /*! The original request that initiated the remote stonith operation */ xmlNode *request; /*! The current topology level being executed */ guint level; /*! The current operation phase being executed */ enum st_remap_phase phase; /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */ GList *automatic_list; /*! List of all devices at the currently executing topology level */ GList *devices_list; /*! Current entry in the topology device list */ GList *devices; /*! List of duplicate operations attached to this operation. Once this operation * completes, the duplicate operations will be closed out as well. */ GList *duplicates; /*! The point at which the remote operation completed(nsec) */ long long completed_nsec; } remote_fencing_op_t; /*! * \internal * \brief Broadcast the result of an operation to the peers. * \param op, Operation whose result should be broadcast * \param rc, Result of the operation */ void stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc, gboolean op_merged); // Fencer-specific client flags enum st_client_flags { st_callback_unknown = UINT64_C(0), st_callback_notify_fence = (UINT64_C(1) << 0), st_callback_device_add = (UINT64_C(1) << 2), st_callback_device_del = (UINT64_C(1) << 4), st_callback_notify_history = (UINT64_C(1) << 5), st_callback_notify_history_synced = (UINT64_C(1) << 6) }; /* * Complex fencing requirements are specified via fencing topologies. * A topology consists of levels; each level is a list of fencing devices. * Topologies are stored in a hash table by node name. When a node needs to be * fenced, if it has an entry in the topology table, the levels are tried * sequentially, and the devices in each level are tried sequentially. * Fencing is considered successful as soon as any level succeeds; * a level is considered successful if all its devices succeed. * Essentially, all devices at a given level are "and-ed" and the * levels are "or-ed". * * This structure is used for the topology table entries. * Topology levels start from 1, so levels[0] is unused and always NULL. */ typedef struct stonith_topology_s { int kind; /*! Node name regex or attribute name=value for which topology applies */ char *target; char *target_value; char *target_pattern; char *target_attribute; /*! Names of fencing devices at each topology level */ GList *levels[ST_LEVEL_MAX]; } stonith_topology_t; void init_device_list(void); void free_device_list(void); void init_topology_list(void); void free_topology_list(void); void free_stonith_remote_op_list(void); void init_stonith_remote_op_hash_table(GHashTable **table); void free_metadata_cache(void); uint64_t get_stonith_flag(const char *name); void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *op_request, const char *remote_peer); int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib); int stonith_device_remove(const char *id, gboolean from_cib); char *stonith_level_key(xmlNode * msg, int mode); int stonith_level_kind(xmlNode * msg); int stonith_level_register(xmlNode * msg, char **desc); int stonith_level_remove(xmlNode * msg, char **desc); stonith_topology_t *find_topology_for_host(const char *host); void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer); xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc); void do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); void do_stonith_notify(int options, const char *type, int result, xmlNode * data); void do_stonith_notify_device(int options, const char *op, int rc, const char *desc); void do_stonith_notify_level(int options, const char *op, int rc, const char *desc); remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request, gboolean manual_ack); int process_remote_stonith_exec(xmlNode * msg); int process_remote_stonith_query(xmlNode * msg); void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); int stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options); void stonith_fence_history_trim(void); bool fencing_peer_active(crm_node_t *peer); void set_fencing_completed(remote_fencing_op_t * op); int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op); gboolean node_has_attr(const char *node, const char *name, const char *value); gboolean node_does_watchdog_fencing(const char *node); extern char *stonith_our_uname; extern gboolean stand_alone; extern GHashTable *device_list; extern GHashTable *topology; extern long stonith_watchdog_timeout_ms; extern GList *stonith_watchdog_targets; extern GHashTable *stonith_remote_op_list; diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h index fa28d2dc7a..cdba501d96 100644 --- a/include/crm/cib/internal.h +++ b/include/crm/cib/internal.h @@ -1,233 +1,233 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CIB_INTERNAL__H # define CIB_INTERNAL__H # include # include # include # define CIB_OP_SLAVE "cib_slave" # define CIB_OP_SLAVEALL "cib_slave_all" # define CIB_OP_MASTER "cib_master" # define CIB_OP_SYNC "cib_sync" # define CIB_OP_SYNC_ONE "cib_sync_one" # define CIB_OP_ISMASTER "cib_ismaster" # define CIB_OP_BUMP "cib_bump" # define CIB_OP_QUERY "cib_query" # define CIB_OP_CREATE "cib_create" # define CIB_OP_MODIFY "cib_modify" # define CIB_OP_DELETE "cib_delete" # define CIB_OP_ERASE "cib_erase" # define CIB_OP_REPLACE "cib_replace" # define CIB_OP_APPLY_DIFF "cib_apply_diff" # define CIB_OP_UPGRADE "cib_upgrade" # define CIB_OP_DELETE_ALT "cib_delete_alt" # define F_CIB_CLIENTID "cib_clientid" # define F_CIB_CALLOPTS "cib_callopt" # define F_CIB_CALLID "cib_callid" # define F_CIB_CALLDATA "cib_calldata" # define F_CIB_OPERATION "cib_op" # define F_CIB_ISREPLY "cib_isreplyto" # define F_CIB_SECTION "cib_section" # define F_CIB_HOST "cib_host" # define F_CIB_RC "cib_rc" # define F_CIB_UPGRADE_RC "cib_upgrade_rc" # define F_CIB_DELEGATED "cib_delegated_from" # define F_CIB_OBJID "cib_object" # define F_CIB_OBJTYPE "cib_object_type" # define F_CIB_EXISTING "cib_existing_object" # define F_CIB_SEENCOUNT "cib_seen" # define F_CIB_TIMEOUT "cib_timeout" # define F_CIB_UPDATE "cib_update" # define F_CIB_CALLBACK_TOKEN "cib_async_id" # define F_CIB_GLOBAL_UPDATE "cib_update" # define F_CIB_UPDATE_RESULT "cib_update_result" # define F_CIB_CLIENTNAME "cib_clientname" # define F_CIB_NOTIFY_TYPE "cib_notify_type" # define F_CIB_NOTIFY_ACTIVATE "cib_notify_activate" # define F_CIB_UPDATE_DIFF "cib_update_diff" # define F_CIB_USER "cib_user" # define F_CIB_LOCAL_NOTIFY_ID "cib_local_notify_id" # define F_CIB_PING_ID "cib_ping_id" # define F_CIB_SCHEMA_MAX "cib_schema_max" # define T_CIB "cib" # define T_CIB_NOTIFY "cib_notify" /* notify sub-types */ # define T_CIB_PRE_NOTIFY "cib_pre_notify" # define T_CIB_POST_NOTIFY "cib_post_notify" # define T_CIB_UPDATE_CONFIRM "cib_update_confirmation" # define T_CIB_REPLACE_NOTIFY "cib_refresh_notify" gboolean cib_diff_version_details(xmlNode * diff, int *admin_epoch, int *epoch, int *updates, int *_admin_epoch, int *_epoch, int *_updates); gboolean cib_read_config(GHashTable * options, xmlNode * current_cib); void verify_cib_options(GHashTable * options); gboolean cib_internal_config_changed(xmlNode * diff); typedef struct cib_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*callback) (const char *event, xmlNode * msg); } cib_notify_client_t; typedef struct cib_callback_client_s { void (*callback) (xmlNode *, int, int, xmlNode *, void *); const char *id; void *user_data; gboolean only_success; struct timer_rec_s *timer; void (*free_func)(void *); } cib_callback_client_t; struct timer_rec_s { int call_id; int timeout; guint ref; cib_t *cib; }; #define cib__set_call_options(cib_call_opts, call_for, flags_to_set) do { \ cib_call_opts = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "CIB call", (call_for), (cib_call_opts), \ (flags_to_set), #flags_to_set); \ } while (0) #define cib__clear_call_options(cib_call_opts, call_for, flags_to_clear) do { \ cib_call_opts = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "CIB call", (call_for), (cib_call_opts), \ (flags_to_clear), #flags_to_clear); \ } while (0) typedef int (*cib_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); cib_t *cib_new_variant(void); int cib_perform_op(const char *op, int call_options, cib_op_t * fn, gboolean is_query, const char *section, xmlNode * req, xmlNode * input, gboolean manage_counters, gboolean * config_changed, xmlNode * current_cib, xmlNode ** result_cib, xmlNode ** diff, xmlNode ** output); xmlNode *cib_create_op(int call_id, const char *token, const char *op, const char *host, const char *section, xmlNode * data, int call_options, const char *user_name); void cib_native_callback(cib_t * cib, xmlNode * msg, int call_id, int rc); void cib_native_notify(gpointer data, gpointer user_data); int cib_native_register_notification(cib_t * cib, const char *callback, int enabled); gboolean cib_client_register_callback(cib_t * cib, int call_id, int timeout, gboolean only_success, void *user_data, const char *callback_name, void (*callback) (xmlNode *, int, int, xmlNode *, void *)); gboolean cib_client_register_callback_full(cib_t *cib, int call_id, int timeout, gboolean only_success, void *user_data, const char *callback_name, void (*callback)(xmlNode *, int, int, xmlNode *, void *), void (*free_func)(void *)); int cib_process_query(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_erase(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_bump(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_replace(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_create(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_modify(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_delete(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_diff(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_upgrade(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); /*! * \internal - * \brief Core function to manipulate with/query CIB/XML per xpath + arguments - * \param[in] op, the operation to be performed: - * CIB_OP_{CREATE,DELETE,MODIFY,QUERY,REPLACE} - * \param[in] options, ORed flags per relevant \c cib_call_options enumeration: - * cib_{multiple,no_children,xpath_address} - * \param[in] section, xpath defining place of interest in - * {existing,result}_cib - * \param[in] req, UNUSED - * \param[in] input, the input operand for - * CIB_OP_{CREATE,MODIFY,REPLACE} - * \param[in] existing_cib, the input operand (CIB) for \c CIB_OP_QUERY - * \param[inout] result_cib, the operand and result for - * CIB_OP_{CREATE,DELETE,MODIFY,REPLACE} - * \param[out] answer, the result for \c CIB_OP_QUERY, structured per \c options + * \brief Query or modify a CIB * - * \retval \c pcmk_ok (0) for success, different value for failure + * \param[in] op CIB_OP_* operation to be performed + * \param[in] options Flag set of \c cib_call_options + * \param[in] section XPath to query or modify + * \param[in] req unused + * \param[in] input Portion of CIB to modify (used with + * CIB_OP_CREATE, CIB_OP_MODIFY, and + * CIB_OP_REPLACE) + * \param[in] existing_cib Input CIB (used with CIB_OP_QUERY) + * \param[in,out] result_cib CIB copy to make changes in (used with + * CIB_OP_CREATE, CIB_OP_MODIFY, CIB_OP_DELETE, and + * CIB_OP_REPLACE) + * \param[out] answer Query result (used with CIB_OP_QUERY) + * + * \return Legacy Pacemaker return code */ int cib_process_xpath(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); gboolean cib_config_changed(xmlNode * last, xmlNode * next, xmlNode ** diff); gboolean update_results(xmlNode * failed, xmlNode * target, const char *operation, int return_code); int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); int cib_internal_op(cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *user_name); int cib_file_read_and_verify(const char *filename, const char *sigfile, xmlNode **root); int cib_file_write_with_digest(xmlNode *cib_root, const char *cib_dirname, const char *cib_filename); void cib__set_output(cib_t *cib, pcmk__output_t *out); cib_callback_client_t* cib__lookup_id (int call_id); /*! * \internal * \brief Connect to, query, and optionally disconnect from the CIB, returning * the resulting XML object. * * \param[out] cib If non-NULL, a pointer to where to store the CIB * connection. In this case, it is up to the caller to * disconnect from the CIB when finished. * \param[out] cib_object A pointer to where to store the XML query result. * * \return A standard Pacemaker return code */ int cib__signon_query(cib_t **cib, xmlNode **cib_object); #endif diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h index c532f708c4..f7b2f3bc3e 100644 --- a/include/crm/lrmd.h +++ b/include/crm/lrmd.h @@ -1,541 +1,540 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef LRMD__H # define LRMD__H #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Resource agent executor * \ingroup lrmd */ #include // bool #include // guint, GList #include #include typedef struct lrmd_s lrmd_t; typedef struct lrmd_key_value_s { char *key; char *value; struct lrmd_key_value_s *next; } lrmd_key_value_t; /* This should be bumped every time there is an incompatible change that * prevents older clients from connecting to this version of the server. */ #define LRMD_PROTOCOL_VERSION "1.1" /* This is the version that the client version will actually be compared * against. This should be identical to LRMD_PROTOCOL_VERSION. However, we * accidentally bumped LRMD_PROTOCOL_VERSION in 6424a647 (1.1.15) when we didn't * need to, so for now it's different. If we ever have a truly incompatible * bump, we can drop this and compare against LRMD_PROTOCOL_VERSION. */ #define LRMD_MIN_PROTOCOL_VERSION "1.0" /* *INDENT-OFF* */ #define DEFAULT_REMOTE_KEY_LOCATION PACEMAKER_CONFIG_DIR "/authkey" #define ALT_REMOTE_KEY_LOCATION "/etc/corosync/authkey" #define DEFAULT_REMOTE_PORT 3121 #define DEFAULT_REMOTE_USERNAME "lrmd" #define F_LRMD_OPERATION "lrmd_op" #define F_LRMD_CLIENTNAME "lrmd_clientname" #define F_LRMD_IS_IPC_PROVIDER "lrmd_is_ipc_provider" #define F_LRMD_CLIENTID "lrmd_clientid" #define F_LRMD_PROTOCOL_VERSION "lrmd_protocol_version" #define F_LRMD_REMOTE_MSG_TYPE "lrmd_remote_msg_type" #define F_LRMD_REMOTE_MSG_ID "lrmd_remote_msg_id" #define F_LRMD_CALLBACK_TOKEN "lrmd_async_id" #define F_LRMD_CALLID "lrmd_callid" #define F_LRMD_CALLOPTS "lrmd_callopt" #define F_LRMD_CALLDATA "lrmd_calldata" #define F_LRMD_RC "lrmd_rc" #define F_LRMD_EXEC_RC "lrmd_exec_rc" #define F_LRMD_OP_STATUS "lrmd_exec_op_status" #define F_LRMD_TIMEOUT "lrmd_timeout" #define F_LRMD_WATCHDOG "lrmd_watchdog" #define F_LRMD_CLASS "lrmd_class" #define F_LRMD_PROVIDER "lrmd_provider" #define F_LRMD_TYPE "lrmd_type" #define F_LRMD_ORIGIN "lrmd_origin" #define F_LRMD_RSC_RUN_TIME "lrmd_run_time" #define F_LRMD_RSC_RCCHANGE_TIME "lrmd_rcchange_time" #define F_LRMD_RSC_EXEC_TIME "lrmd_exec_time" #define F_LRMD_RSC_QUEUE_TIME "lrmd_queue_time" #define F_LRMD_RSC_ID "lrmd_rsc_id" #define F_LRMD_RSC_ACTION "lrmd_rsc_action" #define F_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str" #define F_LRMD_RSC_OUTPUT "lrmd_rsc_output" #define F_LRMD_RSC_EXIT_REASON "lrmd_rsc_exit_reason" #define F_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay" #define F_LRMD_RSC_INTERVAL "lrmd_rsc_interval" #define F_LRMD_RSC_DELETED "lrmd_rsc_deleted" #define F_LRMD_RSC "lrmd_rsc" #define F_LRMD_ALERT_ID "lrmd_alert_id" #define F_LRMD_ALERT_PATH "lrmd_alert_path" #define F_LRMD_ALERT "lrmd_alert" #define LRMD_OP_RSC_REG "lrmd_rsc_register" #define LRMD_OP_RSC_EXEC "lrmd_rsc_exec" #define LRMD_OP_RSC_CANCEL "lrmd_rsc_cancel" #define LRMD_OP_RSC_UNREG "lrmd_rsc_unregister" #define LRMD_OP_RSC_INFO "lrmd_rsc_info" #define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata" #define LRMD_OP_POKE "lrmd_rsc_poke" #define LRMD_OP_NEW_CLIENT "lrmd_rsc_new_client" #define LRMD_OP_CHECK "lrmd_check" #define LRMD_OP_ALERT_EXEC "lrmd_alert_exec" #define LRMD_OP_GET_RECURRING "lrmd_get_recurring" #define LRMD_IPC_OP_NEW "new" #define LRMD_IPC_OP_DESTROY "destroy" #define LRMD_IPC_OP_EVENT "event" #define LRMD_IPC_OP_REQUEST "request" #define LRMD_IPC_OP_RESPONSE "response" #define LRMD_IPC_OP_SHUTDOWN_REQ "shutdown_req" #define LRMD_IPC_OP_SHUTDOWN_ACK "shutdown_ack" #define LRMD_IPC_OP_SHUTDOWN_NACK "shutdown_nack" #define F_LRMD_IPC_OP "lrmd_ipc_op" #define F_LRMD_IPC_IPC_SERVER "lrmd_ipc_server" #define F_LRMD_IPC_SESSION "lrmd_ipc_session" #define F_LRMD_IPC_CLIENT "lrmd_ipc_client" #define F_LRMD_IPC_USER "lrmd_ipc_user" #define F_LRMD_IPC_MSG "lrmd_ipc_msg" #define F_LRMD_IPC_MSG_ID "lrmd_ipc_msg_id" #define F_LRMD_IPC_MSG_FLAGS "lrmd_ipc_msg_flags" #define T_LRMD "lrmd" #define T_LRMD_REPLY "lrmd_reply" #define T_LRMD_NOTIFY "lrmd_notify" #define T_LRMD_IPC_PROXY "lrmd_ipc_proxy" #define T_LRMD_RSC_OP "lrmd_rsc_op" /* *INDENT-ON* */ /*! * \brief Create a new connection to the local executor */ lrmd_t *lrmd_api_new(void); /*! * \brief Create a new TLS connection to a remote executor * * \param nodename name of remote node identified with this connection * \param server name of server to connect to * \param port port number to connect to * * \note nodename and server may be the same value. */ lrmd_t *lrmd_remote_api_new(const char *nodename, const char *server, int port); /*! * \brief Use after lrmd_poll returns 1 to read and dispatch a message * * \param[in,out] lrmd Executor connection object * * \return TRUE if connection is still up, FALSE if disconnected */ bool lrmd_dispatch(lrmd_t * lrmd); /*! * \brief Poll for a specified timeout period to determine if a message - * is ready for dispatch. - * \retval 1 msg is ready - * \retval 0 timeout occurred - * \retval negative error code + * is ready for dispatch + * + * \retval 1 Message is ready + * \retval 0 Timeout occurred + * \retval negative errno Error occurred */ int lrmd_poll(lrmd_t * lrmd, int timeout); /*! * \brief Destroy executor connection object */ void lrmd_api_delete(lrmd_t * lrmd); lrmd_key_value_t *lrmd_key_value_add(lrmd_key_value_t * kvp, const char *key, const char *value); /* *INDENT-OFF* */ /* Reserved for future use */ enum lrmd_call_options { lrmd_opt_none = 0x00000000, /* lrmd_opt_sync_call = 0x00000001, //Not implemented, patches welcome. */ /*! Only notify the client originating a exec() the results */ lrmd_opt_notify_orig_only = 0x00000002, /*! Drop recurring operations initiated by a client when client disconnects. * This call_option is only valid when registering a resource. When used * remotely with the pacemaker_remote daemon, this option means that recurring * operations will be dropped once all the remote connections disconnect. */ lrmd_opt_drop_recurring = 0x00000003, /*! Send notifications for recurring operations only when the result changes */ lrmd_opt_notify_changes_only = 0x00000004, }; enum lrmd_callback_event { lrmd_event_register, lrmd_event_unregister, lrmd_event_exec_complete, lrmd_event_disconnect, lrmd_event_connect, lrmd_event_poke, lrmd_event_new_client, }; /* *INDENT-ON* */ typedef struct lrmd_event_data_s { /*! Type of event, register, unregister, call_completed... */ enum lrmd_callback_event type; /*! The resource this event occurred on. */ const char *rsc_id; /*! The action performed, start, stop, monitor... */ const char *op_type; /*! The user data passed by caller of exec() API function */ const char *user_data; /*! The client api call id associated with this event */ int call_id; /*! The operation's timeout period in ms. */ int timeout; /*! The operation's recurring interval in ms. */ guint interval_ms; /*! The operation's start delay value in ms. */ int start_delay; /*! This operation that just completed is on a deleted rsc. */ int rsc_deleted; /*! The executed ra return code mapped to OCF */ enum ocf_exitcode rc; /*! The executor status returned for exec_complete events */ int op_status; /*! stdout from resource agent operation */ const char *output; /*! Timestamp of when op ran */ unsigned int t_run; /*! Timestamp of last rc change */ unsigned int t_rcchange; /*! Time in length op took to execute */ unsigned int exec_time; /*! Time in length spent in queue */ unsigned int queue_time; /*! int connection result. Used for connection and poke events */ int connection_rc; /* This is a GHashTable containing the * parameters given to the operation */ void *params; /*! client node name associated with this connection * (used to match actions to the proper client when there are multiple) */ const char *remote_nodename; /*! exit failure reason string from resource agent operation */ const char *exit_reason; } lrmd_event_data_t; lrmd_event_data_t *lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms); lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t * event); void lrmd_free_event(lrmd_event_data_t * event); typedef struct lrmd_rsc_info_s { char *id; char *type; char *standard; char *provider; } lrmd_rsc_info_t; typedef struct lrmd_op_info_s { char *rsc_id; char *action; char *interval_ms_s; char *timeout_ms_s; } lrmd_op_info_t; lrmd_rsc_info_t *lrmd_new_rsc_info(const char *rsc_id, const char *standard, const char *provider, const char *type); lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info); void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info); void lrmd_free_op_info(lrmd_op_info_t *op_info); typedef void (*lrmd_event_callback) (lrmd_event_data_t * event); typedef struct lrmd_list_s { const char *val; struct lrmd_list_s *next; } lrmd_list_t; void lrmd_list_freeall(lrmd_list_t * head); void lrmd_key_value_freeall(lrmd_key_value_t * head); typedef struct lrmd_api_operations_s { /*! * \brief Connect to an executor * * \return Legacy Pacemaker return code */ int (*connect) (lrmd_t * lrmd, const char *client_name, int *fd); /*! * \brief Initiate an executor connection without blocking * * \return Legacy Pacemaker return code (if pcmk_ok, the event callback will * be called later with the result) * \note This function requires a mainloop. */ int (*connect_async) (lrmd_t * lrmd, const char *client_name, int timeout /*ms */ ); /*! - * \brief Is connected to lrmd daemon? + * \brief Check whether connection to executor daemon is (still) active * - * \retval 0, false - * \retval 1, true + * \return 1 if the executor connection is active, 0 otherwise */ int (*is_connected) (lrmd_t * lrmd); /*! * \brief Poke executor connection to verify it is still capable of serving requests * \note The response comes in the form of a poke event to the callback. * * \return Legacy Pacemaker return code (if pcmk_ok, the event callback will * be called later with the result) */ int (*poke_connection) (lrmd_t * lrmd); /*! * \brief Disconnect from the executor. * * \return Legacy Pacemaker return code */ int (*disconnect) (lrmd_t * lrmd); /*! * \brief Register a resource with the executor. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \return Legacy Pacemaker return code */ int (*register_rsc) (lrmd_t * lrmd, const char *rsc_id, const char *standard, const char *provider, const char *agent, enum lrmd_call_options options); /*! - * \brief Retrieve registration info for a rsc + * \brief Retrieve a resource's registration information * - * \retval info on success - * \retval NULL on failure + * \return Resource information on success, otherwise NULL */ lrmd_rsc_info_t *(*get_rsc_info) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Retrieve registered recurring operations * * \return Legacy Pacemaker return code */ int (*get_recurring_ops) (lrmd_t *lrmd, const char *rsc_id, int timeout_ms, enum lrmd_call_options options, GList **output); /*! * \brief Unregister a resource from the executor. * * \note All pending and recurring operations will be cancelled * automatically. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \return Legacy Pacemaker return code (of particular interest, EINPROGRESS * means that operations are in progress for the resource, and the * unregistration will be done when they complete) */ int (*unregister_rsc) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Set a callback for executor events */ void (*set_callback) (lrmd_t * lrmd, lrmd_event_callback callback); /*! * \brief Issue a command on a resource * * \return A call ID for the action on success (in which case the action is * queued in the executor, and the event callback will be called * later with the result), otherwise a negative legacy Pacemaker * return code * * \note exec() and cancel() operations on an individual resource are * guaranteed to occur in the order the client API is called. However, * operations on different resources are not guaranteed to occur in * any specific order. */ int (*exec) (lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, /* userdata string given back in event notification */ guint interval_ms, int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params); /* ownership of params is given up to api here */ /*! * \brief Cancel a recurring command. * * \return Legacy Pacemaker return code (if pcmk_ok, command is queued in * daemon on function return, and the event callback will be called * later with an exec_complete event with an lrmd_op_status * signifying that the operation is cancelled) * * \note exec() and cancel() operations on an individual resource are * guaranteed to occur in the order the client API is called. However, * operations on different resources are not guaranteed to occur in * any specific order. */ int (*cancel) (lrmd_t *lrmd, const char *rsc_id, const char *action, guint interval_ms); /*! * \brief Get resource metadata for a specified resource agent * * \param[in] lrmd Executor connection (unused) * \param[in] standard Resource agent class * \param[in] provider Resource agent provider * \param[in] agent Resource agent type * \param[out] output Metadata will be stored here (must not be NULL) * \param[in] options Options to use with any executor API calls (unused) * * \return Legacy Pacemaker return code * * \note Caller is responsible for freeing output. This call is currently * always synchronous (blocking), and always done directly by the * library (not via the executor connection). This means that it is based * on the local host environment, even if the executor connection is to a * remote node, so (for most resource agent classes) this will fail if * the agent is not installed locally. This also means that, if an * external agent must be executed, it will be executed by the * caller's user, not the executor's. * \todo Add a metadata call to the executor API and let the server handle this. */ int (*get_metadata) (lrmd_t * lrmd, const char *standard, const char *provider, const char *agent, char **output, enum lrmd_call_options options); /*! * \brief Retrieve a list of installed resource agents. * * \return Number of items in list on success, negative legacy Pacemaker * return code otherwise * * \note if standard is not provided, all known agents will be returned * \note list must be freed using lrmd_list_freeall() */ int (*list_agents) (lrmd_t * lrmd, lrmd_list_t ** agents, const char *standard, const char *provider); /*! * \brief Retrieve a list of resource agent providers * * \return Number of items in list on success, negative legacy Pacemaker * return code otherwise * * \note When the agent is provided, only the agent's provider will be returned * \note When no agent is supplied, all providers will be returned. * \note List must be freed using lrmd_list_freeall() */ int (*list_ocf_providers) (lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers); /*! * \brief Retrieve a list of standards supported by this machine/installation * * \return Number of items in list on success, negative legacy Pacemaker * return code otherwise * * \note List must be freed using lrmd_list_freeall() */ int (*list_standards) (lrmd_t * lrmd, lrmd_list_t ** standards); /*! * \brief Execute an alert agent * * \return Legacy Pacemaker return code (if pcmk_ok, the alert is queued in * the executor, and the event callback will be called later with * the result) * * \note Operations on individual alerts (by ID) are guaranteed to occur in * the order the client API is called. Operations on different alerts * are not guaranteed to occur in any specific order. */ int (*exec_alert) (lrmd_t *lrmd, const char *alert_id, const char *alert_path, int timeout, /* ms */ lrmd_key_value_t *params); /* ownership of params is given up to api here */ /*! * \brief Get resource metadata for a resource agent, passing parameters * * \param[in] lrmd Executor connection (unused) * \param[in] standard Resource agent class * \param[in] provider Resource agent provider * \param[in] agent Resource agent type * \param[out] output Metadata will be stored here (must not be NULL) * \param[in] options Options to use with any executor API calls (unused) * \param[in] params Parameters to pass to agent via environment * * \return Legacy Pacemaker return code * * \note This is identical to the get_metadata() API call, except parameters * will be passed to the resource agent via environment variables. * \note The API will handle freeing params. */ int (*get_metadata_params) (lrmd_t *lrmd, const char *standard, const char *provider, const char *agent, char **output, enum lrmd_call_options options, lrmd_key_value_t *params); } lrmd_api_operations_t; struct lrmd_s { lrmd_api_operations_t *cmds; void *lrmd_private; }; static inline const char * lrmd_event_type2str(enum lrmd_callback_event type) { switch (type) { case lrmd_event_register: return "register"; case lrmd_event_unregister: return "unregister"; case lrmd_event_exec_complete: return "exec_complete"; case lrmd_event_disconnect: return "disconnect"; case lrmd_event_connect: return "connect"; case lrmd_event_poke: return "poke"; case lrmd_event_new_client: return "new_client"; } return "unknown"; } #ifdef __cplusplus } #endif #endif diff --git a/include/crm/services.h b/include/crm/services.h index 1f1d466a8f..7fa37715f8 100644 --- a/include/crm/services.h +++ b/include/crm/services.h @@ -1,401 +1,401 @@ /* * Copyright 2010-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef __PCMK_SERVICES__ # define __PCMK_SERVICES__ #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Services API * \ingroup core */ # include # include # include # include # include # include # include // OCF_ROOT_DIR # include "common/results.h" # ifndef LSB_ROOT_DIR # define LSB_ROOT_DIR "/etc/init.d" # endif /* TODO: Autodetect these two ?*/ # ifndef SYSTEMCTL # define SYSTEMCTL "/bin/systemctl" # endif /* Known resource classes */ #define PCMK_RESOURCE_CLASS_OCF "ocf" #define PCMK_RESOURCE_CLASS_SERVICE "service" #define PCMK_RESOURCE_CLASS_LSB "lsb" #define PCMK_RESOURCE_CLASS_SYSTEMD "systemd" #define PCMK_RESOURCE_CLASS_UPSTART "upstart" #define PCMK_RESOURCE_CLASS_NAGIOS "nagios" #define PCMK_RESOURCE_CLASS_STONITH "stonith" /* This is the string passed in the OCF_EXIT_REASON_PREFIX environment variable. * The stderr output that occurs after this prefix is encountered is considered * the exit reason for a completed operation. */ #define PCMK_OCF_REASON_PREFIX "ocf-exit-reason:" // Agent version to use if agent doesn't specify one #define PCMK_DEFAULT_AGENT_VERSION "0.1" enum lsb_exitcode { PCMK_LSB_OK = 0, PCMK_LSB_UNKNOWN_ERROR = 1, PCMK_LSB_INVALID_PARAM = 2, PCMK_LSB_UNIMPLEMENT_FEATURE = 3, PCMK_LSB_INSUFFICIENT_PRIV = 4, PCMK_LSB_NOT_INSTALLED = 5, PCMK_LSB_NOT_CONFIGURED = 6, PCMK_LSB_NOT_RUNNING = 7, }; /* The return codes for the status operation are not the same for other * operatios - go figure */ enum lsb_status_exitcode { PCMK_LSB_STATUS_OK = 0, PCMK_LSB_STATUS_VAR_PID = 1, PCMK_LSB_STATUS_VAR_LOCK = 2, PCMK_LSB_STATUS_NOT_RUNNING = 3, PCMK_LSB_STATUS_UNKNOWN = 4, /* custom codes should be in the 150-199 range reserved for application use */ PCMK_LSB_STATUS_NOT_INSTALLED = 150, PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151, }; enum nagios_exitcode { NAGIOS_STATE_OK = 0, NAGIOS_STATE_WARNING = 1, NAGIOS_STATE_CRITICAL = 2, NAGIOS_STATE_UNKNOWN = 3, NAGIOS_STATE_DEPENDENT = 4, NAGIOS_INSUFFICIENT_PRIV = 100, NAGIOS_NOT_INSTALLED = 101, }; enum svc_action_flags { /* On timeout, only kill pid, do not kill entire pid group */ SVC_ACTION_LEAVE_GROUP = 0x01, SVC_ACTION_NON_BLOCKED = 0x02, }; typedef struct svc_action_private_s svc_action_private_t; /*! * \brief Object for executing external actions * * \note This object should never be instantiated directly, but instead created * using one of the constructor functions (resources_action_create() for * resource agents, services_alert_create() for alert agents, or * services_action_create_generic() for generic executables). Similarly, * do not use sizeof() on this struct. * * \internal Internally, services__create_resource_action() is preferable to * resources_action_create(). */ typedef struct svc_action_s { /*! Operation key (__) for resource actions, * XML ID for alert actions, or NULL for generic actions */ char *id; //! XML ID of resource being executed for resource actions, otherwise NULL char *rsc; //! Name of action being executed for resource actions, otherwise NULL char *action; //! Action interval for recurring resource actions, otherwise 0 guint interval_ms; //! Resource standard for resource actions, otherwise NULL char *standard; //! Resource provider for resource actions that require it, otherwise NULL char *provider; //! Resource agent name for resource actions, otherwise NULL char *agent; int timeout; //!< Action timeout (in milliseconds) /*! A hash table of name/value pairs to use as parameters for resource and * alert actions, otherwise NULL. These will be used to set environment * variables for non-fencing resource agents and alert agents, and to send * stdin to fence agents. */ GHashTable *params; int rc; //!< Exit status of action (set by library upon completion) //!@{ //! This field should be treated as internal to Pacemaker int pid; // Process ID of child int cancel; // Whether this is a cancellation of a recurring action //!@} int status; //!< Execution status (enum pcmk_exec_status set by library) /*! Action counter (set by library for resource actions, or by caller * otherwise) */ int sequence; //!@{ //! This field should be treated as internal to Pacemaker int expected_rc; // Unused int synchronous; // Whether execution should be synchronous (blocking) //!@} enum svc_action_flags flags; //!< Flag group of enum svc_action_flags char *stderr_data; //!< Action stderr (set by library) char *stdout_data; //!< Action stdout (set by library) void *cb_data; //!< For caller's use (not used by library) //! This field should be treated as internal to Pacemaker svc_action_private_t *opaque; } svc_action_t; /** * \brief Get a list of files or directories in a given path * * \param[in] root full path to a directory to read * \param[in] files return list of files if TRUE or directories if FALSE * \param[in] executable if TRUE and files is TRUE, only return executable files * * \return a list of what was found. The list items are char *. * \note It is the caller's responsibility to free the result with g_list_free_full(list, free). */ GList *get_directory_list(const char *root, gboolean files, gboolean executable); /** * \brief Get a list of providers * * \param[in] standard list providers of this standard (e.g. ocf, lsb, etc.) * * \return a list of providers as char * list items (or NULL if standard does not support providers) * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_providers(const char *standard); /** * \brief Get a list of resource agents * * \param[in] standard list agents using this standard (e.g. ocf, lsb, etc.) (or NULL for all) * \param[in] provider list agents from this provider (or NULL for all) * * \return a list of resource agents. The list items are char *. * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_agents(const char *standard, const char *provider); /** * Get list of available standards * * \return a list of resource standards. The list items are char *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *resources_list_standards(void); /** * Does the given standard, provider, and agent describe a resource that can exist? * * \param[in] standard Which class of agent does the resource belong to? * \param[in] provider What provides the agent (NULL for most standards)? * \param[in] agent What is the name of the agent? * * \return A boolean */ gboolean resources_agent_exists(const char *standard, const char *provider, const char *agent); /** * \brief Create a new resource action * * \param[in] name Name of resource * \param[in] standard Resource agent standard (ocf, lsb, etc.) * \param[in] provider Resource agent provider * \param[in] agent Resource agent name * \param[in] action action (start, stop, monitor, etc.) * \param[in] interval_ms How often to repeat this action (if 0, execute once) * \param[in] timeout Consider action failed if it does not complete in this many milliseconds * \param[in] params Action parameters * * \return newly allocated action instance * * \post After the call, 'params' is owned, and later free'd by the svc_action_t result * \note The caller is responsible for freeing the return value using * services_action_free(). */ svc_action_t *resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, guint interval_ms, int timeout /* ms */, GHashTable *params, enum svc_action_flags flags); /** * Kick a recurring action so it is scheduled immediately for re-execution */ gboolean services_action_kick(const char *name, const char *action, guint interval_ms); const char *resources_find_service_class(const char *agent); /** * Utilize services API to execute an arbitrary command. * * This API has useful infrastructure in place to be able to run a command * in the background and get notified via a callback when the command finishes. * * \param[in] exec command to execute * \param[in] args arguments to the command, NULL terminated * * \return a svc_action_t object, used to pass to the execute function * (services_action_sync() or services_action_async()) and is * provided to the callback. */ svc_action_t *services_action_create_generic(const char *exec, const char *args[]); void services_action_cleanup(svc_action_t * op); void services_action_free(svc_action_t * op); int services_action_user(svc_action_t *op, const char *user); gboolean services_action_sync(svc_action_t * op); /** - * Run an action asynchronously. + * \brief Run an action asynchronously * - * \param[in] op services action data - * \param[in] action_callback callback for when the action completes - * \param[in] action_fork_callback callback for when action forked successfully + * \param[in] op Action to run + * \param[in] action_callback Function to call when the action completes + * \param[in] action_fork_callback Function to call after action process forks * - * \retval TRUE succesfully started execution - * \retval FALSE failed to start execution, no callback will be received + * \return TRUE if execution was successfully initiated, FALSE otherwise (in + * which case the callback will not be called) */ gboolean services_action_async_fork_notify(svc_action_t * op, void (*action_callback) (svc_action_t *), void (*action_fork_callback) (svc_action_t *)); gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)); gboolean services_action_cancel(const char *name, const char *action, guint interval_ms); /* functions for alert agents */ svc_action_t *services_alert_create(const char *id, const char *exec, int timeout, GHashTable *params, int sequence, void *cb_data); gboolean services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op)); static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) { switch (code) { case PCMK_OCF_OK: return "ok"; case PCMK_OCF_UNKNOWN_ERROR: return "error"; case PCMK_OCF_INVALID_PARAM: return "invalid parameter"; case PCMK_OCF_UNIMPLEMENT_FEATURE: return "unimplemented feature"; case PCMK_OCF_INSUFFICIENT_PRIV: return "insufficient privileges"; case PCMK_OCF_NOT_INSTALLED: return "not installed"; case PCMK_OCF_NOT_CONFIGURED: return "not configured"; case PCMK_OCF_NOT_RUNNING: return "not running"; case PCMK_OCF_RUNNING_PROMOTED: return "promoted"; case PCMK_OCF_FAILED_PROMOTED: return "promoted (failed)"; case PCMK_OCF_TIMEOUT: return "OCF_TIMEOUT"; case PCMK_OCF_DEGRADED: return "OCF_DEGRADED"; case PCMK_OCF_DEGRADED_PROMOTED: return "promoted (degraded)"; #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) case PCMK_OCF_NOT_SUPPORTED: return "not supported (DEPRECATED STATUS)"; case PCMK_OCF_CANCELLED: return "cancelled (DEPRECATED STATUS)"; case PCMK_OCF_OTHER_ERROR: return "other error (DEPRECATED STATUS)"; case PCMK_OCF_SIGNAL: return "interrupted by signal (DEPRECATED STATUS)"; case PCMK_OCF_PENDING: return "pending (DEPRECATED STATUS)"; #endif default: return "unknown"; } } /** * \brief Get OCF equivalent of LSB exit code * * \param[in] action LSB action that produced exit code * \param[in] lsb_exitcode Exit code of LSB action * * \return PCMK_OCF_* constant that corresponds to LSB exit code */ static inline enum ocf_exitcode services_get_ocf_exitcode(const char *action, int lsb_exitcode) { /* For non-status actions, LSB and OCF share error code meaning <= 7 */ if (action && strcmp(action, "status") && strcmp(action, "monitor")) { if ((lsb_exitcode < 0) || (lsb_exitcode > PCMK_LSB_NOT_RUNNING)) { return PCMK_OCF_UNKNOWN_ERROR; } return (enum ocf_exitcode)lsb_exitcode; } /* status has different return codes */ switch (lsb_exitcode) { case PCMK_LSB_STATUS_OK: return PCMK_OCF_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case PCMK_LSB_STATUS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; } return PCMK_OCF_UNKNOWN_ERROR; } #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif # ifdef __cplusplus } # endif #endif /* __PCMK_SERVICES__ */ diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h index 67e62964d8..8d6ad477d1 100644 --- a/include/crm/stonith-ng.h +++ b/include/crm/stonith-ng.h @@ -1,583 +1,567 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef STONITH_NG__H # define STONITH_NG__H #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Fencing aka. STONITH * \ingroup fencing */ /* IMPORTANT: DLM source code includes this file directly, without having access * to other Pacemaker headers on its include path, so this file should *not* * include any other Pacemaker headers. (DLM might be updated to avoid the * issue, but we should still follow this guideline for a long time after.) */ # include # include # include // bool # include // uint32_t # include // time_t # define T_STONITH_NOTIFY_DISCONNECT "st_notify_disconnect" # define T_STONITH_NOTIFY_FENCE "st_notify_fence" # define T_STONITH_NOTIFY_HISTORY "st_notify_history" # define T_STONITH_NOTIFY_HISTORY_SYNCED "st_notify_history_synced" /* *INDENT-OFF* */ enum stonith_state { stonith_connected_command, stonith_connected_query, stonith_disconnected, }; enum stonith_call_options { st_opt_none = 0x00000000, st_opt_verbose = 0x00000001, st_opt_allow_suicide = 0x00000002, st_opt_manual_ack = 0x00000008, st_opt_discard_reply = 0x00000010, /* st_opt_all_replies = 0x00000020, */ st_opt_topology = 0x00000040, st_opt_scope_local = 0x00000100, st_opt_cs_nodeid = 0x00000200, st_opt_sync_call = 0x00001000, /*! Allow the timeout period for a callback to be adjusted * based on the time the server reports the operation will take. */ st_opt_timeout_updates = 0x00002000, /*! Only report back if operation is a success in callback */ st_opt_report_only_success = 0x00004000, /* used where ever apropriate - e.g. cleanup of history */ st_opt_cleanup = 0x000080000, /* used where ever apropriate - e.g. send out a history query to all nodes */ st_opt_broadcast = 0x000100000, }; /*! Order matters here, do not change values */ enum op_state { st_query, st_exec, st_done, st_duplicate, st_failed, }; // Supported fence agent interface standards enum stonith_namespace { st_namespace_invalid, st_namespace_any, st_namespace_internal, // Implemented internally by Pacemaker /* Neither of these projects are active any longer, but the fence agent * interfaces they created are still in use and supported by Pacemaker. */ st_namespace_rhcs, // Red Hat Cluster Suite compatible st_namespace_lha, // Linux-HA compatible }; enum stonith_namespace stonith_text2namespace(const char *namespace_s); const char *stonith_namespace2text(enum stonith_namespace st_namespace); enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s); typedef struct stonith_key_value_s { char *key; char *value; struct stonith_key_value_s *next; } stonith_key_value_t; typedef struct stonith_history_s { char *target; char *action; char *origin; char *delegate; char *client; int state; time_t completed; struct stonith_history_s *next; long completed_nsec; } stonith_history_t; typedef struct stonith_s stonith_t; typedef struct stonith_event_s { char *id; char *type; char *message; char *operation; int result; char *origin; char *target; char *action; char *executioner; char *device; /*! The name of the client that initiated the action. */ char *client_origin; } stonith_event_t; typedef struct stonith_callback_data_s { int rc; int call_id; void *userdata; } stonith_callback_data_t; typedef struct stonith_api_operations_s { /*! * \brief Destroy the stonith api structure. */ int (*free) (stonith_t *st); /*! * \brief Connect to the local stonith daemon. * - * \retval 0, success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*connect) (stonith_t *st, const char *name, int *stonith_fd); /*! * \brief Disconnect from the local stonith daemon. * - * \retval 0, success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*disconnect)(stonith_t *st); /*! * \brief Remove a registered stonith device with the local stonith daemon. * * \note Synchronous, guaranteed to occur in daemon before function returns. * - * \retval 0, success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*remove_device)( stonith_t *st, int options, const char *name); /*! * \brief Register a stonith device with the local stonith daemon. * * \note Synchronous, guaranteed to occur in daemon before function returns. * - * \retval 0, success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*register_device)( stonith_t *st, int options, const char *id, const char *provider, const char *agent, stonith_key_value_t *params); /*! * \brief Remove a fencing level for a specific node. * - * \retval 0, success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*remove_level)( stonith_t *st, int options, const char *node, int level); /*! * \brief Register a fencing level containing the fencing devices to be used * at that level for a specific node. * - * \retval 0, success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*register_level)( stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list); /*! * \brief Get the metadata documentation for a resource. * * \note Value is returned in output. Output must be freed when set. * - * \retval 0 success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*metadata)(stonith_t *st, int options, const char *device, const char *provider, char **output, int timeout); /*! * \brief Retrieve a list of installed stonith agents * * \note if provider is not provided, all known agents will be returned * \note list must be freed using stonith_key_value_freeall() * \note call_options parameter is not used, it is reserved for future use. * - * \retval num items in list on success - * \retval negative error code on failure + * \return Number of items in list on success, or negative errno otherwise */ int (*list_agents)(stonith_t *stonith, int call_options, const char *provider, stonith_key_value_t **devices, int timeout); /*! * \brief Retrieve string listing hosts and port assignments from a local stonith device. * - * \retval 0 on success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*list)(stonith_t *st, int options, const char *id, char **list_output, int timeout); /*! * \brief Check to see if a local stonith device is reachable * - * \retval 0 on success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*monitor)(stonith_t *st, int options, const char *id, int timeout); /*! * \brief Check to see if a local stonith device's port is reachable * - * \retval 0 on success - * \retval negative error code on failure + * \return Legacy Pacemaker return code */ int (*status)(stonith_t *st, int options, const char *id, const char *port, int timeout); /*! * \brief Retrieve a list of registered stonith devices. * * \note If node is provided, only devices that can fence the node id * will be returned. * - * \retval num items in list on success - * \retval negative error code on failure + * \return Number of items in list on success, or negative errno otherwise */ int (*query)(stonith_t *st, int options, const char *node, stonith_key_value_t **devices, int timeout); /*! * \brief Issue a fencing action against a node. * * \note Possible actions are, 'on', 'off', and 'reboot'. * * \param st, stonith connection * \param options, call options * \param node, The target node to fence * \param action, The fencing action to take * \param timeout, The default per device timeout to use with each device * capable of fencing the target. * - * \retval 0 success - * \retval negative error code on failure. + * \return Legacy Pacemaker return code */ int (*fence)(stonith_t *st, int options, const char *node, const char *action, int timeout, int tolerance); /*! * \brief Manually confirm that a node is down. * - * \retval 0 success - * \retval negative error code on failure. + * \return Legacy Pacemaker return code */ int (*confirm)(stonith_t *st, int options, const char *node); /*! * \brief Retrieve a list of fencing operations that have occurred for a specific node. * - * \retval 0 success - * \retval negative error code on failure. + * \return Legacy Pacemaker return code */ int (*history)(stonith_t *st, int options, const char *node, stonith_history_t **output, int timeout); int (*register_notification)( stonith_t *st, const char *event, void (*notify)(stonith_t *st, stonith_event_t *e)); int (*remove_notification)(stonith_t *st, const char *event); /*! * \brief Register a callback to receive the result of an asynchronous call * * \param[in] call_id The call ID to register callback for * \param[in] timeout Default time to wait until callback expires * \param[in] options Bitmask of \c stonith_call_options (respects * \c st_opt_timeout_updates and * \c st_opt_report_only_success) * \param[in] userdata Pointer that will be given to callback * \param[in] callback_name Unique name to identify callback * \param[in] callback The callback function to register * * \return \c TRUE on success, \c FALSE if call_id is negative, -errno otherwise * * \todo This function should return \c pcmk_ok on success, and \c call_id * when negative, but that would break backward compatibility. */ int (*register_callback)(stonith_t *st, int call_id, int timeout, int options, void *userdata, const char *callback_name, void (*callback)(stonith_t *st, stonith_callback_data_t *data)); /*! * \brief Remove a registered callback for a given call id. */ int (*remove_callback)(stonith_t *st, int call_id, bool all_callbacks); /*! * \brief Remove fencing level for specific node, node regex or attribute * * \param[in] st Fencer connection to use * \param[in] options Bitmask of stonith_call_options to pass to the fencer * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to remove * * \return 0 on success, negative error code otherwise * * \note The caller should set only one of node, pattern or attr/value. */ int (*remove_level_full)(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level); /*! * \brief Register fencing level for specific node, node regex or attribute * * \param[in] st Fencer connection to use * \param[in] options Bitmask of stonith_call_options to pass to fencer * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to add * \param[in] device_list Devices to use in level * * \return 0 on success, negative error code otherwise * * \note The caller should set only one of node, pattern or attr/value. */ int (*register_level_full)(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list); /*! * \brief Validate an arbitrary stonith device configuration * * \param[in] st Stonithd connection to use * \param[in] call_options Bitmask of stonith_call_options to use with fencer * \param[in] rsc_id ID used to replace CIB secrets in params * \param[in] namespace_s Namespace of fence agent to validate (optional) * \param[in] agent Fence agent to validate * \param[in] params Configuration parameters to pass to fence agent * \param[in] timeout Fail if no response within this many seconds * \param[out] output If non-NULL, where to store any agent output * \param[out] error_output If non-NULL, where to store agent error output * * \return pcmk_ok if validation succeeds, -errno otherwise * * \note If pcmk_ok is returned, the caller is responsible for freeing * the output (if requested). */ int (*validate)(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, stonith_key_value_t *params, int timeout, char **output, char **error_output); /*! * \brief Issue a fencing action against a node with requested fencing delay. * * \note Possible actions are, 'on', 'off', and 'reboot'. * * \param st, stonith connection * \param options, call options * \param node, The target node to fence * \param action, The fencing action to take * \param timeout, The default per device timeout to use with each device * capable of fencing the target. * \param delay, Apply a fencing delay. Value -1 means disable also any * static/random fencing delays from pcmk_delay_base/max * - * \retval 0 success - * \retval negative error code on failure. + * \return Legacy Pacemaker return code */ int (*fence_with_delay)(stonith_t *st, int options, const char *node, const char *action, int timeout, int tolerance, int delay); } stonith_api_operations_t; struct stonith_s { enum stonith_state state; int call_id; int call_timeout; void *st_private; stonith_api_operations_t *cmds; }; /* *INDENT-ON* */ /* Core functions */ stonith_t *stonith_api_new(void); void stonith_api_delete(stonith_t * st); void stonith_dump_pending_callbacks(stonith_t * st); bool stonith_dispatch(stonith_t * st); stonith_key_value_t *stonith_key_value_add(stonith_key_value_t * kvp, const char *key, const char *value); void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values); void stonith_history_free(stonith_history_t *history); // Convenience functions int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts); const char *stonith_op_state_str(enum op_state state); /* Basic helpers that allows nodes to be fenced and the history to be * queried without mainloop or the caller understanding the full API * * At least one of nodeid and uname are required */ int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off); time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress); /* * Helpers for using the above functions without install-time dependencies * * Usage: * #include * * To turn a node off by corosync nodeid: * stonith_api_kick_helper(nodeid, 120, 1); * * To check the last fence date/time (also by nodeid): * last = stonith_api_time_helper(nodeid, 0); * * To check if fencing is in progress: * if(stonith_api_time_helper(nodeid, 1) > 0) { ... } * * eg. #include #include #include int main(int argc, char ** argv) { int rc = 0; int nodeid = 102; rc = stonith_api_time_helper(nodeid, 0); printf("%d last fenced at %s\n", nodeid, ctime(rc)); rc = stonith_api_kick_helper(nodeid, 120, 1); printf("%d fence result: %d\n", nodeid, rc); rc = stonith_api_time_helper(nodeid, 0); printf("%d last fenced at %s\n", nodeid, ctime(rc)); return 0; } */ # define STONITH_LIBRARY "libstonithd.so.26" typedef int (*st_api_kick_fn) (int nodeid, const char *uname, int timeout, bool off); typedef time_t (*st_api_time_fn) (int nodeid, const char *uname, bool in_progress); static inline int stonith_api_kick_helper(uint32_t nodeid, int timeout, bool off) { static void *st_library = NULL; static st_api_kick_fn st_kick_fn; if (st_library == NULL) { st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY); } if (st_library && st_kick_fn == NULL) { st_kick_fn = (st_api_kick_fn) dlsym(st_library, "stonith_api_kick"); } if (st_kick_fn == NULL) { #ifdef ELIBACC return -ELIBACC; #else return -ENOSYS; #endif } return (*st_kick_fn) (nodeid, NULL, timeout, off); } static inline time_t stonith_api_time_helper(uint32_t nodeid, bool in_progress) { static void *st_library = NULL; static st_api_time_fn st_time_fn; if (st_library == NULL) { st_library = dlopen(STONITH_LIBRARY, RTLD_LAZY); } if (st_library && st_time_fn == NULL) { st_time_fn = (st_api_time_fn) dlsym(st_library, "stonith_api_time"); } if (st_time_fn == NULL) { return 0; } return (*st_time_fn) (nodeid, NULL, in_progress); } /** * Does the given agent describe a stonith resource that can exist? * * \param[in] agent What is the name of the agent? * \param[in] timeout Timeout to use when querying. If 0 is given, * use a default of 120. * * \return A boolean */ bool stonith_agent_exists(const char *agent, int timeout); /*! * \brief Turn stonith action into a more readable string. * * \param action Stonith action */ const char *stonith_action_str(const char *action); #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) /* Normally we'd put this section in a separate file (crm/fencing/compat.h), but * we can't do that for the reason noted at the top of this file. That does mean * we have to duplicate these declarations where they're implemented. */ //! \deprecated Use stonith_get_namespace() instead const char *get_stonith_provider(const char *agent, const char *provider); #endif #ifdef __cplusplus } #endif #endif diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h index 2202e1f103..4765e305db 100644 --- a/lib/common/crmcommon_private.h +++ b/lib/common/crmcommon_private.h @@ -1,311 +1,311 @@ /* - * Copyright 2018-2020 the Pacemaker project contributors + * Copyright 2018-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRMCOMMON_PRIVATE__H # define CRMCOMMON_PRIVATE__H /* This header is for the sole use of libcrmcommon, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // uint8_t, uint32_t #include // bool #include // size_t #include // GList #include // xmlNode, xmlAttr #include // struct qb_ipc_response_header // Decent chunk size for processing large amounts of data #define PCMK__BUFFER_SIZE 4096 /* * XML and ACLs */ enum xml_private_flags { xpf_none = 0x0000, xpf_dirty = 0x0001, xpf_deleted = 0x0002, xpf_created = 0x0004, xpf_modified = 0x0008, xpf_tracking = 0x0010, xpf_processed = 0x0020, xpf_skip = 0x0040, xpf_moved = 0x0080, xpf_acl_enabled = 0x0100, xpf_acl_read = 0x0200, xpf_acl_write = 0x0400, xpf_acl_deny = 0x0800, xpf_acl_create = 0x1000, xpf_acl_denied = 0x2000, xpf_lazy = 0x4000, }; /* When deleting portions of an XML tree, we keep a record so we can know later * (e.g. when checking differences) that something was deleted. */ typedef struct pcmk__deleted_xml_s { char *path; int position; } pcmk__deleted_xml_t; typedef struct xml_private_s { long check; uint32_t flags; char *user; GList *acls; GList *deleted_objs; // List of pcmk__deleted_xml_t } xml_private_t; #define pcmk__set_xml_flags(xml_priv, flags_to_set) do { \ (xml_priv)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_NEVER, "XML", "XML node", (xml_priv)->flags, \ (flags_to_set), #flags_to_set); \ } while (0) #define pcmk__clear_xml_flags(xml_priv, flags_to_clear) do { \ (xml_priv)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_NEVER, "XML", "XML node", (xml_priv)->flags, \ (flags_to_clear), #flags_to_clear); \ } while (0) G_GNUC_INTERNAL void pcmk__xml2text(xmlNode *data, int options, char **buffer, int *offset, int *max, int depth); G_GNUC_INTERNAL void pcmk__buffer_add_char(char **buffer, int *offset, int *max, char c); G_GNUC_INTERNAL void pcmk__set_xml_doc_flag(xmlNode *xml, enum xml_private_flags flag); G_GNUC_INTERNAL bool pcmk__tracking_xml_changes(xmlNode *xml, bool lazy); G_GNUC_INTERNAL int pcmk__element_xpath(const char *prefix, xmlNode *xml, char *buffer, int offset, size_t buffer_size); G_GNUC_INTERNAL void pcmk__mark_xml_created(xmlNode *xml); G_GNUC_INTERNAL int pcmk__xml_position(xmlNode *xml, enum xml_private_flags ignore_if_set); G_GNUC_INTERNAL xmlNode *pcmk__xml_match(xmlNode *haystack, xmlNode *needle, bool exact); G_GNUC_INTERNAL void pcmk__xe_log(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode *data, int depth, int options); G_GNUC_INTERNAL void pcmk__xml_update(xmlNode *parent, xmlNode *target, xmlNode *update, bool as_diff); G_GNUC_INTERNAL xmlNode *pcmk__xc_match(xmlNode *root, xmlNode *search_comment, bool exact); G_GNUC_INTERNAL void pcmk__xc_update(xmlNode *parent, xmlNode *target, xmlNode *update); G_GNUC_INTERNAL void pcmk__free_acls(GList *acls); G_GNUC_INTERNAL void pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user); G_GNUC_INTERNAL bool pcmk__check_acl(xmlNode *xml, const char *name, enum xml_private_flags mode); G_GNUC_INTERNAL void pcmk__apply_acl(xmlNode *xml); G_GNUC_INTERNAL void pcmk__apply_creation_acl(xmlNode *xml, bool check_top); G_GNUC_INTERNAL void pcmk__mark_xml_attr_dirty(xmlAttr *a); G_GNUC_INTERNAL bool pcmk__xa_filterable(const char *name); static inline const char * pcmk__xml_attr_value(const xmlAttr *attr) { return ((attr == NULL) || (attr->children == NULL))? NULL : (const char *) attr->children->content; } /* * IPC */ #define PCMK__IPC_VERSION 1 #define PCMK__CONTROLD_API_MAJOR "1" #define PCMK__CONTROLD_API_MINOR "0" // IPC behavior that varies by daemon typedef struct pcmk__ipc_methods_s { /*! * \internal * \brief Allocate any private data needed by daemon IPC * * \param[in] api IPC API connection * * \return Standard Pacemaker return code */ int (*new_data)(pcmk_ipc_api_t *api); /*! * \internal * \brief Free any private data used by daemon IPC * * \param[in] api_data Data allocated by new_data() method */ void (*free_data)(void *api_data); /*! * \internal * \brief Perform daemon-specific handling after successful connection * * Some daemons require clients to register before sending any other * commands. The controller requires a CRM_OP_HELLO (with no reply), and * the CIB manager, executor, and fencer require a CRM_OP_REGISTER (with a * reply). Ideally this would be consistent across all daemons, but for now * this allows each to do its own authorization. * * \param[in] api IPC API connection * * \return Standard Pacemaker return code */ int (*post_connect)(pcmk_ipc_api_t *api); /*! * \internal * \brief Check whether an IPC request results in a reply * - * \parma[in] api IPC API connection + * \param[in] api IPC API connection * \param[in] request IPC request XML * * \return true if request would result in an IPC reply, false otherwise */ bool (*reply_expected)(pcmk_ipc_api_t *api, xmlNode *request); /*! * \internal * \brief Perform daemon-specific handling of an IPC message * * \param[in] api IPC API connection * \param[in] msg Message read from IPC connection */ void (*dispatch)(pcmk_ipc_api_t *api, xmlNode *msg); /*! * \internal * \brief Perform daemon-specific handling of an IPC disconnect * * \param[in] api IPC API connection */ void (*post_disconnect)(pcmk_ipc_api_t *api); } pcmk__ipc_methods_t; // Implementation of pcmk_ipc_api_t struct pcmk_ipc_api_s { enum pcmk_ipc_server server; // Daemon this IPC API instance is for enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched size_t ipc_size_max; // maximum IPC buffer size crm_ipc_t *ipc; // IPC connection mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC bool free_on_disconnect; // Whether disconnect should free object pcmk_ipc_callback_t cb; // Caller-registered callback (if any) void *user_data; // Caller-registered data (if any) void *api_data; // For daemon-specific use pcmk__ipc_methods_t *cmds; // Behavior that varies by daemon }; typedef struct pcmk__ipc_header_s { struct qb_ipc_response_header qb; uint32_t size_uncompressed; uint32_t size_compressed; uint32_t flags; uint8_t version; } pcmk__ipc_header_t; G_GNUC_INTERNAL int pcmk__send_ipc_request(pcmk_ipc_api_t *api, xmlNode *request); G_GNUC_INTERNAL void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data); G_GNUC_INTERNAL unsigned int pcmk__ipc_buffer_size(unsigned int max); G_GNUC_INTERNAL bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__controld_api_methods(void); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void); /* * Logging */ /*! * \brief Check the authenticity of the IPC socket peer process * * If everything goes well, peer's authenticity is verified by the means * of comparing against provided referential UID and GID (either satisfies), * and the result of this check can be deduced from the return value. * As an exception, detected UID of 0 ("root") satisfies arbitrary * provided referential daemon's credentials. * * \param[in] qb_ipc libqb client connection if available * \param[in] sock IPC related, connected Unix socket to check peer of * \param[in] refuid referential UID to check against * \param[in] refgid referential GID to check against * \param[out] gotpid to optionally store obtained PID of the peer * (not available on FreeBSD, special value of 1 * used instead, and the caller is required to * special case this value respectively) * \param[out] gotuid to optionally store obtained UID of the peer * \param[out] gotgid to optionally store obtained GID of the peer * * \return Standard Pacemaker return code * ie: 0 if it the connection is authentic * pcmk_rc_ipc_unauthorized if the connection is not authentic, * standard errors. * * \note While this function is tolerant on what constitutes authorized * IPC daemon process (its effective user matches UID=0 or \p refuid, * or at least its group matches \p refgid), either or both (in case * of UID=0) mismatches on the expected credentials of such peer * process \e shall be investigated at the caller when value of 1 * gets returned there, since higher-than-expected privileges in * respect to the expected/intended credentials possibly violate * the least privilege principle and may pose an additional risk * (i.e. such accidental inconsistency shall be eventually fixed). */ int pcmk__crm_ipc_is_authentic_process(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); #endif // CRMCOMMON_PRIVATE__H diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index fa0430d683..5ce612ed76 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2776 +1,2776 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(stonith); struct stonith_action_s { /*! user defined data */ char *agent; char *action; char *victim; GHashTable *args; int timeout; int async; void *userdata; void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); void (*fork_cb) (GPid pid, gpointer user_data); svc_action_t *svc_action; /*! internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; int max_retries; /* device output data */ GPid pid; int rc; char *output; char *error; }; typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; int notify_refcnt; bool notify_deletes; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); bool delete; } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); bool stonith_dispatch(stonith_t * st); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static int stonith_send_command(stonith_t *stonith, const char *op, xmlNode *data, xmlNode **output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int internal_stonith_action_execute(stonith_action_t * action); static void log_action(stonith_action_t *action, pid_t pid); /*! * \brief Get agent namespace by name * * \param[in] namespace_s Name of namespace as string * * \return Namespace as enum value */ enum stonith_namespace stonith_text2namespace(const char *namespace_s) { if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) { return st_namespace_any; } else if (!strcmp(namespace_s, "redhat") || !strcmp(namespace_s, "stonith-ng")) { return st_namespace_rhcs; } else if (!strcmp(namespace_s, "internal")) { return st_namespace_internal; } else if (!strcmp(namespace_s, "heartbeat")) { return st_namespace_lha; } return st_namespace_invalid; } /*! * \brief Get agent namespace name * * \param[in] namespace Namespace as enum value * * \return Namespace name as string */ const char * stonith_namespace2text(enum stonith_namespace st_namespace) { switch (st_namespace) { case st_namespace_any: return "any"; case st_namespace_rhcs: return "stonith-ng"; case st_namespace_internal: return "internal"; case st_namespace_lha: return "heartbeat"; default: break; } return "unsupported"; } /*! * \brief Determine namespace of a fence agent * * \param[in] agent Fence agent type * \param[in] namespace_s Name of agent namespace as string, if known * * \return Namespace of specified agent, as enum value */ enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s) { if (pcmk__str_eq(namespace_s, "internal", pcmk__str_casei)) { return st_namespace_internal; } if (stonith__agent_is_rhcs(agent)) { return st_namespace_rhcs; } #if HAVE_STONITH_STONITH_H if (stonith__agent_is_lha(agent)) { return st_namespace_lha; } #endif crm_err("Unknown fence agent: %s", agent); return st_namespace_invalid; } gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) { gboolean rv = FALSE; stonith_t *stonith_api = st?st:stonith_api_new(); char *list = NULL; if(stonith_api) { if (stonith_api->state == stonith_disconnected) { int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL); if (rc != pcmk_ok) { crm_err("Failed connecting to Stonith-API for watchdog-fencing-query."); } } if (stonith_api->state != stonith_disconnected) { /* caveat!!! * this might fail when when stonithd is just updating the device-list * probably something we should fix as well for other api-calls */ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0); if ((rc != pcmk_ok) || (list == NULL)) { /* due to the race described above it can happen that * we drop in here - so as not to make remote nodes * panic on that answer */ crm_warn("watchdog-fencing-query failed"); } else if (list[0] == '\0') { rv = TRUE; } else { GList *targets = stonith__parse_targets(list); rv = pcmk__str_in_list(node, targets, pcmk__str_casei); g_list_free_full(targets, free); } free(list); if (!st) { /* if we're provided the api we still might have done the * connection - but let's assume the caller won't bother */ stonith_api->cmds->disconnect(stonith_api); } } if (!st) { stonith_api_delete(stonith_api); } } else { crm_err("Stonith-API for watchdog-fencing-query couldn't be created."); } crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.", node, rv?"":"not "); return rv; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node) { return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); } static void log_action(stonith_action_t *action, pid_t pid) { if (action->output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid); crm_log_output(LOG_TRACE, prefix, action->output); free(prefix); } if (action->error) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); crm_log_output(LOG_WARNING, prefix, action->error); free(prefix); } } /* when cycling through the list we don't want to delete items so just mark them and when we know nobody is using the list loop over it to remove the marked items */ static void foreach_notify_entry (stonith_private_t *private, GFunc func, gpointer user_data) { private->notify_refcnt++; g_list_foreach(private->notify_list, func, user_data); private->notify_refcnt--; if ((private->notify_refcnt == 0) && private->notify_deletes) { GList *list_item = private->notify_list; private->notify_deletes = FALSE; while (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; GList *next = g_list_next(list_item); if (list_client->delete) { free(list_client); private->notify_list = g_list_delete_link(private->notify_list, list_item); } list_item = next; } } } static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = create_xml_node(NULL, "notify"); native = stonith->st_private; native->ipc = NULL; native->source = NULL; free(native->token); native->token = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT); foreach_notify_entry(native, stonith_send_notification, &blob); free_xml(blob.xml); } xmlNode * create_device_registration_xml(const char *id, enum stonith_namespace namespace, const char *agent, stonith_key_value_t *params, const char *rsc_provides) { xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H if (namespace == st_namespace_any) { namespace = stonith_get_namespace(agent, NULL); } if (namespace == st_namespace_lha) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, XML_ATTR_ID, id); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, "agent", agent); if ((namespace != st_namespace_any) && (namespace != st_namespace_invalid)) { crm_xml_add(data, "namespace", stonith_namespace2text(namespace)); } if (rsc_provides) { crm_xml_add(data, "rsc_provides", rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t * st, int call_options, const char *id, const char *namespace, const char *agent, stonith_key_value_t * params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, stonith_text2namespace(namespace), agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, XML_ATTR_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level) { int rc = 0; xmlNode *data = NULL; CRM_CHECK(node || pattern || (attr && value), return -EINVAL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { return stonith_api_remove_level_full(st, options, node, NULL, NULL, NULL, level); } /*! * \internal * \brief Create XML for fence topology level registration request * * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to register * \param[in] device_list List of devices in level * * \return Newly allocated XML tree on success, NULL otherwise * * \note The caller should set only one of node, pattern or attr/value. */ xmlNode * create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { size_t len = 0; char *list = NULL; xmlNode *data; CRM_CHECK(node || pattern || (attr && value), return NULL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); CRM_CHECK(data, return NULL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } // cppcheck seems not to understand the abort logic behind pcmk__realloc // cppcheck-suppress memleak for (; device_list; device_list = device_list->next) { pcmk__add_separated_word(&list, &len, device_list->value, ","); } crm_xml_add(data, XML_ATTR_STONITH_DEVICES, list); free(list); return data; } static int stonith_api_register_level_full(stonith_t * st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, pattern, attr, value, level, device_list); CRM_CHECK(data != NULL, return -EINVAL); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, stonith_key_value_t * device_list) { return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL, level, device_list); } static void append_config_arg(gpointer key, gpointer value, gpointer user_data) { /* The fencer will filter "action" out when it registers the device, * but ignore it here in case any external API users don't. * * Also filter out parameters handled directly by Pacemaker. */ if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei) && !pcmk_stonith_param(key) && (strstr(key, CRM_META) == NULL) && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) { crm_trace("Passing %s=%s with fence action", (const char *) key, (const char *) (value? value : "")); g_hash_table_insert((GHashTable *) user_data, strdup(key), strdup(value? value : "")); } } static GHashTable * make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, GHashTable * port_map, const char *host_arg) { GHashTable *arg_list = NULL; const char *value = NULL; CRM_CHECK(action != NULL, return NULL); arg_list = pcmk__strkey_table(free, free); // Add action to arguments (using an alias if requested) if (device_args) { char buffer[512]; snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action); value = g_hash_table_lookup(device_args, buffer); if (value) { crm_debug("Substituting '%s' for fence action %s targeting %s", value, action, victim); action = value; } } g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP), strdup(action)); /* If this is a fencing operation against another node, add more standard * arguments. */ if (victim && device_args) { const char *param = NULL; /* Always pass the target's name, per * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md */ g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim)); // If the target's node ID was specified, pass it, too if (victim_nodeid) { char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid); // cts-fencing looks for this log message crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s", nodeid, action, victim); g_hash_table_insert(arg_list, strdup("nodeid"), nodeid); } // Check whether target must be specified in some other way param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT); if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none) && !pcmk__str_eq(param, "none", pcmk__str_casei)) { if (param == NULL) { /* Use the caller's default for pcmk_host_argument, or "port" if * none was given */ param = (host_arg == NULL)? "port" : host_arg; } value = g_hash_table_lookup(device_args, param); if (pcmk__str_eq(value, "dynamic", pcmk__str_casei|pcmk__str_null_matches)) { /* If the host argument was "dynamic" or not explicitly specified, * add it with the target */ const char *alias = NULL; if (port_map) { alias = g_hash_table_lookup(port_map, victim); } if (alias == NULL) { alias = victim; } crm_debug("Passing %s='%s' with fence action %s targeting %s", param, alias, action, victim); g_hash_table_insert(arg_list, strdup(param), strdup(alias)); } } } if (device_args) { g_hash_table_foreach(device_args, append_config_arg, arg_list); } return arg_list; } /*! * \internal * \brief Free all memory used by a stonith action * * \param[in,out] action Action to free */ void stonith__destroy_action(stonith_action_t *action) { if (action) { free(action->agent); if (action->args) { g_hash_table_destroy(action->args); } free(action->action); free(action->victim); if (action->svc_action) { services_action_free(action->svc_action); } free(action->output); free(action->error); free(action); } } /*! * \internal * \brief Get the result of an executed stonith action * * \param[in,out] action Executed action * \param[out] rc Where to store result code (or NULL) * \param[out] output Where to store standard output (or NULL) * \param[out] error_output Where to store standard error output (or NULL) * * \note If output or error_output is not NULL, the caller is responsible for * freeing the memory. */ void stonith__action_result(stonith_action_t *action, int *rc, char **output, char **error_output) { if (rc) { *rc = pcmk_ok; } if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } if (action != NULL) { if (rc) { *rc = action->rc; } if (output && action->output) { *output = action->output; action->output = NULL; // hand off memory management to caller } if (error_output && action->error) { *error_output = action->error; action->error = NULL; // hand off memory management to caller } } } #define FAILURE_MAX_RETRIES 2 stonith_action_t * stonith_action_create(const char *agent, const char *_action, const char *victim, uint32_t victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map, const char *host_arg) { stonith_action_t *action; action = calloc(1, sizeof(stonith_action_t)); action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map, host_arg); crm_debug("Preparing '%s' action for %s using agent %s", _action, (victim? victim : "no target"), agent); action->agent = strdup(agent); action->action = strdup(_action); if (victim) { action->victim = strdup(victim); } action->timeout = action->remaining_timeout = timeout; action->max_retries = FAILURE_MAX_RETRIES; if (device_args) { char buffer[512]; const char *value = NULL; snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action); value = g_hash_table_lookup(device_args, buffer); if (value) { action->max_retries = atoi(value); } } return action; } static gboolean update_remaining_timeout(stonith_action_t * action) { int diff = time(NULL) - action->initial_start_time; if (action->tries >= action->max_retries) { crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", action->agent, action->action, action->max_retries); action->remaining_timeout = 0; } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) { /* only set remaining timeout period if there is 30% * or greater of the original timeout period left */ action->remaining_timeout = action->timeout - diff; } else { action->remaining_timeout = 0; } return action->remaining_timeout ? TRUE : FALSE; } static int svc_action_to_errno(svc_action_t *svc_action) { int rv = pcmk_ok; if (svc_action->status == PCMK_EXEC_TIMEOUT) { rv = -ETIME; } else if (svc_action->rc != PCMK_OCF_OK) { /* Try to provide a useful error code based on the fence agent's * error output. */ if (svc_action->stderr_data == NULL) { rv = -ENODATA; } else if (strstr(svc_action->stderr_data, "imed out")) { /* Some agents have their own internal timeouts */ rv = -ETIME; } else if (strstr(svc_action->stderr_data, "Unrecognised action")) { rv = -EOPNOTSUPP; } else { rv = -pcmk_err_generic; } } return rv; } static void stonith_action_async_done(svc_action_t *svc_action) { stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; action->rc = svc_action_to_errno(svc_action); action->output = svc_action->stdout_data; svc_action->stdout_data = NULL; action->error = svc_action->stderr_data; svc_action->stderr_data = NULL; svc_action->params = NULL; crm_debug("Child process %d performing action '%s' exited with rc %d", action->pid, action->action, svc_action->rc); log_action(action, action->pid); if (action->rc != pcmk_ok && update_remaining_timeout(action)) { int rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { return; } } if (action->done_cb) { action->done_cb(action->pid, action->rc, action->output, action->userdata); } action->svc_action = NULL; // don't remove our caller stonith__destroy_action(action); } static void stonith_action_async_forked(svc_action_t *svc_action) { stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; action->pid = svc_action->pid; action->svc_action = svc_action; if (action->fork_cb) { (action->fork_cb) (svc_action->pid, action->userdata); } crm_trace("Child process %d performing action '%s' successfully forked", action->pid, action->action); } static int internal_stonith_action_execute(stonith_action_t * action) { int rc = -EPROTO; int is_retry = 0; svc_action_t *svc_action = NULL; static int stonith_sequence = 0; char *buffer = NULL; if (!action->tries) { action->initial_start_time = time(NULL); } action->tries++; if (action->tries > 1) { crm_info("Attempt %d to execute %s (%s). remaining timeout is %d", action->tries, action->agent, action->action, action->remaining_timeout); is_retry = 1; } if (action->args == NULL || action->agent == NULL) goto fail; buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s", basename(action->agent)); svc_action = services_action_create_generic(buffer, NULL); free(buffer); svc_action->timeout = 1000 * action->remaining_timeout; svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH); svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent), action->action, action->tries); svc_action->agent = strdup(action->agent); svc_action->sequence = stonith_sequence++; svc_action->params = action->args; svc_action->cb_data = (void *) action; svc_action->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", svc_action->id, svc_action->flags, SVC_ACTION_NON_BLOCKED, "SVC_ACTION_NON_BLOCKED"); /* keep retries from executing out of control and free previous results */ if (is_retry) { free(action->output); action->output = NULL; free(action->error); action->error = NULL; sleep(1); } if (action->async) { /* async */ if(services_action_async_fork_notify(svc_action, &stonith_action_async_done, &stonith_action_async_forked) == FALSE) { services_action_free(svc_action); svc_action = NULL; } else { rc = 0; } } else { /* sync */ if (services_action_sync(svc_action)) { rc = 0; action->rc = svc_action_to_errno(svc_action); action->output = svc_action->stdout_data; svc_action->stdout_data = NULL; action->error = svc_action->stderr_data; svc_action->stderr_data = NULL; } else { action->rc = -ECONNABORTED; rc = action->rc; } svc_action->params = NULL; services_action_free(svc_action); } fail: return rc; } /*! * \internal * \brief Kick off execution of an async stonith action * * \param[in,out] action Action to be executed * \param[in,out] userdata Datapointer to be passed to callbacks * \param[in] done Callback to notify action has failed/succeeded * \param[in] fork_callback Callback to notify successful fork of child * * \return pcmk_ok if ownership of action has been taken, -errno otherwise */ int stonith_action_execute_async(stonith_action_t * action, void *userdata, void (*done) (GPid pid, int rc, const char *output, gpointer user_data), void (*fork_cb) (GPid pid, gpointer user_data)) { if (!action) { return -EINVAL; } action->userdata = userdata; action->done_cb = done; action->fork_cb = fork_cb; action->async = 1; return internal_stonith_action_execute(action); } /*! * \internal * \brief Execute a stonith action * * \param[in,out] action Action to execute * * \return pcmk_ok on success, -errno otherwise */ int stonith__execute(stonith_action_t *action) { int rc = pcmk_ok; CRM_CHECK(action != NULL, return -EINVAL); // Keep trying until success, max retries, or timeout do { rc = internal_stonith_action_execute(action); } while ((rc != pcmk_ok) && update_remaining_timeout(action)); return rc; } static int stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace, stonith_key_value_t ** devices, int timeout) { int count = 0; enum stonith_namespace ns = stonith_text2namespace(namespace); if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } #if HAVE_STONITH_STONITH_H // Include Linux-HA agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { count += stonith__list_lha_agents(devices); } #endif // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { count += stonith__list_rhcs_agents(devices); } return count; } static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { /* By executing meta-data directly, we can get it from stonith_admin when * the cluster is not running, which is important for higher-level tools. */ enum stonith_namespace ns = stonith_get_namespace(agent, namespace); crm_trace("Looking up metadata for %s agent %s", stonith_namespace2text(ns), agent); switch (ns) { case st_namespace_rhcs: return stonith__rhcs_metadata(agent, timeout, output); #if HAVE_STONITH_STONITH_H case st_namespace_lha: return stonith__lha_metadata(agent, timeout, output); #endif default: crm_err("Can't get fence agent '%s' meta-data: No such agent", agent); break; } return -ENODEV; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObjectPtr xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, F_STONITH_TARGET, target); crm_xml_add(data, F_STONITH_ACTION, "off"); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = xpath_search(output, "//@agent"); if (xpathObj) { max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { xmlChar *match_path = xmlGetNodePath(match); crm_info("%s[%d] = %s", "//@agent", lpc, match_path); free(match_path); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); } } freeXpathObject(xpathObj); } free_xml(output); free_xml(data); return max; } static int stonith_api_call(stonith_t * stonith, int call_options, const char *id, const char *action, const char *victim, int timeout, xmlNode ** output) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, F_STONITH_DEVICE, id); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add(data, F_STONITH_TARGET, victim); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout); free_xml(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, "st_output"); if (list_str) { *list_info = strdup(list_str); } } if (output) { free_xml(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL); } static int stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance, int delay) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, __func__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); crm_xml_add_int(data, F_STONITH_DELAY, delay); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { return stonith_api_fence_with_delay(stonith, call_options, node, action, timeout, tolerance, 0); } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { stonith__set_call_options(call_options, target, st_opt_manual_ack); return stonith_api_fence(stonith, call_options, target, "off", 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = create_xml_node(NULL, __func__); crm_xml_add(data, F_STONITH_TARGET, node); } stonith__set_call_options(call_options, node, st_opt_sync_call); rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options, timeout); free_xml(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_NEVER); for (op = pcmk__xml_first_child(reply); op != NULL; op = pcmk__xml_next(op)) { stonith_history_t *kvp; long long completed; long long completed_nsec = 0L; kvp = calloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, F_STONITH_TARGET); kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); crm_element_value_ll(op, F_STONITH_DATE, &completed); kvp->completed = (time_t) completed; crm_element_value_ll(op, F_STONITH_DATE_NSEC, &completed_nsec); kvp->completed_nsec = completed_nsec; crm_element_value_int(op, F_STONITH_STATE, &kvp->state); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } free_xml(output); return rc; } void stonith_history_free(stonith_history_t *history) { stonith_history_t *hp, *hp_old; for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) { free(hp->target); free(hp->action); free(hp->origin); free(hp->delegate); free(hp->client); } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; if (a_client->delete || b_client->delete) { /* make entries marked for deletion not findable */ return -1; } CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_STONITH_OPERATION, op); crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_STONITH_CALLDATA, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->st_private; crm_debug("Disconnecting from the fencer"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->st_private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { pcmk__intkey_table_remove(private->stonith_op_callback_table, call_id); } return pcmk_ok; } static void invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = rc; data.userdata = userdata; callback(st, &data); } static void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc) { stonith_private_t *private = NULL; stonith_callback_client_t *blob = NULL; stonith_callback_client_t local_blob; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->st_private != NULL, return); private = stonith->st_private; local_blob.id = NULL; local_blob.callback = NULL; local_blob.user_data = NULL; local_blob.only_success = FALSE; if (msg != NULL) { crm_element_value_int(msg, F_STONITH_RC, &rc); crm_element_value_int(msg, F_STONITH_CALLID, &call_id); } CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result")); blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (blob != NULL) { local_blob = *blob; blob = NULL; stonith_api_del_callback(stonith, call_id, FALSE); } else { crm_trace("No callback found for call %d", call_id); local_blob.callback = NULL; } if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback); } else if (private->op_callback == NULL && rc != pcmk_ok) { crm_warn("Fencing command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed fence update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_callback(stonith, call_id, rc, NULL, private->op_callback); } crm_trace("OP callback activated."); } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = calloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->st_private; callback = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->st_private; blob.stonith = st; blob.xml = string2xml(buffer); if (blob.xml == NULL) { crm_warn("Received malformed message from fencer: %s", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, F_TYPE); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) { stonith_perform_callback(st, blob.xml, 0, 0); } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) { foreach_notify_entry(private, stonith_send_notification, &blob); } else if (pcmk__str_eq(type, T_STONITH_TIMEOUT_VALUE, pcmk__str_casei)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout); crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } free_xml(blob.xml); return 1; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = NULL; const char *display_name = name? name : "client"; struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; CRM_CHECK(stonith != NULL, return -EINVAL); native = stonith->st_private; CRM_ASSERT(native != NULL); crm_debug("Attempting fencer connection by %s with%s mainloop", display_name, (stonith_fd? "out" : "")); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *stonith_fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_ipc_close(native->ipc); crm_ipc_destroy(native->ipc); native->ipc = NULL; } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { rc = -ENOTCONN; } else { xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "stonith_command"); crm_xml_add(hello, F_TYPE, T_STONITH_NG); crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_STONITH_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_debug("Couldn't register with the fencer: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); rc = -ECOMM; } else if (reply == NULL) { crm_debug("Couldn't register with the fencer: no reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION); native->token = crm_element_value_copy(reply, F_STONITH_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_debug("Couldn't register with the fencer: invalid reply type '%s'", (msg_type? msg_type : "(missing)")); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else if (native->token == NULL) { crm_debug("Couldn't register with the fencer: no token in reply"); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else { #if HAVE_MSGFROMIPC_TIMEOUT stonith->call_timeout = PCMK__IPC_TIMEOUT; #endif crm_debug("Connection to fencer by %s succeeded (registration token: %s)", display_name, native->token); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); } if (rc != pcmk_ok) { crm_debug("Connection attempt to fencer by %s failed: %s " CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc); stonith->cmds->disconnect(stonith); } return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = create_xml_node(NULL, __func__); stonith_private_t *native = stonith->st_private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); if (enabled) { crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } free_xml(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->st_private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; crm_debug("Removing callback for %s events", event); private = stonith->st_private; new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; if (private->notify_refcnt) { list_client->delete = TRUE; private->notify_deletes = TRUE; } else { private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); } crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->st_private != NULL, return -EINVAL); private = stonith->st_private; if (call_id == 0) { private->op_callback = callback; } else if (call_id < 0) { if (!(options & st_opt_report_only_success)) { crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); invoke_callback(stonith, call_id, call_id, user_data, callback); } else { crm_warn("Fencer call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = calloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id, blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->st_private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } /* */ static stonith_event_t * xml_to_event(xmlNode * msg) { stonith_event_t *event = calloc(1, sizeof(stonith_event_t)); const char *ntype = crm_element_value(msg, F_SUBTYPE); char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG); crm_log_xml_trace(msg, "stonith_notify"); crm_element_value_int(msg, F_STONITH_RC, &(event->result)); if (pcmk__str_eq(ntype, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION); if (data) { event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN); event->action = crm_element_value_copy(data, F_STONITH_ACTION); event->target = crm_element_value_copy(data, F_STONITH_TARGET); event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE); event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID); event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME); event->device = crm_element_value_copy(data, F_STONITH_DEVICE); } else { crm_err("No data for %s event", ntype); crm_log_xml_notice(msg, "BadEvent"); } } free(data_addr); return event; } static void event_free(stonith_event_t * event) { free(event->id); free(event->type); free(event->message); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->delete) { crm_trace("Skipping callback - marked for deletion"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (!pcmk__str_eq(entry->event, event, pcmk__str_casei)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } /*! * \internal * \brief Create and send an API request * * \param[in] stonith Stonith connection * \param[in] op API operation to request * \param[in] data Data to attach to request * \param[out] output_data If not NULL, will be set to reply if synchronous * \param[in] call_options Bitmask of stonith_call_options to use * \param[in] timeout Error if not completed within this many seconds * * \return pcmk_ok (for synchronous requests) or positive call ID * (for asynchronous requests) on success, -errno otherwise */ static int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = NULL; CRM_ASSERT(stonith && stonith->st_private && op); native = stonith->st_private; if (output_data != NULL) { *output_data = NULL; } if ((stonith->state == stonith_disconnected) || (native->token == NULL)) { return -ENOTCONN; } /* Increment the call ID, which must be positive to avoid conflicting with * error codes. This shouldn't be a problem unless the client mucked with * it or the counter wrapped around. */ stonith->call_id++; if (stonith->call_id < 1) { stonith->call_id = 1; } op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to fencer with timeout %ds", op, timeout); if (data) { const char *delay_s = crm_element_value(data, F_STONITH_DELAY); if (delay_s) { crm_xml_add(op_msg, F_STONITH_DELAY, delay_s); } } { enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; if (call_options & st_opt_sync_call) { pcmk__set_ipc_flags(ipc_flags, "stonith command", crm_ipc_client_response); } rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); } free_xml(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); free_xml(op_reply); return stonith->call_id; } rc = pcmk_ok; crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); if (reply_id == stonith->call_id) { crm_trace("Synchronous reply %d received", reply_id); if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) { rc = -ENOMSG; } if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); free_xml(op_reply); rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); free_xml(op_reply); rc = -ENOMSG; } done: if (crm_ipc_connected(native->ipc) == FALSE) { crm_err("Fencer disconnected"); free(native->token); native->token = NULL; stonith->state = stonith_disconnected; } free_xml(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->st_private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (crm_ipc_connected(private->ipc) == FALSE) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Disconnecting %p first", stonith); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->st_private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->st_private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { crm_trace("Destroying %p", stonith); if(stonith) { stonith->cmds->free(stonith); } } static int stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, stonith_key_value_t *params, int timeout, char **output, char **error_output) { /* Validation should be done directly via the agent, so we can get it from * stonith_admin when the cluster is not running, which is important for * higher-level tools. */ int rc = pcmk_ok; /* Use a dummy node name in case the agent requires a target. We assume the * actual target doesn't matter for validation purposes (if in practice, * that is incorrect, we will need to allow the caller to pass the target). */ const char *target = "node1"; const char *host_arg = NULL; GHashTable *params_table = pcmk__strkey_table(free, free); // Convert parameter list to a hash table for (; params; params = params->next) { if (pcmk__str_eq(params->key, PCMK_STONITH_HOST_ARGUMENT, pcmk__str_casei)) { host_arg = params->value; } if (!pcmk_stonith_param(params->key)) { g_hash_table_insert(params_table, strdup(params->key), strdup(params->value)); } } #if SUPPORT_CIBSECRETS rc = pcmk__substitute_secrets(rsc_id, params_table); if (rc != pcmk_rc_ok) { crm_warn("Could not replace secret parameters for validation of %s: %s", agent, pcmk_rc_str(rc)); // rc is standard return value, don't return it in this function } #endif if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } switch (stonith_get_namespace(agent, namespace_s)) { case st_namespace_rhcs: rc = stonith__rhcs_validate(st, call_options, target, agent, params_table, host_arg, timeout, output, error_output); break; #if HAVE_STONITH_STONITH_H case st_namespace_lha: rc = stonith__lha_validate(st, call_options, target, agent, params_table, timeout, output, error_output); break; #endif default: rc = -EINVAL; errno = EINVAL; crm_perror(LOG_ERR, "Agent %s not found or does not support validation", agent); break; } g_hash_table_destroy(params_table); return rc; } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); if (new_stonith == NULL) { return NULL; } private = calloc(1, sizeof(stonith_private_t)); if (private == NULL) { free(new_stonith); return NULL; } new_stonith->st_private = private; private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); private->notify_list = NULL; private->notify_refcnt = 0; private->notify_deletes = FALSE; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); if (new_stonith->cmds == NULL) { free(new_stonith->st_private); free(new_stonith); return NULL; } /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; new_stonith->cmds->validate = stonith_api_validate; /* *INDENT-ON* */ return new_stonith; } /*! * \brief Make a blocking connection attempt to the fencer * * \param[in,out] st Fencer API object * \param[in] name Client name to use with fencer * \param[in] max_attempts Return error if this many attempts fail * * \return pcmk_ok on success, result of last attempt otherwise */ int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts) { int rc = -EINVAL; // if max_attempts is not positive for (int attempt = 1; attempt <= max_attempts; attempt++) { rc = st->cmds->connect(st, name, NULL); if (rc == pcmk_ok) { return pcmk_ok; } else if (attempt < max_attempts) { crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s " CRM_XS " rc=%d", attempt, max_attempts, pcmk_strerror(rc), rc); sleep(2); } } crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); return rc; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = calloc(1, sizeof(stonith_key_value_t)); if (key) { p->key = strdup(key); } if (value) { p->value = strdup(value); } end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { int rc = pcmk_ok; stonith_t *st = stonith_api_new(); const char *action = off? "off" : "reboot"; api_log_open(); if (st == NULL) { api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s", action, nodeid, uname); return -EPROTO; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); int opts = 0; stonith__set_call_options(opts, name, st_opt_sync_call|st_opt_allow_suicide); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->fence(st, opts, name, action, timeout, 0); free(name); if (rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action); } } stonith_api_delete(st); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = pcmk_ok; time_t when = 0; stonith_t *st = stonith_api_new(); stonith_history_t *history = NULL, *hp = NULL; if (st == NULL) { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: " "API initialization failed", nodeid, uname); return when; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } else { int entries = 0; int progress = 0; int completed = 0; int opts = 0; char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); stonith__set_call_options(opts, name, st_opt_sync_call); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->history(st, opts, name, &history, 120); free(name); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } stonith_history_free(history); if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } stonith_api_delete(st); if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } return when; } bool stonith_agent_exists(const char *agent, int timeout) { stonith_t *st = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; bool rc = FALSE; if (agent == NULL) { return rc; } st = stonith_api_new(); if (st == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return FALSE; } st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout == 0 ? 120 : timeout); for (dIter = devices; dIter != NULL; dIter = dIter->next) { if (pcmk__str_eq(dIter->value, agent, pcmk__str_none)) { rc = TRUE; break; } } stonith_key_value_freeall(devices, 1, 1); stonith_api_delete(st); return rc; } const char * stonith_action_str(const char *action) { if (action == NULL) { return "fencing"; } else if (!strcmp(action, "on")) { return "unfencing"; } else if (!strcmp(action, "off")) { return "turning off"; } else { return action; } } /*! * \internal * \brief Parse a target name from one line of a target list string * * \param[in] line One line of a target list string - * \parma[in] len String length of line + * \param[in] len String length of line * \param[in,out] output List to add newly allocated target name to */ static void parse_list_line(const char *line, int len, GList **output) { size_t i = 0; size_t entry_start = 0; /* Skip complaints about additional parameters device doesn't understand * * @TODO Document or eliminate the implied restriction of target names */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping list output line: %s", line); return; } // Process line content, character by character for (i = 0; i <= len; i++) { if (isspace(line[i]) || (line[i] == ',') || (line[i] == ';') || (line[i] == '\0')) { // We've found a separator (i.e. the end of an entry) int rc = 0; char *entry = NULL; if (i == entry_start) { // Skip leading and sequential separators entry_start = i + 1; continue; } entry = calloc(i - entry_start + 1, sizeof(char)); CRM_ASSERT(entry != NULL); /* Read entry, stopping at first separator * * @TODO Document or eliminate these character restrictions */ rc = sscanf(line + entry_start, "%[a-zA-Z0-9_-.]", entry); if (rc != 1) { crm_warn("Could not parse list output entry: %s " CRM_XS " entry_start=%d position=%d", line + entry_start, entry_start, i); free(entry); } else if (pcmk__strcase_any_of(entry, "on", "off", NULL)) { /* Some agents print the target status in the list output, * though none are known now (the separate list-status command * is used for this, but it can also print "UNKNOWN"). To handle * this possibility, skip such entries. * * @TODO Document or eliminate the implied restriction of target * names. */ free(entry); } else { // We have a valid entry *output = g_list_append(*output, entry); } entry_start = i + 1; } } } /*! * \internal * \brief Parse a list of targets from a string * * \param[in] list_output Target list as a string * * \return List of target names * \note The target list string format is flexible, to allow for user-specified * lists such pcmk_host_list and the output of an agent's list action * (whether direct or via the API, which escapes newlines). There may be * multiple lines, separated by either a newline or an escaped newline * (backslash n). Each line may have one or more target names, separated * by any combination of whitespace, commas, and semi-colons. Lines * containing "invalid" or "variable" will be ignored entirely. Target * names "on" or "off" (case-insensitive) will be ignored. Target names * may contain only alphanumeric characters, underbars (_), dashes (-), * and dots (.) (if any other character occurs in the name, it and all * subsequent characters in the name will be ignored). * \note The caller is responsible for freeing the result with * g_list_free_full(result, free). */ GList * stonith__parse_targets(const char *target_spec) { GList *targets = NULL; if (target_spec != NULL) { size_t out_len = strlen(target_spec); size_t line_start = 0; // Starting index of line being processed for (size_t i = 0; i <= out_len; ++i) { if ((target_spec[i] == '\n') || (target_spec[i] == '\0') || ((target_spec[i] == '\\') && (target_spec[i + 1] == 'n'))) { // We've reached the end of one line of output int len = i - line_start; if (len > 0) { char *line = strndup(target_spec + line_start, len); line[len] = '\0'; // Because it might be a newline parse_list_line(line, len, &targets); free(line); } if (target_spec[i] == '\\') { ++i; // backslash-n takes up two positions } line_start = i + 1; } } } return targets; } /*! * \internal * \brief Determine if a later stonith event succeeded. * * \note Before calling this function, use stonith__sort_history() to sort the * top_history argument. */ gboolean stonith__later_succeeded(stonith_history_t *event, stonith_history_t *top_history) { gboolean ret = FALSE; for (stonith_history_t *prev_hp = top_history; prev_hp; prev_hp = prev_hp->next) { if (prev_hp == event) { break; } if ((prev_hp->state == st_done) && pcmk__str_eq(event->target, prev_hp->target, pcmk__str_casei) && pcmk__str_eq(event->action, prev_hp->action, pcmk__str_casei) && pcmk__str_eq(event->delegate, prev_hp->delegate, pcmk__str_casei) && ((event->completed < prev_hp->completed) || ((event->completed == prev_hp->completed) && (event->completed_nsec < prev_hp->completed_nsec)))) { ret = TRUE; break; } } return ret; } /*! * \internal * \brief Sort the stonith-history * sort by competed most current on the top * pending actions lacking a completed-stamp are gathered at the top * * \param[in] history List of stonith actions * */ stonith_history_t * stonith__sort_history(stonith_history_t *history) { stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp; for (hp = history; hp; ) { tmp = hp->next; if ((hp->state == st_done) || (hp->state == st_failed)) { /* sort into new */ if ((!new) || (hp->completed > new->completed) || ((hp->completed == new->completed) && (hp->completed_nsec > new->completed_nsec))) { hp->next = new; new = hp; } else { np = new; do { if ((!np->next) || (hp->completed > np->next->completed) || ((hp->completed == np->next->completed) && (hp->completed_nsec > np->next->completed_nsec))) { hp->next = np->next; np->next = hp; break; } np = np->next; } while (1); } } else { /* put into pending */ hp->next = pending; pending = hp; } hp = tmp; } /* pending actions don't have a completed-stamp so make them go front */ if (pending) { stonith_history_t *last_pending = pending; while (last_pending->next) { last_pending = last_pending->next; } last_pending->next = new; new = pending; } return new; } /*! * \brief Return string equivalent of an operation state value * * \param[in] state Fencing operation state value * * \return Human-friendly string equivalent of state */ const char * stonith_op_state_str(enum op_state state) { switch (state) { case st_query: return "querying"; case st_exec: return "executing"; case st_done: return "completed"; case st_duplicate: return "duplicate"; case st_failed: return "failed"; } return "unknown"; } stonith_history_t * stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data) { for (stonith_history_t *hp = history; hp; hp = hp->next) { if (matching_fn(hp, user_data)) { return hp; } } return NULL; } bool stonith__event_state_pending(stonith_history_t *history, void *user_data) { return history->state != st_failed && history->state != st_done; } bool stonith__event_state_eq(stonith_history_t *history, void *user_data) { return history->state == GPOINTER_TO_INT(user_data); } bool stonith__event_state_neq(stonith_history_t *history, void *user_data) { return history->state != GPOINTER_TO_INT(user_data); } void stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name, xmlNode *metadata) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; CRM_CHECK((device_flags != NULL) && (metadata != NULL), return); xpath = xpath_search(metadata, "//parameter"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *parameter = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if (match == NULL) { continue; } parameter = crm_element_value(match, "name"); if (pcmk__str_eq(parameter, "plug", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_plug); } else if (pcmk__str_eq(parameter, "port", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_port); } } freeXpathObject(xpath); } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START const char *get_stonith_provider(const char *agent, const char *provider); const char * get_stonith_provider(const char *agent, const char *provider) { return stonith_namespace2text(stonith_get_namespace(agent, provider)); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c index ceb731f769..1810e62dfa 100644 --- a/lib/pacemaker/pcmk_graph_consumer.c +++ b/lib/pacemaker/pcmk_graph_consumer.c @@ -1,846 +1,846 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include /* * Functions for updating graph */ /*! * \internal * \brief Update synapse after completed prerequisite * * A synapse is ready to be executed once all its prerequisite actions (inputs) * complete. Given a completed action, check whether it is an input for a given * synapse, and if so, mark the input as confirmed, and mark the synapse as * ready if appropriate. * * \param[in] synapse Transition graph synapse to update * \param[in] action_id ID of an action that completed * * \note The only substantial effect here is confirming synapse inputs. * should_fire_synapse() will recalculate synapse->ready, so the only * thing that uses the synapse->ready value from here is * synapse_state_str(). */ static void update_synapse_ready(synapse_t *synapse, int action_id) { if (synapse->ready) { return; // All inputs have already been confirmed } synapse->ready = TRUE; // Presume ready until proven otherwise for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { crm_action_t *prereq = (crm_action_t *) lpc->data; if (prereq->id == action_id) { crm_trace("Confirming input %d of synapse %d", action_id, synapse->id); prereq->confirmed = TRUE; } else if (!(prereq->confirmed)) { synapse->ready = FALSE; crm_trace("Synapse %d still not ready after action %d", synapse->id, action_id); } } if (synapse->ready) { crm_trace("Synapse %d is now ready to execute", synapse->id); } } /*! * \internal * \brief Update action and synapse confirmation after action completion * * \param[in] synapse Transition graph synapse that action belongs to * \param[in] action_id ID of action that completed */ static void update_synapse_confirmed(synapse_t *synapse, int action_id) { bool all_confirmed = true; for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { crm_action_t *action = (crm_action_t *) lpc->data; if (action->id == action_id) { crm_trace("Confirmed action %d of synapse %d", action_id, synapse->id); action->confirmed = TRUE; } else if (all_confirmed && !(action->confirmed)) { all_confirmed = false; crm_trace("Synapse %d still not confirmed after action %d", synapse->id, action_id); } } if (all_confirmed && !(synapse->confirmed)) { crm_trace("Confirmed synapse %d", synapse->id); synapse->confirmed = TRUE; } } /*! * \internal * \brief Update the transition graph with a completed action result * * \param[in,out] graph Transition graph to update * \param[in] action Action that completed */ void pcmk__update_graph(crm_graph_t *graph, crm_action_t *action) { for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed || synapse->failed) { continue; // This synapse already completed } else if (synapse->executed) { update_synapse_confirmed(synapse, action->id); } else if (!(action->failed) || (synapse->priority == INFINITY)) { update_synapse_ready(synapse, action->id); } } } /* * Functions for executing graph */ /* A transition graph consists of various types of actions. The library caller * registers execution functions for each action type, which will be stored * here. */ static crm_graph_functions_t *graph_fns = NULL; /*! * \internal * \brief Set transition graph execution functions * * \param[in] Execution functions to use */ void pcmk__set_graph_functions(crm_graph_functions_t *fns) { crm_debug("Setting custom functions for executing transition graphs"); graph_fns = fns; CRM_ASSERT(graph_fns != NULL); CRM_ASSERT(graph_fns->rsc != NULL); CRM_ASSERT(graph_fns->crmd != NULL); CRM_ASSERT(graph_fns->pseudo != NULL); CRM_ASSERT(graph_fns->stonith != NULL); } /*! * \internal * \brief Check whether a graph synapse is ready to be executed * * \param[in] graph Transition graph that synapse is part of * \param[in] synapse Synapse to check * * \return true if synapse is ready, false otherwise */ static bool should_fire_synapse(crm_graph_t *graph, synapse_t *synapse) { GList *lpc = NULL; synapse->ready = TRUE; for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) { crm_action_t *prereq = (crm_action_t *) lpc->data; if (!(prereq->confirmed)) { crm_trace("Input %d for synapse %d not yet confirmed", prereq->id, synapse->id); synapse->ready = FALSE; break; } else if (prereq->failed && !(prereq->can_fail)) { crm_trace("Input %d for synapse %d confirmed but failed", prereq->id, synapse->id); synapse->ready = FALSE; break; } } if (synapse->ready) { crm_trace("Synapse %d is ready to execute", synapse->id); } else { return false; } for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { crm_action_t *a = (crm_action_t *) lpc->data; if (a->type == action_type_pseudo) { /* None of the below applies to pseudo ops */ } else if (synapse->priority < graph->abort_priority) { crm_trace("Skipping synapse %d: priority %d is less than " "abort priority %d", synapse->id, synapse->priority, graph->abort_priority); graph->skipped++; return false; } else if (graph_fns->allowed && !(graph_fns->allowed(graph, a))) { crm_trace("Deferring synapse %d: not allowed", synapse->id); return false; } } return true; } /*! * \internal * \brief Initiate an action from a transition graph * * \param[in] graph Transition graph containing action - * \parma[in] action Action to execute + * \param[in] action Action to execute * * \return TRUE if action was initiated, FALSE otherwise */ static gboolean initiate_action(crm_graph_t *graph, crm_action_t *action) { const char *id = ID(action->xml); CRM_CHECK(!(action->executed), return FALSE); CRM_CHECK(id != NULL, return FALSE); action->executed = TRUE; switch (action->type) { case action_type_pseudo: crm_trace("Executing pseudo-action %d (%s)", action->id, id); return graph_fns->pseudo(graph, action); case action_type_rsc: crm_trace("Executing resource action %d (%s)", action->id, id); return graph_fns->rsc(graph, action); case action_type_crm: if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK), CRM_OP_FENCE, pcmk__str_casei)) { crm_trace("Executing fencing action %d (%s)", action->id, id); return graph_fns->stonith(graph, action); } crm_trace("Executing control action %d (%s)", action->id, id); return graph_fns->crmd(graph, action); default: crm_err("Unsupported graph action type <%s id='%s'> (bug?)", crm_element_name(action->xml), id); return FALSE; } } /*! * \internal * \brief Execute a graph synapse * * \param[in] graph Transition graph with synapse to execute * \param[in] synapse Synapse to execute * * \return Standard Pacemaker return value */ static int fire_synapse(crm_graph_t *graph, synapse_t *synapse) { synapse->executed = TRUE; for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) { crm_action_t *action = (crm_action_t *) lpc->data; if (!initiate_action(graph, action)) { crm_err("Failed initiating <%s id=%d> in synapse %d", crm_element_name(action->xml), action->id, synapse->id); synapse->confirmed = TRUE; action->confirmed = TRUE; action->failed = TRUE; return pcmk_rc_error; } } return pcmk_rc_ok; } /*! * \internal * \brief Dummy graph method that can be used with simulations * * \param[in] graph Transition graph containing action * \param[in] action Action to be initiated * * \retval TRUE Action initiation was (simulated to be) successful * \retval FALSE Action initiation was (simulated to be) failed (due to the * PE_fail environment variable being set to the action ID) */ static gboolean pseudo_action_dummy(crm_graph_t * graph, crm_action_t * action) { static int fail = -1; if (fail < 0) { long long fail_ll; if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok) && (fail_ll > 0LL) && (fail_ll <= INT_MAX)) { fail = (int) fail_ll; } else { fail = 0; } } if (action->id == fail) { crm_err("Dummy event handler: pretending action %d failed", action->id); action->failed = TRUE; graph->abort_priority = INFINITY; } else { crm_trace("Dummy event handler: action %d initiated", action->id); } action->confirmed = TRUE; pcmk__update_graph(graph, action); return TRUE; } static crm_graph_functions_t default_fns = { pseudo_action_dummy, pseudo_action_dummy, pseudo_action_dummy, pseudo_action_dummy }; /*! * \internal * \brief Execute all actions in a transition graph * * \param[in] graph Transition graph to execute * * \return Status of transition after execution */ enum transition_status pcmk__execute_graph(crm_graph_t *graph) { GList *lpc = NULL; int log_level = LOG_DEBUG; enum transition_status pass_result = transition_active; const char *status = "In progress"; if (graph_fns == NULL) { graph_fns = &default_fns; } if (graph == NULL) { return transition_complete; } graph->fired = 0; graph->pending = 0; graph->skipped = 0; graph->completed = 0; graph->incomplete = 0; // Count completed and in-flight synapses for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { graph->completed++; } else if (!(synapse->failed) && synapse->executed) { graph->pending++; } } crm_trace("Executing graph %d (%d synapses already completed, %d pending)", graph->id, graph->completed, graph->pending); // Execute any synapses that are ready for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { synapse_t *synapse = (synapse_t *) lpc->data; if ((graph->batch_limit > 0) && (graph->pending >= graph->batch_limit)) { crm_debug("Throttling graph execution: batch limit (%d) reached", graph->batch_limit); break; } else if (synapse->failed) { graph->skipped++; continue; } else if (synapse->confirmed || synapse->executed) { continue; // Already handled } else if (should_fire_synapse(graph, synapse)) { graph->fired++; if (fire_synapse(graph, synapse) != pcmk_rc_ok) { crm_err("Synapse %d failed to fire", synapse->id); log_level = LOG_ERR; graph->abort_priority = INFINITY; graph->incomplete++; graph->fired--; } if (!(synapse->confirmed)) { graph->pending++; } } else { crm_trace("Synapse %d cannot fire", synapse->id); graph->incomplete++; } } if ((graph->pending == 0) && (graph->fired == 0)) { graph->complete = TRUE; if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) { log_level = LOG_WARNING; pass_result = transition_terminated; status = "Terminated"; } else if (graph->skipped != 0) { log_level = LOG_NOTICE; pass_result = transition_complete; status = "Stopped"; } else { log_level = LOG_NOTICE; pass_result = transition_complete; status = "Complete"; } } else if (graph->fired == 0) { pass_result = transition_pending; } do_crm_log(log_level, "Transition %d (Complete=%d, Pending=%d," " Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s", graph->id, graph->completed, graph->pending, graph->fired, graph->skipped, graph->incomplete, graph->source, status); return pass_result; } /* * Functions for unpacking transition graph XML into structs */ /*! * \internal * \brief Unpack a transition graph action from XML * * \param[in] parent Synapse that action is part of * \param[in] xml_action Action XML to unparse * * \return Newly allocated action on success, or NULL otherwise */ static crm_action_t * unpack_action(synapse_t *parent, xmlNode *xml_action) { action_type_e action_type; crm_action_t *action = NULL; const char *element = TYPE(xml_action); const char *value = ID(xml_action); if (value == NULL) { crm_err("Ignoring transition graph action without id (bug?)"); crm_log_xml_trace(xml_action, "invalid"); return NULL; } if (pcmk__str_eq(element, XML_GRAPH_TAG_RSC_OP, pcmk__str_casei)) { action_type = action_type_rsc; } else if (pcmk__str_eq(element, XML_GRAPH_TAG_PSEUDO_EVENT, pcmk__str_casei)) { action_type = action_type_pseudo; } else if (pcmk__str_eq(element, XML_GRAPH_TAG_CRM_EVENT, pcmk__str_casei)) { action_type = action_type_crm; } else { crm_err("Ignoring transition graph action of unknown type '%s' (bug?)", element); crm_log_xml_trace(xml_action, "invalid"); return NULL; } action = calloc(1, sizeof(crm_action_t)); if (action == NULL) { crm_perror(LOG_CRIT, "Cannot unpack transition graph action"); crm_log_xml_trace(xml_action, "lost"); return NULL; } pcmk__scan_min_int(value, &(action->id), -1); action->type = action_type_rsc; action->xml = copy_xml(xml_action); action->synapse = parent; action->type = action_type; action->params = xml2list(action->xml); value = g_hash_table_lookup(action->params, "CRM_meta_timeout"); pcmk__scan_min_int(value, &(action->timeout), 0); /* Take start-delay into account for the timeout of the action timer */ value = g_hash_table_lookup(action->params, "CRM_meta_start_delay"); { int start_delay; pcmk__scan_min_int(value, &start_delay, 0); action->timeout += start_delay; } if (pcmk__guint_from_hash(action->params, CRM_META "_" XML_LRM_ATTR_INTERVAL, 0, &(action->interval_ms)) != pcmk_rc_ok) { action->interval_ms = 0; } value = g_hash_table_lookup(action->params, "CRM_meta_can_fail"); if (value != NULL) { crm_str_to_boolean(value, &(action->can_fail)); #ifndef PCMK__COMPAT_2_0 if (action->can_fail) { crm_warn("Support for the can_fail meta-attribute is deprecated" " and will be removed in a future release"); } #endif } crm_trace("Action %d has timer set to %dms", action->id, action->timeout); return action; } /*! * \internal * \brief Unpack transition graph synapse from XML * * \param[in] new_graph Transition graph that synapse is part of * \param[in] xml_synapse Synapse XML * * \return Newly allocated synapse on success, or NULL otherwise */ static synapse_t * unpack_synapse(crm_graph_t *new_graph, xmlNode *xml_synapse) { const char *value = NULL; xmlNode *action_set = NULL; synapse_t *new_synapse = NULL; crm_trace("Unpacking synapse %s", ID(xml_synapse)); new_synapse = calloc(1, sizeof(synapse_t)); if (new_synapse == NULL) { return NULL; } pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0); value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY); pcmk__scan_min_int(value, &(new_synapse->priority), 0); CRM_CHECK(new_synapse->id >= 0, free(new_synapse); return NULL); new_graph->num_synapses++; crm_trace("Unpacking synapse %s action sets", crm_element_value(xml_synapse, XML_ATTR_ID)); for (action_set = first_named_child(xml_synapse, "action_set"); action_set != NULL; action_set = crm_next_same_xml(action_set)) { for (xmlNode *action = pcmk__xml_first_child(action_set); action != NULL; action = pcmk__xml_next(action)) { crm_action_t *new_action = unpack_action(new_synapse, action); if (new_action == NULL) { continue; } crm_trace("Adding action %d to synapse %d", new_action->id, new_synapse->id); new_graph->num_actions++; new_synapse->actions = g_list_append(new_synapse->actions, new_action); } } crm_trace("Unpacking synapse %s inputs", ID(xml_synapse)); for (xmlNode *inputs = first_named_child(xml_synapse, "inputs"); inputs != NULL; inputs = crm_next_same_xml(inputs)) { for (xmlNode *trigger = first_named_child(inputs, "trigger"); trigger != NULL; trigger = crm_next_same_xml(trigger)) { for (xmlNode *input = pcmk__xml_first_child(trigger); input != NULL; input = pcmk__xml_next(input)) { crm_action_t *new_input = unpack_action(new_synapse, input); if (new_input == NULL) { continue; } crm_trace("Adding input %d to synapse %d", new_input->id, new_synapse->id); new_synapse->inputs = g_list_append(new_synapse->inputs, new_input); } } } return new_synapse; } /*! * \internal * \brief Unpack transition graph XML * * \param[in] xml_graph Transition graph XML to unpack * \param[in] reference Where the XML came from (for logging) * * \return Newly allocated transition graph on success, NULL otherwise * \note The caller is responsible for freeing the return value using * pcmk__free_graph(). * \note The XML is expected to be structured like: ... ... */ crm_graph_t * pcmk__unpack_graph(xmlNode *xml_graph, const char *reference) { crm_graph_t *new_graph = NULL; const char *t_id = NULL; const char *time = NULL; new_graph = calloc(1, sizeof(crm_graph_t)); if (new_graph == NULL) { return NULL; } new_graph->source = strdup((reference == NULL)? "unknown" : reference); if (new_graph->source == NULL) { free(new_graph); return NULL; } new_graph->id = -1; new_graph->abort_priority = 0; new_graph->network_delay = 0; new_graph->stonith_timeout = 0; new_graph->completion_action = tg_done; // Parse top-level attributes from if (xml_graph != NULL) { t_id = crm_element_value(xml_graph, "transition_id"); CRM_CHECK(t_id != NULL, free(new_graph); return NULL); pcmk__scan_min_int(t_id, &(new_graph->id), -1); time = crm_element_value(xml_graph, "cluster-delay"); CRM_CHECK(time != NULL, free(new_graph); return NULL); new_graph->network_delay = crm_parse_interval_spec(time); time = crm_element_value(xml_graph, "stonith-timeout"); if (time == NULL) { new_graph->stonith_timeout = new_graph->network_delay; } else { new_graph->stonith_timeout = crm_parse_interval_spec(time); } // Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum t_id = crm_element_value(xml_graph, "batch-limit"); if ((t_id == NULL) || (pcmk__scan_min_int(t_id, &(new_graph->batch_limit), -1) != pcmk_rc_ok)) { new_graph->batch_limit = 0; } t_id = crm_element_value(xml_graph, "migration-limit"); pcmk__scan_min_int(t_id, &(new_graph->migration_limit), -1); } // Unpack each child element for (xmlNode *synapse_xml = first_named_child(xml_graph, "synapse"); synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) { synapse_t *new_synapse = unpack_synapse(new_graph, synapse_xml); if (new_synapse != NULL) { new_graph->synapses = g_list_append(new_graph->synapses, new_synapse); } } crm_debug("Unpacked transition %d from %s: %d actions in %d synapses", new_graph->id, new_graph->source, new_graph->num_actions, new_graph->num_synapses); return new_graph; } /* * Functions for freeing transition graph objects */ /*! * \internal * \brief Free a transition graph action object * * \param[in] user_data Action to free */ static void free_graph_action(gpointer user_data) { crm_action_t *action = user_data; if ((action->timer != NULL) && (action->timer->source_id != 0)) { crm_warn("Cancelling timer for graph action %d", action->id); g_source_remove(action->timer->source_id); } if (action->params != NULL) { g_hash_table_destroy(action->params); } free_xml(action->xml); free(action->timer); free(action); } /*! * \internal * \brief Free a transition graph synapse object * * \param[in] user_data Synapse to free */ static void free_graph_synapse(gpointer user_data) { synapse_t *synapse = user_data; g_list_free_full(synapse->actions, free_graph_action); g_list_free_full(synapse->inputs, free_graph_action); free(synapse); } /*! * \internal * \brief Free a transition graph object * * \param[in] graph Transition graph to free */ void pcmk__free_graph(crm_graph_t *graph) { if (graph != NULL) { g_list_free_full(graph->synapses, free_graph_synapse); free(graph->source); free(graph); } } /* * Other transition graph utilities */ /*! * \internal * \brief Synthesize an executor event from a graph action * * \param[in] resource If not NULL, use greater call ID than in this XML * \param[in] action Graph action * \param[in] status What to use as event execution status * \param[in] rc What to use as event exit status * \param[in] exit_reason What to use as event exit reason * * \return Newly allocated executor event on success, or NULL otherwise */ lrmd_event_data_t * pcmk__event_from_graph_action(xmlNode *resource, crm_action_t *action, int status, int rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; GHashTableIter iter; const char *name = NULL; const char *value = NULL; xmlNode *action_resource = NULL; CRM_CHECK(action != NULL, return NULL); CRM_CHECK(action->type == action_type_rsc, return NULL); action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE); CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid"); return NULL); op = lrmd_new_event(ID(action_resource), crm_element_value(action->xml, XML_LRM_ATTR_TASK), action->interval_ms); lrmd__set_result(op, rc, status, exit_reason); op->t_run = time(NULL); op->t_rcchange = op->t_run; op->params = pcmk__strkey_table(free, free); g_hash_table_iter_init(&iter, action->params); while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) { g_hash_table_insert(op->params, strdup(name), strdup(value)); } for (xmlNode *xop = pcmk__xml_first_child(resource); xop != NULL; xop = pcmk__xml_next(xop)) { int tmp = 0; crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp); crm_debug("Got call_id=%d for %s", tmp, ID(resource)); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; }