diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 77112ee07e..d310c58993 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,2823 +1,2821 @@ /* * Copyright 2009-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GListPtr capable; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data); static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; /* seconds */ int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; uint32_t victim_nodeid; char *action; char *device; char *mode; GListPtr device_list; GListPtr device_next; void *internal_user_data; void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc); static gboolean is_action_required(const char *action, stonith_device_t *device) { return device && device->automatic_unfencing && pcmk__str_eq(action, "on", pcmk__str_casei); } static int get_action_delay_max(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_max = 0; if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX); if (value) { delay_max = crm_parse_interval_spec(value) / 1000; } return delay_max; } static int get_action_delay_base(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_base = 0; if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE); if (value) { delay_base = crm_parse_interval_spec(value) / 1000; } return delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(stonith_device_t * device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, "reboot", pcmk__str_casei) && is_not_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = "off"; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { return atoi(value); } } return default_timeout; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->mode); free(cmd->op); free(cmd); } static async_command_t * create_async_command(xmlNode * msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; // Value -1 means disable any static/random fencing delays crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay)); cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->mode = crm_element_value_copy(op, F_STONITH_MODE); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd->done_cb = st_child_done; cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, STONITH_ATTR_ACTION_LIMIT); if (value) { action_limit = crm_parse_int(value, "1"); if (action_limit == 0) { /* pcmk_action_limit should not be 0. Enforce it to be 1. */ action_limit = 1; } } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(GPid pid, gpointer user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s'%s%s on %s now running with pid=%d, timeout=%ds", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, pid, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : ""); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace ("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds", pending_op->action, pending_op->victim ? " targeting " : "", pending_op->victim ? pending_op->victim : "", device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("Nothing further to do for %s for now", device->id); return TRUE; } if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) { if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) { pcmk_panic(__FUNCTION__); goto done; } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) { pcmk_panic(__FUNCTION__); goto done; } else { crm_info("Faking success for %s watchdog operation", cmd->action); cmd->done_cb(0, 0, NULL, cmd); goto done; } } #if SUPPORT_CIBSECRETS if (pcmk__substitute_secrets(device->id, device->params) != pcmk_rc_ok) { /* replacing secrets failed! */ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", device->id); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", device->id); exec_rc = PCMK_OCF_NOT_CONFIGURED; cmd->done_cb(0, exec_rc, NULL, cmd); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei) && is_not_set(device->flags, st_device_supports_reboot)) { crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent); action_str = "off"; } if (is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith_action_create(device->agent, action_str, cmd->victim, cmd->victim_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { crm_warn("Operation '%s'%s%s on %s failed: %s (%d)", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, pcmk_strerror(exec_rc), exec_rc); cmd->activating_on = NULL; cmd->done_cb(0, exec_rc, NULL, cmd); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = NULL; cmd->delay_id = 0; device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && cmd->victim) { crm_node_t *node = crm_get_peer(0, cmd->victim); cmd->victim_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s on %s for remote peer %s with op id (%s) (timeout=%ds)", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s on %s for %s (timeout=%ds)", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn("Base-delay (%ds) is larger than max-delay (%ds) " "for %s on %s - limiting to max-delay", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dont_call] We're not using rand() for security cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s on %s for %ds (timeout=%ds, " "requested_delay=%ds, base=%ds, max=%ds)", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); cmd->done_cb(0, -ENODEV, NULL, cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); free(device->on_target_actions); free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GListPtr * targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = crm_strcase_table_new(); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = crm_str_table_new(); } } static xmlNode * get_agent_metadata(const char *agent) { xmlNode *xml = NULL; char *buffer = NULL; init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) { return NULL; } else if(buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return NULL; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return NULL; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } xml = string2xml(buffer); return xml; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } #define MAX_ACTION_LEN 256 static char * add_action(char *actions, const char *action) { int offset = 0; if (actions == NULL) { actions = calloc(1, MAX_ACTION_LEN); } else { offset = strlen(actions); } if (offset > 0) { offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " "); } offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action); return actions; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *on_target = NULL; const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; on_target = crm_element_value(match, "on_target"); action = crm_element_value(match, "name"); if(pcmk__str_eq(action, "list", pcmk__str_casei)) { set_bit(device->flags, st_device_supports_list); } else if(pcmk__str_eq(action, "status", pcmk__str_casei)) { set_bit(device->flags, st_device_supports_status); } else if(pcmk__str_eq(action, "reboot", pcmk__str_casei)) { set_bit(device->flags, st_device_supports_reboot); } else if (pcmk__str_eq(action, "on", pcmk__str_casei)) { /* "automatic" means the cluster will unfence node when it joins */ const char *automatic = crm_element_value(match, "automatic"); /* "required" is a deprecated synonym for "automatic" */ const char *required = crm_element_value(match, "required"); if (crm_is_true(automatic) || crm_is_true(required)) { device->automatic_unfencing = TRUE; } } if (action && crm_is_true(on_target)) { device->on_target_actions = add_action(device->on_target_actions, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, strdup(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in,out] params Device parameters */ static GHashTable * xml2device_params(const char *name, xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "reboot") == 0) { crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "off") == 0) { map_action(params, "reboot", value); } else { map_action(params, "off", value); map_action(params, "reboot", value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static stonith_device_t * build_device_from_xml(xmlNode * msg) { const char *value; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, "agent"); CRM_CHECK(agent != NULL, return device); device = calloc(1, sizeof(stonith_device_t)); CRM_CHECK(device != NULL, {free(agent); return device;}); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); device->agent_metadata = get_agent_metadata(device->agent); if (device->agent_metadata) { read_action_metadata(device); set_bit(device->flags, stonith__device_parameter_flags(device->agent_metadata)); } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (pcmk__str_eq(value, "unfencing", pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required("on", device)) { crm_info("The fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions) { crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node", device->id, device->on_target_actions); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) { check_type = "static-list"; } else if(is_set(dev->flags, st_device_supports_list)){ check_type = "dynamic-list"; } else if(is_set(dev->flags, st_device_supports_status)){ check_type = "status"; } else { check_type = "none"; } } return check_type; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *victim, int timeout, void *internal_user_data, void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); cmd->victim = victim ? strdup(victim) : NULL; cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for (lpc = 0; lpc < max; lpc++) { const char *value = g_list_nth_data(list, lpc); if (pcmk__str_eq(item, value, pcmk__str_casei)) { return TRUE; } else { crm_trace("%d: '%s' != '%s'", lpc, item, value); } } return FALSE; } static void status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (rc == 1 /* unknown */ ) { crm_trace("Host %s is not known by %s", search->host, dev->id); } else if (rc == 0 /* active */ || rc == 2 /* inactive */ ) { crm_trace("Host %s is known by %s", search->host, dev->id); can = TRUE; } else { crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host, rc); } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); /* If we successfully got the targets earlier, don't disable. */ if (rc != 0 && !dev->targets) { crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output); /* Fall back to status */ g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status")); g_list_free_full(dev->targets, free); dev->targets = NULL; } else if (!rc) { crm_info("Refreshing port list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(output); dev->targets_age = time(NULL); } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (string_in_list(dev->targets, alias)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t * device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); CRM_CHECK(device != NULL, return -ENOMEM); dup = device_has_duplicate(device); if (dup) { crm_debug("Device '%s' already existed in device list (%d active devices)", device->id, g_hash_table_size(device_list)); free_device(device); device = dup; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting an existing entry for %s from the cib", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); } if (desc) { *desc = device->id; } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } int stonith_device_remove(const char *id, gboolean from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); if (!device) { crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); return pcmk_ok; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } else { crm_trace("Not removing '%s' from the device list (%d active devices) " "- still %s%s_registered", id, g_hash_table_size(device_list), device->cib_registered?"cib":"", device->api_registered?"api":""); } return pcmk_ok; } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(stonith_topology_t * tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); } } char *stonith_level_key(xmlNode *level, int mode) { if(mode == -1) { mode = stonith_level_kind(level); } switch(mode) { case 0: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); case 1: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); case 2: { const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE); if(name && value) { return crm_strdup_printf("%s=%s", name, value); } } default: return crm_strdup_printf("Unknown-%d-%s", mode, ID(level)); } } int stonith_level_kind(xmlNode * level) { int mode = 0; const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET); if(target == NULL) { mode++; target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN); } if(stand_alone == FALSE && target == NULL) { mode++; if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) { mode++; } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) { mode++; } } return mode; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Register a STONITH level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, will be set to string representation ("TARGET[LEVEL]") * * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index */ int stonith_level_register(xmlNode *msg, char **desc) { int id = 0; xmlNode *level; int mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; /* Allow the XML here to point to the level tag directly, or wrapped in * another tag. If directly, don't search by xpath, because it might give * multiple hits (e.g. if the XML is the CIB). */ if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) { level = msg; } else { level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); } CRM_CHECK(level != NULL, return -EINVAL); mode = stonith_level_kind(level); target = stonith_level_key(level, mode); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) { crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology)); free(target); crm_log_xml_err(level, "Bad topology"); return -EINVAL; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } stonith_key_value_freeall(devices, 1, 1); crm_info("Target %s has %d active fencing levels", tp->target, count_active_levels(tp)); return pcmk_ok; } int stonith_level_remove(xmlNode *msg, char **desc) { int id = 0; stonith_topology_t *tp; char *target; /* Unlike additions, removal requests should always have one level tag */ xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); CRM_CHECK(level != NULL, return -EINVAL); target = stonith_level_key(level, -1); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (id >= ST_LEVEL_MAX) { free(target); return -EINVAL; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { crm_info("Topology for %s not found (%d active entries)", target, g_hash_table_size(topology)); } else if (id == 0 && g_hash_table_remove(topology, target)) { crm_info("Removed all %s related entries from the topology (%d active entries)", target, g_hash_table_size(topology)); } else if (id > 0 && tp->levels[id] != NULL) { g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; crm_info("Removed level '%d' from topology for %s (%d active levels remaining)", id, target, count_active_levels(tp)); } free(target); return pcmk_ok; } /*! * \internal * \brief Schedule an (asynchronous) action directly on a stonith device * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg API message XML with desired action * \param[out] output Unused * * \return -EINPROGRESS on success, -errno otherwise * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static int stonith_device_action(xmlNode * msg, char **output) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); const char *action = crm_element_value(op, F_STONITH_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); return -EPROTO; } device = g_hash_table_lookup(device_list, id); if ((device == NULL) || (!device->api_registered && !strcmp(action, "monitor"))) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not found", action, id); return -ENODEV; } cmd = create_async_command(msg); if (cmd == NULL) { return -EPROTO; } schedule_stonith_command(cmd, device); return -EINPROGRESS; } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { crm_debug("Finished Search. %d devices can perform action (%s) on node %s", g_list_length(search->capable), search->action ? search->action : "", search->host ? search->host : ""); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei); if (device && action && device->on_target_actions && strstr(device->on_target_actions, action)) { if (!localhost_is_target) { crm_trace("'%s' operation with %s can only be executed for localhost not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } static void can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = NULL; const char *host = search->host; const char *alias = NULL; CRM_LOG_ASSERT(dev != NULL); if (dev == NULL) { goto search_report_results; } else if (host == NULL) { can = TRUE; goto search_report_results; } /* Short-circuit query if this host is not allowed to perform the action */ if (pcmk__str_eq(search->action, "reboot", pcmk__str_casei)) { /* A "reboot" *might* get remapped to "off" then "on", so short-circuit * only if all three are disallowed. If only one or two are disallowed, * we'll report that with the results. We never allow suicide for * remapped "on" operations because the host is off at that point. */ if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide) && !localhost_is_eligible(dev, "off", host, search->allow_suicide) && !localhost_is_eligible(dev, "on", host, FALSE)) { goto search_report_results; } } else if (!localhost_is_eligible(dev, search->action, host, search->allow_suicide)) { goto search_report_results; } alias = g_hash_table_lookup(dev->aliases, host); if (alias == NULL) { alias = host; } check_type = target_list_type(dev); if (pcmk__str_eq(check_type, "none", pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if (string_in_list(dev->targets, host)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (string_in_list(dev->targets, alias)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) { crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " STONITH_ATTR_HOSTCHECK ": %s", check_type); check_type = "Invalid " STONITH_ATTR_HOSTCHECK; } if (pcmk__str_eq(host, alias, pcmk__str_casei)) { crm_notice("%s is%s eligible to fence (%s) %s: %s", dev->id, (can? "" : " not"), search->action, host, check_type); } else { crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s", dev->id, (can? "" : " not"), search->action, host, alias, check_type); } search_report_results: search_devices_record_result(search, dev ? dev->id : NULL, can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data)) { struct device_search_s *search; int per_device_timeout = DEFAULT_QUERY_TIMEOUT; int devices_needing_async_query = 0; char *key = NULL; const char *check_type = NULL; GHashTableIter gIter; stonith_device_t *device = NULL; if (!g_hash_table_size(device_list)) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { callback(NULL, user_data); return; } g_hash_table_iter_init(&gIter, device_list); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) { check_type = target_list_type(device); if (pcmk__strcase_any_of(check_type, "status", "dynamic-list", NULL)) { devices_needing_async_query++; } } /* If we have devices that require an async event in order to know what * nodes they can fence, we have to give the events a timeout. The total * query timeout is divided among those events. */ if (devices_needing_async_query) { per_device_timeout = timeout / devices_needing_async_query; if (!per_device_timeout) { crm_err("STONITH timeout %ds is too low; using %ds, but consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); per_device_timeout = DEFAULT_QUERY_TIMEOUT; } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) { crm_notice("STONITH timeout %ds is low for the current configuration;" " consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); } } search->host = host ? strdup(host) : NULL; search->action = action ? strdup(action) : NULL; search->per_device_timeout = per_device_timeout; /* We are guaranteed this many replies. Even if a device gets * unregistered some how during the async search, we will get * the correct number of replies. */ search->replies_needed = g_hash_table_size(device_list); search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; /* kick off the search */ crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s", search->replies_needed, search->action ? search->action : "", search->host ? search->host : ""); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device */ static void add_action_specific_attributes(xmlNode *xml, const char *action, stonith_device_t *device) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); if (is_action_required(action, device)) { crm_trace("Action '%s' is required on %s", action, device->id); crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); } action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %dms on %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %dms on %s", action, delay_max, device->id); crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000); } delay_base = get_action_delay_base(device, action); if (delay_base > 0) { crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %dms on %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %dms on %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %dms and a randomly chosen " "maximum delay of %dms on %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' on %s is disallowed for local host", action, device->id); crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); crm_xml_add(child, XML_ATTR_ID, action); add_action_specific_attributes(child, action, device); add_disallowed(child, action, device, target, allow_suicide); } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GListPtr lpc = NULL; /* Pack the results into XML */ list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (is_not_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = "off"; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device); if (pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "off", device, query->target, is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "on", device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching devices for '%s'", available_devices, query->target); } else { crm_debug("%d devices installed", available_devices); } if (list != NULL) { crm_log_xml_trace(list, "Add query results"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id); free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } static void stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options) { struct st_query_data *query = NULL; const char *action = NULL; const char *target = NULL; int timeout = 0; xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER); crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout); if (dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); if (device && pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { /* No query or reply necessary */ return; } } crm_log_xml_debug(msg, "Query"); query = calloc(1, sizeof(struct st_query_data)); query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok); query->remote_peer = remote_peer ? strdup(remote_peer) : NULL; query->client_id = client_id ? strdup(client_id) : NULL; query->target = target ? strdup(target) : NULL; query->action = action ? strdup(action) : NULL; query->call_options = call_options; get_capable_devices(target, action, timeout, is_set(call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb); } #define ST_LOG_OUTPUT_MAX 512 static void log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output, gboolean op_merged) { if (rc == 0) { next = NULL; } if (cmd->victim != NULL) { do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned%s: %d (%s)%s%s", cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, (op_merged? " (merged)" : ""), rc, pcmk_strerror(rc), (next? ", retrying with " : ""), (next ? next : "")); } else { do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned%s: %d (%s)%s%s", cmd->action, pid, cmd->device, (op_merged? " (merged)" : ""), rc, pcmk_strerror(rc), (next? ", retrying with " : ""), (next ? next : "")); } if (output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid); crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output); free(prefix); } } static void stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid, int options) { xmlNode *reply = NULL; gboolean bcast = FALSE; reply = stonith_construct_async_reply(cmd, output, NULL, rc); if (pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) { /* Too verbose to log */ crm_trace("Metadata query for %s", cmd->device); output = NULL; - } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_none) || - pcmk__str_eq(cmd->action, "list", pcmk__str_none) || pcmk__str_eq(cmd->action, "status", pcmk__str_none)) { + } else if (pcmk__str_any_of(cmd->action, "monitor", "list", "status", NULL)) { crm_trace("Never broadcast '%s' replies", cmd->action); } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) { crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output, (options & st_reply_opt_merged ? TRUE : FALSE)); crm_log_xml_trace(reply, "Reply"); if (options & st_reply_opt_merged) { crm_xml_add(reply, F_STONITH_MERGED, "true"); } if (bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if (cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); } free_xml(reply); } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled '%s' action on %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data) { stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; async_command_t *cmd = user_data; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(cmd != NULL, return); cmd->active_on = NULL; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if (device) { if (!device->verified && (rc == pcmk_ok) && - (pcmk__str_eq(cmd->action, "list", pcmk__str_casei) || - pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei) || pcmk__str_eq(cmd->action, "status", pcmk__str_casei))) { + (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)", cmd->action, cmd->device, rc, g_list_length(cmd->device_next)); if (rc == 0) { GListPtr iter; /* see if there are any required devices left to execute for this op */ for (iter = cmd->device_next; iter != NULL; iter = iter->next) { next_device = g_hash_table_lookup(device_list, iter->data); if (next_device != NULL && is_action_required(cmd->action, next_device)) { cmd->device_next = iter->next; break; } next_device = NULL; } } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->device_next->data); cmd->device_next = cmd->device_next->next; } /* this operation requires more fencing, hooray! */ if (next_device) { log_operation(cmd, rc, pid, next_device->id, output, FALSE); schedule_stonith_command(cmd, next_device); /* Prevent cmd from being freed */ cmd = NULL; goto done; } stonith_send_async_reply(cmd, output, rc, pid, st_reply_opt_none); if (rc != 0) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if (cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->victim, cmd_other->victim, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_casei) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } /* Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to * off, then cmd->action will be reboot and will be merged with any * other reboot requests. If the originating fencer remapped a * topology reboot to off then on, we will get here once with * cmd->action "off" and once with "on", and they will be merged * separately with similar requests. */ crm_notice ("Merging stonith action '%s' targeting %s originating from client %s with identical stonith request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); stonith_send_async_reply(cmd_other, output, rc, pid, st_reply_opt_merged); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); if (device) { cmd->device_list = devices; cmd->device_next = devices->next; devices = NULL; /* list owned by cmd now */ } } /* we have a device, schedule it for fencing. */ if (device) { schedule_stonith_command(cmd, device); /* in progress */ return; } /* no device found! */ stonith_send_async_reply(cmd, NULL, -ENODEV, 0, st_reply_opt_none); free_async_command(cmd); g_list_free_full(devices, free); } static int stonith_fence(xmlNode * msg) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); if (cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); return -ENODEV; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if (cmd->options & st_opt_cs_nodeid) { int nodeid = crm_atoi(host, NULL); crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY); if (node) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb); } return -EINPROGRESS; } xmlNode * stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result %d (initiated before we came up?)", rc); } else { const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a result reply with%s reply output (rc=%d)", (data? "" : "out"), rc); for (int lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { add_message_xml(reply, F_STONITH_CALLDATA, data); } } return reply; } static xmlNode * stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if (data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } /*! * \internal * \brief Determine if we need to use an alternate node to * fence the target. If so return that node's uname * * \retval NULL, no alternate host * \retval uname, uname of alternate host to use */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target); if (find_topology_for_host(target) && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if (fencing_peer_active(entry) && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) { alternate_host = entry->uname; break; } } if (alternate_host == NULL) { crm_err("No alternate host available to handle complex self fencing request"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id) { if (remote_peer) { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL); } } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, F_STONITH_TARGET); } relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY); op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); client_name = crm_element_value(request, F_STONITH_CLIENTNAME); /* Delete RELAY operation. */ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GListPtr dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_info("Delete the relay op : %s - %s of %s for %s.(replaced by op : %s - %s of %s for %s)", relay_op->id, relay_op->action, relay_op->target, relay_op->client_name, op_id, relay_op->action, target, client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } static int handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; xmlNode *data = NULL; xmlNode *reply = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); pcmk__ipc_send_xml(client, id, reply, flags); client->request_id = 0; free_xml(reply); return 0; } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) { rc = stonith_device_action(request, &output); } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } /* Delete the DC node RELAY operation. */ remove_relay_op(request); stonith_query(request, remote_peer, client_id, call_options); return 0; } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } if (flags & crm_ipc_client_response) { pcmk__ipc_send_ack(client, id, flags, "ack"); } return 0; } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s", stonith_our_uname, client ? client->name : remote_peer, crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { if (remote_peer || stand_alone) { rc = stonith_fence(request); } else if (call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); crm_notice("Received manual confirmation that %s is fenced", target); rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (client) { int tolerance = 0; crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'", client->name, client->id, action, target, device ? device : "(any)"); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device ? device : "(any)"); } alternate_host = check_alternate_host(target); if (alternate_host && client) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; crm_notice("Forwarding complex self fencing request to peer %s", alternate_host); if (client->id) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } /* Create an operation for RELAY and send the ID in the RELAY message. */ /* When a QUERY response is received, delete the RELAY operation to avoid the existence of duplicate operations. */ op = create_remote_stonith_op(client_id, request, FALSE); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) { rc = stonith_fence_history(request, &data, remote_peer, call_options); if (call_options & st_opt_discard_reply) { /* we don't expect answers to the broadcast * we might have sent out */ free_xml(data); return pcmk_ok; } } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { const char *device_id = NULL; rc = stonith_device_register(request, &device_id, FALSE); do_stonith_notify_device(call_options, op, rc, device_id); } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); const char *device_id = crm_element_value(dev, XML_ATTR_ID); rc = stonith_device_remove(device_id, FALSE); do_stonith_notify_device(call_options, op, rc, device_id); } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { char *device_id = NULL; rc = stonith_level_register(request, &device_id); do_stonith_notify_level(call_options, op, rc, device_id); free(device_id); } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { char *device_id = NULL; rc = stonith_level_remove(request, &device_id); do_stonith_notify_level(call_options, op, rc, device_id); } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { int node_id = 0; const char *name = NULL; crm_element_value_int(request, XML_ATTR_ID, &node_id); name = crm_element_value(request, XML_ATTR_UNAME); reap_crm_member(node_id, name); return pcmk_ok; } else { crm_err("Unknown IPC request %s from %s", op, (client? client->name : remote_peer)); } done: /* Always reply unless the request is in process still. * If in progress, a reply will happen async after the request * processing is finished */ if (rc != -EINPROGRESS) { crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, id, is_set(call_options, st_opt_sync_call), call_options, crm_element_value(request, F_STONITH_CALLOPTS)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } reply = stonith_construct_reply(request, output, data, rc); stonith_send_reply(reply, call_options, remote_peer, client_id); } free(output); free_xml(data); free_xml(reply); return rc; } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { process_remote_stonith_exec(request); } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; /* Copy op for reporting. The original might get freed by handle_reply() * before we use it in crm_debug(): * handle_reply() * |- process_remote_stonith_exec() * |-- remote_op_done() * |--- handle_local_reply_and_notify() * |---- crm_xml_add(...F_STONITH_OPERATION...) * |--- free_xml(op->request) */ char *op = crm_element_value_copy(request, F_STONITH_OPERATION); if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "", id, client ? client->name : remote_peer, call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } crm_debug("Processed %s%s from %s: %s (%d)", op, is_reply ? " reply" : "", client ? client->name : remote_peer, rc > 0 ? "" : pcmk_strerror(rc), rc); free(op); } diff --git a/devel/Makefile.am b/devel/Makefile.am index d520c21ed8..27237be980 100644 --- a/devel/Makefile.am +++ b/devel/Makefile.am @@ -1,57 +1,59 @@ # # Copyright 2020 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # EXTRA_SCRIPTS = coccinelle/test/testrunner.sh EXTRA_DIST = $(EXTRA_SCRIPTS) \ coccinelle/ref-passed-variables-inited.cocci \ + coccinelle/string-any-of.cocci \ coccinelle/string-empty.cocci \ coccinelle/string-null-matches.cocci \ coccinelle/string-replacements.cocci \ coccinelle/test/ref-passed-variables-inited.input.c \ coccinelle/test/ref-passed-variables-inited.output # Coccinelle is a tool that takes special patch-like files (called semantic patches) and # applies them throughout a source tree. This is useful when refactoring, changing APIs, # catching dangerous or incorrect code, and other similar tasks. It's not especially # easy to write a semantic patch but most users should only be concerned about running # the target and inspecting the results. # # Documentation (including examples, which are the most useful): # http://coccinelle.lip6.fr/documentation.php # # Run the "make cocci" target to just output what would be done, or "make cocci-inplace" # to apply the changes to the source tree. # # COCCI_FILES may be set on the command line, if you want to test just a single file # while it's under development. Otherwise, it is a list of all the files that are ready # to be run. # # ref-passed-variables-inited.cocci seems to be returning some false positives around # GHashTableIters, so it is disabled for the moment. -COCCI_FILES ?= coccinelle/string-empty.cocci \ +COCCI_FILES ?= coccinelle/string-any-of.cocci \ + coccinelle/string-empty.cocci \ coccinelle/string-null-matches.cocci cocci: for f in $(COCCI_FILES); do \ for d in daemons include lib tools; do \ test $$d = "include" \ && spatch $(_SPATCH_FLAGS) --include-headers --local-includes \ --preprocess --sp-file $$f --dir ../$$d \ || spatch $(_SPATCH_FLAGS) --local-includes \ --preprocess --sp-file $$f --dir ../$$d; \ done; \ done cocci-inplace: $(MAKE) $(AM_MAKEFLAGS) _SPATCH_FLAGS=--in-place cocci cocci-test: for f in coccinelle/test/*.c; do \ coccinelle/test/testrunner.sh $$f; \ done diff --git a/devel/coccinelle/string-any-of.cocci b/devel/coccinelle/string-any-of.cocci new file mode 100644 index 0000000000..d63ad2e0a5 --- /dev/null +++ b/devel/coccinelle/string-any-of.cocci @@ -0,0 +1,74 @@ +/* + * Copyright 2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + * + * Catch string comparisons where the pcmk__str_any_of function could be used + * instead. Note that we are only catching uses involving identifiers (not + * expressions), but I think this is probably fine - we are likely not using + * the same expression multiple times in a single line of code. If some are + * found, it's easy enough to add another block here. + */ + +@ any_of_1 @ +expression test_str, str, new_str; +identifier I =~ "pcmk__str_none"; +@@ +- pcmk__str_eq(test_str, str, I) || pcmk__str_eq(test_str, new_str, I) ++ pcmk__str_any_of(test_str, str, new_str, NULL) + +@ any_of_2 @ +expression test_str, str, new_str; +identifier I =~ "pcmk__str_casei"; +@@ +- pcmk__str_eq(test_str, str, I) || pcmk__str_eq(test_str, new_str, I) ++ pcmk__strcase_any_of(test_str, str, new_str, NULL) + +@ any_of_3 @ +expression test_str, new_str; +expression list strs; +identifier I =~ "pcmk__str_none"; +@@ +- pcmk__str_any_of(test_str, strs, NULL) || pcmk__str_eq(test_str, new_str, I) ++ pcmk__str_any_of(test_str, strs, new_str, NULL) + +@ any_of_4 @ +expression test_str, new_str; +expression list strs; +identifier I =~ "pcmk__str_casei"; +@@ +- pcmk__strcase_any_of(test_str, strs, NULL) || pcmk__str_eq(test_str, new_str, I) ++ pcmk__strcase_any_of(test_str, strs, new_str, NULL) + +@ none_of_1 @ +expression test_str, str, new_str; +identifier I =~ "pcmk__str_none"; +@@ +- !pcmk__str_eq(test_str, str, I) && !pcmk__str_eq(test_str, new_str, I) ++ !pcmk__str_any_of(test_str, str, new_str, NULL) + +@ none_of_2 @ +expression test_str, str, new_str; +identifier I =~ "pcmk__str_casei"; +@@ +- !pcmk__str_eq(test_str, str, I) && !pcmk__str_eq(test_str, new_str, I) ++ !pcmk__strcase_any_of(test_str, str, new_str, NULL) + +@ none_of_3 @ +expression test_str, new_str; +expression list strs; +identifier I =~ "pcmk__str_none"; +@@ +- !pcmk__str_any_of(test_str, strs, NULL) && !pcmk__str_eq(test_str, new_str, I) ++ !pcmk__str_any_of(test_str, strs, new_str, NULL) + +@ none_of_4 @ +expression test_str, new_str; +expression list strs; +identifier I =~ "pcmk__str_casei"; +@@ +- !pcmk__strcase_any_of(test_str, strs, NULL) && !pcmk__str_eq(test_str, new_str, I) ++ !pcmk__strcase_any_of(test_str, strs, new_str, NULL) diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c index 133524c4ab..65dd34b273 100644 --- a/lib/cluster/cpg.c +++ b/lib/cluster/cpg.c @@ -1,813 +1,812 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* PCMK__SPECIAL_PID* */ cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */ static bool cpg_evicted = FALSE; gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL; #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) void cluster_disconnect_cpg(crm_cluster_t *cluster) { pcmk_cpg_handle = 0; if (cluster->cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(cluster->cpg_handle, &cluster->group); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; } else { crm_info("No CPG connection"); } } uint32_t get_local_nodeid(cpg_handle_t handle) { cs_error_t rc = CS_OK; int retries = 0; static uint32_t local_nodeid = 0; cpg_handle_t local_handle = handle; cpg_callbacks_t cb = { }; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; if(local_nodeid != 0) { return local_nodeid; } if(handle == 0) { crm_trace("Creating connection"); cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); return 0; } rc = cpg_fd_get(local_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); goto bail; } } if (rc == CS_OK) { retries = 0; crm_trace("Performing lookup"); cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); } if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); } bail: if(handle == 0) { crm_trace("Closing connection"); cpg_finalize(local_handle); } crm_debug("Local nodeid is %u", local_nodeid); return local_nodeid; } GListPtr cs_message_queue = NULL; int cs_message_timer = 0; static ssize_t crm_cs_flush(gpointer data); static gboolean crm_cs_flush_cb(gpointer data) { cs_message_timer = 0; crm_cs_flush(data); return FALSE; } #define CS_SEND_MAX 200 static ssize_t crm_cs_flush(gpointer data) { int sent = 0; ssize_t rc = 0; int queue_len = 0; static unsigned int last_sent = 0; cpg_handle_t *handle = (cpg_handle_t *)data; if (*handle == 0) { crm_trace("Connection is dead"); return pcmk_ok; } queue_len = g_list_length(cs_message_queue); if ((queue_len % 1000) == 0 && queue_len > 1) { crm_err("CPG queue has grown to %d", queue_len); } else if (queue_len == CS_SEND_MAX) { crm_warn("CPG queue has grown to %d", queue_len); } if (cs_message_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active %d", cs_message_timer); return pcmk_ok; } while (cs_message_queue && sent < CS_SEND_MAX) { struct iovec *iov = cs_message_queue->data; errno = 0; rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1); if (rc != CS_OK) { break; } sent++; last_sent++; crm_trace("CPG message sent, size=%llu", (unsigned long long) iov->iov_len); cs_message_queue = g_list_remove(cs_message_queue, iov); free(iov->iov_base); free(iov); } queue_len -= sent; if (sent > 1 || cs_message_queue) { crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)", sent, queue_len, last_sent, ais_error2text(rc), (long long) rc); } else { crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)", sent, queue_len, last_sent, ais_error2text(rc), (long long) rc); } if (cs_message_queue) { uint32_t delay_ms = 100; if(rc != CS_OK) { /* Proportionally more if sending failed but cap at 1s */ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); } cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data); } return rc; } gboolean send_cpg_iov(struct iovec * iov) { static unsigned int queued = 0; queued++; crm_trace("Queueing CPG message %u (%llu bytes)", queued, (unsigned long long) iov->iov_len); cs_message_queue = g_list_append(cs_message_queue, iov); crm_cs_flush(&pcmk_cpg_handle); return TRUE; } static int pcmk_cpg_dispatch(gpointer user_data) { int rc = 0; crm_cluster_t *cluster = (crm_cluster_t*) user_data; rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %s (%d)", ais_error2text(rc), rc); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; return -1; } else if(cpg_evicted) { crm_err("Evicted from CPG membership"); return -1; } return 0; } char * pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content, uint32_t *kind, const char **from) { char *data = NULL; AIS_Message *msg = (AIS_Message *) content; if(handle) { // Do filtering and field massaging uint32_t local_nodeid = get_local_nodeid(handle); const char *local_name = get_local_node_name(); if (msg->sender.id > 0 && msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id); return NULL; } else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) { /* Not for us */ crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid); return NULL; } else if (msg->host.size != 0 && !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) { /* Not for us */ crm_trace("Not for us: %s != %s", msg->host.uname, local_name); return NULL; } msg->sender.id = nodeid; if (msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); msg->sender.size = strlen(peer->uname); memset(msg->sender.uname, 0, MAX_NAME); memcpy(msg->sender.uname, peer->uname, msg->sender.size); } } } crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); if (kind != NULL) { *kind = msg->header.id; } if (from != NULL) { *from = msg->sender.uname; } if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (pcmk__str_eq("identify", data, pcmk__str_casei)) { char *pid_s = pcmk__getpid_s(); send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); return NULL; } else { data = strdup(msg->data); } // Is this necessary? crm_get_peer(msg->sender.id, msg->sender.uname); crm_trace("Payload: %.200s", data); return data; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); free(data); return NULL; } static int cmp_member_list_nodeid(const void *first, const void *second) { const struct cpg_address *const a = *((const struct cpg_address **) first), *const b = *((const struct cpg_address **) second); if (a->nodeid < b->nodeid) { return -1; } else if (a->nodeid > b->nodeid) { return 1; } /* don't bother with "reason" nor "pid" */ return 0; } static const char * cpgreason2str(cpg_reason_t reason) { switch (reason) { case CPG_REASON_JOIN: return " via cpg_join"; case CPG_REASON_LEAVE: return " via cpg_leave"; case CPG_REASON_NODEDOWN: return " via cluster exit"; case CPG_REASON_NODEUP: return " via cluster join"; case CPG_REASON_PROCDOWN: return " for unknown reason"; default: break; } return ""; } static inline const char * peer_name(crm_node_t *peer) { if (peer == NULL) { return "unknown node"; } else if (peer->uname == NULL) { return "peer node"; } else { return peer->uname; } } void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; gboolean found = FALSE; static int counter = 0; uint32_t local_nodeid = get_local_nodeid(handle); const struct cpg_address *key, **sorted; sorted = malloc(member_list_entries * sizeof(const struct cpg_address *)); CRM_ASSERT(sorted != NULL); for (size_t iter = 0; iter < member_list_entries; iter++) { sorted[iter] = member_list + iter; } /* so that the cross-matching multiply-subscribed nodes is then cheap */ qsort(sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); for (i = 0; i < left_list_entries; i++) { crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL); const struct cpg_address **rival = NULL; /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation and not playing by this rule may go wild in case of multiple residual instances of the same pacemaker daemon at the same node -- we must ensure that the possible local rival(s) won't make us cry out and bail (e.g. when they quit themselves), since all the surrounding logic denies this simple fact that the full membership is discriminated also per the PID of the process beside mere node ID (and implicitly, group ID); practically, this will be sound in terms of not preventing progress, since all the CPG joiners are also API end-point carriers, and that's what matters locally (who's the winner); remotely, we will just compare leave_list and member_list and if the left process has its node retained in member_list (under some other PID, anyway) we will just ignore it as well XXX: long-term fix is to establish in-out PID-aware tracking? */ if (peer) { key = &left_list[i]; rival = bsearch(&key, sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); } if (rival == NULL) { crm_info("Group %s event %d: %s (node %u pid %u) left%s", groupName->value, counter, peer_name(peer), left_list[i].nodeid, left_list[i].pid, cpgreason2str(left_list[i].reason)); if (peer) { crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); } } else if (left_list[i].nodeid == local_nodeid) { crm_warn("Group %s event %d: duplicate local pid %u left%s", groupName->value, counter, left_list[i].pid, cpgreason2str(left_list[i].reason)); } else { crm_warn("Group %s event %d: " "%s (node %u) duplicate pid %u left%s (%u remains)", groupName->value, counter, peer_name(peer), left_list[i].nodeid, left_list[i].pid, cpgreason2str(left_list[i].reason), (*rival)->pid); } } free(sorted); sorted = NULL; for (i = 0; i < joined_list_entries; i++) { crm_info("Group %s event %d: node %u pid %u joined%s", groupName->value, counter, joined_list[i].nodeid, joined_list[i].pid, cpgreason2str(joined_list[i].reason)); } for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); if (member_list[i].nodeid == local_nodeid && member_list[i].pid != getpid()) { /* see the note above */ crm_warn("Group %s event %d: detected duplicate local pid %u", groupName->value, counter, member_list[i].pid); continue; } crm_info("Group %s event %d: %s (node %u pid %u) is member", groupName->value, counter, peer_name(peer), member_list[i].nodeid, member_list[i].pid); /* If the caller left auto-reaping enabled, this will also update the * state to member. */ peer = crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) { /* The node is a CPG member, but we currently think it's not a * cluster member. This is possible only if auto-reaping was * disabled. The node may be joining, and we happened to get the CPG * notification before the quorum notification; or the node may have * just died, and we are processing its final messages; or a bug * has affected the peer cache. */ time_t now = time(NULL); if (peer->when_lost == 0) { // Track when we first got into this contradictory state peer->when_lost = now; } else if (now > (peer->when_lost + 60)) { // If it persists for more than a minute, update the state crm_warn("Node %u is member of group %s but was believed offline", member_list[i].nodeid, groupName->value); crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0); } } if (local_nodeid == member_list[i].nodeid) { found = TRUE; } } if (!found) { crm_err("Local node was evicted from group %s", groupName->value); cpg_evicted = TRUE; } counter++; } gboolean cluster_connect_cpg(crm_cluster_t *cluster) { cs_error_t rc; int fd = -1; int retries = 0; uint32_t id = 0; crm_node_t *peer = NULL; cpg_handle_t handle = 0; const char *message_name = pcmk_message_name(crm_system_name); uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = cluster->destroy, }; cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = cluster->cpg.cpg_deliver_fn, .cpg_confchg_fn = cluster->cpg.cpg_confchg_fn, /* .cpg_deliver_fn = pcmk_cpg_deliver, */ /* .cpg_confchg_fn = pcmk_cpg_membership, */ }; cpg_evicted = FALSE; cluster->group.length = 0; cluster->group.value[0] = 0; /* group.value is char[128] */ strncpy(cluster->group.value, message_name, 127); cluster->group.value[127] = 0; cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value)); cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); goto bail; } rc = cpg_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } id = get_local_nodeid(handle); if (id == 0) { crm_err("Could not get local node id from the CPG API"); goto bail; } cluster->nodeid = id; retries = 0; cs_repeat(retries, 30, rc = cpg_join(handle, &cluster->group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", message_name, rc); goto bail; } pcmk_cpg_handle = handle; cluster->cpg_handle = handle; mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(handle); return FALSE; } peer = crm_get_peer(id, NULL); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); return TRUE; } gboolean send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; data = dump_xml_unformatted(msg); rc = send_cluster_text(crm_class_cluster, data, local, node, dest); free(data); return rc; } gboolean send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; static int local_name_len = 0; static const char *local_name = NULL; char *target = NULL; struct iovec *iov; AIS_Message *msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); switch (msg_class) { case crm_class_cluster: break; default: crm_err("Invalid message class: %d", msg_class); return FALSE; } CRM_CHECK(dest != crm_msg_ais, return FALSE); if(local_name == NULL) { local_name = get_local_node_name(); } if(local_name_len == 0 && local_name) { local_name_len = strlen(local_name); } if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } msg = calloc(1, sizeof(AIS_Message)); msg_id++; msg->id = msg_id; msg->header.id = msg_class; msg->header.error = CS_OK; msg->host.type = dest; msg->host.local = local; if (node) { if (node->uname) { target = strdup(node->uname); msg->host.size = strlen(node->uname); memset(msg->host.uname, 0, MAX_NAME); memcpy(msg->host.uname, node->uname, msg->host.size); } else { target = crm_strdup_printf("%u", node->id); } msg->host.id = node->id; } else { target = strdup("all"); } msg->sender.id = 0; msg->sender.type = sender; msg->sender.pid = local_pid; msg->sender.size = local_name_len; memset(msg->sender.uname, 0, MAX_NAME); if(local_name && msg->sender.size) { memcpy(msg->sender.uname, local_name, msg->sender.size); } msg->size = 1 + strlen(data); msg->header.size = sizeof(AIS_Message) + msg->size; if (msg->size < CRM_BZ2_THRESHOLD) { msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, data, msg->size); } else { char *compressed = NULL; unsigned int new_size = 0; char *uncompressed = strdup(data); if (pcmk__compress(uncompressed, (unsigned int) msg->size, 0, &compressed, &new_size) == pcmk_rc_ok) { msg->header.size = sizeof(AIS_Message) + new_size; msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, compressed, new_size); msg->is_compressed = TRUE; msg->compressed_size = new_size; } else { // cppcheck seems not to understand the abort logic in realloc_safe // cppcheck-suppress memleak msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, data, msg->size); } free(uncompressed); free(compressed); } iov = calloc(1, sizeof(struct iovec)); iov->iov_base = msg; iov->iov_len = msg->header.size; if (msg->compressed_size) { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->compressed_size, data); } else { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->size, data); } free(target); send_cpg_iov(iov); return TRUE; } enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); text = pcmk_message_name(text); if (pcmk__str_eq(text, "ais", pcmk__str_casei)) { type = crm_msg_ais; } else if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_casei)) { type = crm_msg_cib; - } else if (pcmk__str_eq(text, CRM_SYSTEM_CRMD, pcmk__str_casei) - || pcmk__str_eq(text, CRM_SYSTEM_DC, pcmk__str_casei)) { + } else if (pcmk__strcase_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) { type = crm_msg_crmd; } else if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { type = crm_msg_te; } else if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_casei)) { type = crm_msg_pe; } else if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_casei)) { type = crm_msg_lrmd; } else if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_casei)) { type = crm_msg_stonithd; } else if (pcmk__str_eq(text, "stonith-ng", pcmk__str_casei)) { type = crm_msg_stonith_ng; } else if (pcmk__str_eq(text, "attrd", pcmk__str_casei)) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1 || type <= crm_msg_stonith_ng) { /* Ensure it's sane */ type = crm_msg_none; } } return type; } diff --git a/lib/common/utils.c b/lib/common/utils.c index 5a4e3fa800..35e168dd5b 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1,618 +1,616 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef PW_BUFFER_LEN # define PW_BUFFER_LEN 500 #endif CRM_TRACE_INIT_DATA(common); gboolean crm_config_error = FALSE; gboolean crm_config_warning = FALSE; char *crm_system_name = NULL; int pcmk__score_red = 0; int pcmk__score_green = 0; int pcmk__score_yellow = 0; int char2score(const char *score) { int score_f = 0; if (score == NULL) { } else if (pcmk_str_is_minus_infinity(score)) { score_f = -CRM_SCORE_INFINITY; } else if (pcmk_str_is_infinity(score)) { score_f = CRM_SCORE_INFINITY; } else if (pcmk__str_eq(score, "red", pcmk__str_casei)) { score_f = pcmk__score_red; } else if (pcmk__str_eq(score, "yellow", pcmk__str_casei)) { score_f = pcmk__score_yellow; } else if (pcmk__str_eq(score, "green", pcmk__str_casei)) { score_f = pcmk__score_green; } else { score_f = crm_parse_int(score, NULL); if (score_f > 0 && score_f > CRM_SCORE_INFINITY) { score_f = CRM_SCORE_INFINITY; } else if (score_f < 0 && score_f < -CRM_SCORE_INFINITY) { score_f = -CRM_SCORE_INFINITY; } } return score_f; } char * score2char_stack(int score, char *buf, size_t len) { if (score >= CRM_SCORE_INFINITY) { strncpy(buf, CRM_INFINITY_S, 9); } else if (score <= -CRM_SCORE_INFINITY) { strncpy(buf, CRM_MINUS_INFINITY_S , 10); } else { return crm_itoa_stack(score, buf, len); } return buf; } char * score2char(int score) { if (score >= CRM_SCORE_INFINITY) { return strdup(CRM_INFINITY_S); } else if (score <= -CRM_SCORE_INFINITY) { return strdup(CRM_MINUS_INFINITY_S); } return crm_itoa(score); } int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) { int rc = pcmk_ok; char *buffer = NULL; struct passwd pwd; struct passwd *pwentry = NULL; buffer = calloc(1, PW_BUFFER_LEN); if (buffer == NULL) { return -ENOMEM; } rc = getpwnam_r(name, &pwd, buffer, PW_BUFFER_LEN, &pwentry); if (pwentry) { if (uid) { *uid = pwentry->pw_uid; } if (gid) { *gid = pwentry->pw_gid; } crm_trace("User %s has uid=%d gid=%d", name, pwentry->pw_uid, pwentry->pw_gid); } else { rc = rc? -rc : -EINVAL; crm_info("User %s lookup: %s", name, pcmk_strerror(rc)); } free(buffer); return rc; } /*! * \brief Get user and group IDs of pacemaker daemon user * * \param[out] uid If non-NULL, where to store daemon user ID * \param[out] gid If non-NULL, where to store daemon group ID * * \return pcmk_ok on success, -errno otherwise */ int pcmk_daemon_user(uid_t *uid, gid_t *gid) { static uid_t daemon_uid; static gid_t daemon_gid; static bool found = false; int rc = pcmk_err_generic; if (!found) { rc = crm_user_lookup(CRM_DAEMON_USER, &daemon_uid, &daemon_gid); if (rc == pcmk_ok) { found = true; } } if (found) { if (uid) { *uid = daemon_uid; } if (gid) { *gid = daemon_gid; } } return rc; } static int crm_version_helper(const char *text, const char **end_text) { int atoi_result = -1; CRM_ASSERT(end_text != NULL); errno = 0; if (text != NULL && text[0] != 0) { /* seemingly sacrificing const-correctness -- because while strtol doesn't modify the input, it doesn't want to artificially taint the "end_text" pointer-to-pointer-to-first-char-in-string with constness in case the input wasn't actually constant -- by semantic definition not a single character will get modified so it shall be perfectly safe to make compiler happy with dropping "const" qualifier here */ atoi_result = (int) strtol(text, (char **) end_text, 10); if (errno == EINVAL) { crm_err("Conversion of '%s' %c failed", text, text[0]); atoi_result = -1; } } return atoi_result; } /* * version1 < version2 : -1 * version1 = version2 : 0 * version1 > version2 : 1 */ int compare_version(const char *version1, const char *version2) { int rc = 0; int lpc = 0; const char *ver1_iter, *ver2_iter; if (version1 == NULL && version2 == NULL) { return 0; } else if (version1 == NULL) { return -1; } else if (version2 == NULL) { return 1; } ver1_iter = version1; ver2_iter = version2; while (1) { int digit1 = 0; int digit2 = 0; lpc++; if (ver1_iter == ver2_iter) { break; } if (ver1_iter != NULL) { digit1 = crm_version_helper(ver1_iter, &ver1_iter); } if (ver2_iter != NULL) { digit2 = crm_version_helper(ver2_iter, &ver2_iter); } if (digit1 < digit2) { rc = -1; break; } else if (digit1 > digit2) { rc = 1; break; } if (ver1_iter != NULL && *ver1_iter == '.') { ver1_iter++; } if (ver1_iter != NULL && *ver1_iter == '\0') { ver1_iter = NULL; } if (ver2_iter != NULL && *ver2_iter == '.') { ver2_iter++; } if (ver2_iter != NULL && *ver2_iter == 0) { ver2_iter = NULL; } } if (rc == 0) { crm_trace("%s == %s (%d)", version1, version2, lpc); } else if (rc < 0) { crm_trace("%s < %s (%d)", version1, version2, lpc); } else if (rc > 0) { crm_trace("%s > %s (%d)", version1, version2, lpc); } return rc; } /*! * \brief Parse milliseconds from a Pacemaker interval specification * * \param[in] input Pacemaker time interval specification (a bare number of * seconds, a number with a unit optionally with whitespace * before and/or after the number, or an ISO 8601 duration) * * \return Milliseconds equivalent of given specification on success (limited * to the range of an unsigned integer), 0 if input is NULL, * or 0 (and set errno to EINVAL) on error */ guint crm_parse_interval_spec(const char *input) { long long msec = -1; errno = 0; if (input == NULL) { return 0; } else if (input[0] == 'P') { crm_time_t *period_s = crm_time_parse_duration(input); if (period_s) { msec = 1000 * crm_time_get_seconds(period_s); crm_time_free(period_s); } } else { msec = crm_get_msec(input); } if (msec < 0) { crm_warn("Using 0 instead of '%s'", input); errno = EINVAL; return 0; } return (msec >= G_MAXUINT)? G_MAXUINT : (guint) msec; } extern bool crm_is_daemon; /* coverity[+kill] */ void crm_abort(const char *file, const char *function, int line, const char *assert_condition, gboolean do_core, gboolean do_fork) { int rc = 0; int pid = 0; int status = 0; /* Implied by the parent's error logging below */ /* crm_write_blackbox(0); */ if(crm_is_daemon == FALSE) { /* This is a command line tool - do not fork */ /* crm_add_logfile(NULL); * Record it to a file? */ crm_enable_stderr(TRUE); /* Make sure stderr is enabled so we can tell the caller */ do_fork = FALSE; /* Just crash if needed */ } if (do_core == FALSE) { crm_err("%s: Triggered assert at %s:%d : %s", function, file, line, assert_condition); return; } else if (do_fork) { pid = fork(); } else { crm_err("%s: Triggered fatal assert at %s:%d : %s", function, file, line, assert_condition); } if (pid == -1) { crm_crit("%s: Cannot create core for non-fatal assert at %s:%d : %s", function, file, line, assert_condition); return; } else if(pid == 0) { /* Child process */ abort(); return; } /* Parent process */ crm_err("%s: Forked child %d to record non-fatal assert at %s:%d : %s", function, pid, file, line, assert_condition); crm_write_blackbox(SIGTRAP, NULL); do { rc = waitpid(pid, &status, 0); if(rc == pid) { return; /* Job done */ } } while(errno == EINTR); if (errno == ECHILD) { /* crm_mon does this */ crm_trace("Cannot wait on forked child %d - SIGCHLD is probably set to SIG_IGN", pid); return; } crm_perror(LOG_ERR, "Cannot wait on forked child %d", pid); } void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile) { int rc; pid_t pid; if (daemonize == FALSE) { return; } /* Check before we even try... */ rc = pcmk__pidfile_matches(pidfile, 1, name, &pid); if ((rc != pcmk_rc_ok) && (rc != ENOENT)) { crm_err("%s: already running [pid %lld in %s]", name, (long long) pid, pidfile); printf("%s: already running [pid %lld in %s]\n", name, (long long) pid, pidfile); crm_exit(CRM_EX_ERROR); } pid = fork(); if (pid < 0) { fprintf(stderr, "%s: could not start daemon\n", name); crm_perror(LOG_ERR, "fork"); crm_exit(CRM_EX_OSERR); } else if (pid > 0) { crm_exit(CRM_EX_OK); } rc = pcmk__lock_pidfile(pidfile, name); if (rc != pcmk_rc_ok) { crm_err("Could not lock '%s' for %s: %s " CRM_XS " rc=%d", pidfile, name, pcmk_rc_str(rc), rc); printf("Could not lock '%s' for %s: %s (%d)\n", pidfile, name, pcmk_rc_str(rc), rc); crm_exit(CRM_EX_ERROR); } umask(S_IWGRP | S_IWOTH | S_IROTH); close(STDIN_FILENO); pcmk__open_devnull(O_RDONLY); // stdin (fd 0) close(STDOUT_FILENO); pcmk__open_devnull(O_WRONLY); // stdout (fd 1) close(STDERR_FILENO); pcmk__open_devnull(O_WRONLY); // stderr (fd 2) } char * crm_meta_name(const char *field) { int lpc = 0; int max = 0; char *crm_name = NULL; CRM_CHECK(field != NULL, return NULL); crm_name = crm_strdup_printf(CRM_META "_%s", field); /* Massage the names so they can be used as shell variables */ max = strlen(crm_name); for (; lpc < max; lpc++) { switch (crm_name[lpc]) { case '-': crm_name[lpc] = '_'; break; } } return crm_name; } const char * crm_meta_value(GHashTable * hash, const char *field) { char *key = NULL; const char *value = NULL; key = crm_meta_name(field); if (key) { value = g_hash_table_lookup(hash, key); free(key); } return value; } #ifdef HAVE_UUID_UUID_H # include #endif char * crm_generate_uuid(void) { unsigned char uuid[16]; char *buffer = malloc(37); /* Including NUL byte */ uuid_generate(uuid); uuid_unparse(uuid, buffer); return buffer; } /*! * \brief Get name to be used as identifier for cluster messages * * \param[in] name Actual system name to check * * \return Non-NULL cluster message identifier corresponding to name * * \note The Pacemaker daemons were renamed in version 2.0.0, but the old names * must continue to be used as the identifier for cluster messages, so * that mixed-version clusters are possible during a rolling upgrade. */ const char * pcmk_message_name(const char *name) { if (name == NULL) { return "unknown"; } else if (!strcmp(name, "pacemaker-attrd")) { return "attrd"; } else if (!strcmp(name, "pacemaker-based")) { return CRM_SYSTEM_CIB; } else if (!strcmp(name, "pacemaker-controld")) { return CRM_SYSTEM_CRMD; } else if (!strcmp(name, "pacemaker-execd")) { return CRM_SYSTEM_LRMD; } else if (!strcmp(name, "pacemaker-fenced")) { return "stonith-ng"; } else if (!strcmp(name, "pacemaker-schedulerd")) { return CRM_SYSTEM_PENGINE; } else { return name; } } /*! * \brief Check whether a string represents a cluster daemon name * * \param[in] name String to check * * \return TRUE if name is standard client name used by daemons, FALSE otherwise */ bool crm_is_daemon_name(const char *name) { name = pcmk_message_name(name); return (!strcmp(name, CRM_SYSTEM_CRMD) || !strcmp(name, CRM_SYSTEM_STONITHD) || !strcmp(name, "stonith-ng") || !strcmp(name, "attrd") || !strcmp(name, CRM_SYSTEM_CIB) || !strcmp(name, CRM_SYSTEM_MCP) || !strcmp(name, CRM_SYSTEM_DC) || !strcmp(name, CRM_SYSTEM_TENGINE) || !strcmp(name, CRM_SYSTEM_LRMD)); } #include char * crm_md5sum(const char *buffer) { int lpc = 0, len = 0; char *digest = NULL; unsigned char raw_digest[MD5_DIGEST_SIZE]; if (buffer == NULL) { buffer = ""; } len = strlen(buffer); crm_trace("Beginning digest of %d bytes", len); digest = malloc(2 * MD5_DIGEST_SIZE + 1); if(digest) { md5_buffer(buffer, len, raw_digest); for (lpc = 0; lpc < MD5_DIGEST_SIZE; lpc++) { sprintf(digest + (2 * lpc), "%02x", raw_digest[lpc]); } digest[(2 * MD5_DIGEST_SIZE)] = 0; crm_trace("Digest %s.", digest); } else { crm_err("Could not create digest"); } return digest; } #ifdef HAVE_GNUTLS_GNUTLS_H void crm_gnutls_global_init(void) { signal(SIGPIPE, SIG_IGN); gnutls_global_init(); } #endif /*! * \brief Get the local hostname * * \return Newly allocated string with name, or NULL (and set errno) on error */ char * pcmk_hostname() { struct utsname hostinfo; return (uname(&hostinfo) < 0)? NULL : strdup(hostinfo.nodename); } bool pcmk_str_is_infinity(const char *s) { - return pcmk__str_eq(s, CRM_INFINITY_S, pcmk__str_none) || pcmk__str_eq(s, - CRM_PLUS_INFINITY_S, - pcmk__str_none); + return pcmk__str_any_of(s, CRM_INFINITY_S, CRM_PLUS_INFINITY_S, NULL); } bool pcmk_str_is_minus_infinity(const char *s) { return pcmk__str_eq(s, CRM_MINUS_INFINITY_S, pcmk__str_none); } diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c index 92e58d70db..7f2b35b094 100644 --- a/lib/pacemaker/pcmk_sched_bundle.c +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -1,1084 +1,1082 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #define PE__VARIANT_BUNDLE 1 #include static bool is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node) { for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (node->details == replica->node->details) { return TRUE; } } return FALSE; } gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); void distribute_children(pe_resource_t *rsc, GListPtr children, GListPtr nodes, int max, int per_host_max, pe_working_set_t * data_set); static GList * get_container_list(pe_resource_t *rsc) { GList *containers = NULL; if (rsc->variant == pe_container) { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; containers = g_list_append(containers, replica->container); } } return containers; } static inline GList * get_containers_or_children(pe_resource_t *rsc) { return (rsc->variant == pe_container)? get_container_list(rsc) : rsc->children; } static bool migration_threshold_reached(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { int fail_count, countdown; /* Migration threshold of 0 means never force away */ if (rsc->migration_threshold == 0) { return FALSE; } // If we're ignoring failures, also ignore the migration threshold if (is_set(rsc->flags, pe_rsc_failure_ignored)) { return FALSE; } /* If there are no failures, there's no need to force away */ fail_count = pe_get_failcount(node, rsc, NULL, pe_fc_effective|pe_fc_fillers, NULL, data_set); if (fail_count <= 0) { return FALSE; } /* How many more times recovery will be tried on this node */ countdown = QB_MAX(rsc->migration_threshold - fail_count, 0); if (countdown == 0) { crm_warn("Forcing %s away from %s after %d failures (max=%d)", rsc->id, node->details->uname, fail_count, rsc->migration_threshold); return TRUE; } crm_info("%s can fail %d more times on %s before being forced off", rsc->id, countdown, node->details->uname); return FALSE; } pe_node_t * pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GListPtr containers = NULL; GListPtr nodes = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); set_bit(rsc->flags, pe_rsc_allocating); containers = get_container_list(rsc); pe__show_node_weights(!show_scores, rsc, __FUNCTION__, rsc->allowed_nodes); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); containers = g_list_sort_with_data(containers, sort_clone_instance, data_set); distribute_children(rsc, containers, nodes, bundle_data->nreplicas, bundle_data->nreplicas_per_host, data_set); g_list_free(nodes); g_list_free(containers); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; pe_node_t *container_host = NULL; CRM_ASSERT(replica); if (replica->ip) { pe_rsc_trace(rsc, "Allocating bundle %s IP %s", rsc->id, replica->ip->id); replica->ip->cmds->allocate(replica->ip, prefer, data_set); } container_host = replica->container->allocated_to; if (replica->remote && pe__is_guest_or_remote_node(container_host)) { /* We need 'nested' connection resources to be on the same * host because pacemaker-remoted only supports a single * active connection */ rsc_colocation_new("child-remote-with-docker-remote", NULL, INFINITY, replica->remote, container_host->details->remote_rsc, NULL, NULL, data_set); } if (replica->remote) { pe_rsc_trace(rsc, "Allocating bundle %s connection %s", rsc->id, replica->remote->id); replica->remote->cmds->allocate(replica->remote, prefer, data_set); } // Explicitly allocate replicas' children before bundle child if (replica->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, replica->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (node->details != replica->node->details) { node->weight = -INFINITY; } else if (!migration_threshold_reached(replica->child, node, data_set)) { node->weight = INFINITY; } } set_bit(replica->child->parent->flags, pe_rsc_allocating); pe_rsc_trace(rsc, "Allocating bundle %s replica child %s", rsc->id, replica->child->id); replica->child->cmds->allocate(replica->child, replica->node, data_set); clear_bit(replica->child->parent->flags, pe_rsc_allocating); } } if (bundle_data->child) { pe_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, bundle_data->child->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (is_bundle_node(bundle_data, node)) { node->weight = 0; } else { node->weight = -INFINITY; } } pe_rsc_trace(rsc, "Allocating bundle %s child %s", rsc->id, bundle_data->child->id); bundle_data->child->cmds->allocate(bundle_data->child, prefer, data_set); } clear_bit(rsc->flags, pe_rsc_allocating); clear_bit(rsc->flags, pe_rsc_provisional); return NULL; } void pcmk__bundle_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_action_t *action = NULL; GListPtr containers = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); containers = get_container_list(rsc); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip) { replica->ip->cmds->create_actions(replica->ip, data_set); } if (replica->container) { replica->container->cmds->create_actions(replica->container, data_set); } if (replica->remote) { replica->remote->cmds->create_actions(replica->remote, data_set); } } clone_create_pseudo_actions(rsc, containers, NULL, NULL, data_set); if (bundle_data->child) { bundle_data->child->cmds->create_actions(bundle_data->child, data_set); if (is_set(bundle_data->child->flags, pe_rsc_promotable)) { /* promote */ action = create_pseudo_resource_op(rsc, RSC_PROMOTE, TRUE, TRUE, data_set); action = create_pseudo_resource_op(rsc, RSC_PROMOTED, TRUE, TRUE, data_set); action->priority = INFINITY; /* demote */ action = create_pseudo_resource_op(rsc, RSC_DEMOTE, TRUE, TRUE, data_set); action = create_pseudo_resource_op(rsc, RSC_DEMOTED, TRUE, TRUE, data_set); action->priority = INFINITY; } } g_list_free(containers); } void pcmk__bundle_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { new_rsc_order(rsc, RSC_START, bundle_data->child, RSC_START, pe_order_implies_first_printed, data_set); new_rsc_order(rsc, RSC_STOP, bundle_data->child, RSC_STOP, pe_order_implies_first_printed, data_set); if (bundle_data->child->children) { new_rsc_order(bundle_data->child, RSC_STARTED, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); new_rsc_order(bundle_data->child, RSC_STOPPED, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); } else { new_rsc_order(bundle_data->child, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); new_rsc_order(bundle_data->child, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); } } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); CRM_ASSERT(replica->container); replica->container->cmds->internal_constraints(replica->container, data_set); order_start_start(rsc, replica->container, pe_order_runnable_left|pe_order_implies_first_printed); if (replica->child) { order_stop_stop(rsc, replica->child, pe_order_implies_first_printed); } order_stop_stop(rsc, replica->container, pe_order_implies_first_printed); new_rsc_order(replica->container, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); new_rsc_order(replica->container, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if (replica->ip) { replica->ip->cmds->internal_constraints(replica->ip, data_set); // Start ip then container new_rsc_order(replica->ip, RSC_START, replica->container, RSC_START, pe_order_runnable_left|pe_order_preserve, data_set); new_rsc_order(replica->container, RSC_STOP, replica->ip, RSC_STOP, pe_order_implies_first|pe_order_preserve, data_set); rsc_colocation_new("ip-with-docker", NULL, INFINITY, replica->ip, replica->container, NULL, NULL, data_set); } if (replica->remote) { /* This handles ordering and colocating remote relative to container * (via "resource-with-container"). Since IP is also ordered and * colocated relative to the container, we don't need to do anything * explicit here with IP. */ replica->remote->cmds->internal_constraints(replica->remote, data_set); } if (replica->child) { CRM_ASSERT(replica->remote); // "Start remote then child" is implicit in scheduler's remote logic } } if (bundle_data->child) { bundle_data->child->cmds->internal_constraints(bundle_data->child, data_set); if (is_set(bundle_data->child->flags, pe_rsc_promotable)) { promote_demote_constraints(rsc, data_set); /* child demoted before global demoted */ new_rsc_order(bundle_data->child, RSC_DEMOTED, rsc, RSC_DEMOTED, pe_order_implies_then_printed, data_set); /* global demote before child demote */ new_rsc_order(rsc, RSC_DEMOTE, bundle_data->child, RSC_DEMOTE, pe_order_implies_first_printed, data_set); /* child promoted before global promoted */ new_rsc_order(bundle_data->child, RSC_PROMOTED, rsc, RSC_PROMOTED, pe_order_implies_then_printed, data_set); /* global promote before child promote */ new_rsc_order(rsc, RSC_PROMOTE, bundle_data->child, RSC_PROMOTE, pe_order_implies_first_printed, data_set); } } else { // int type = pe_order_optional | pe_order_implies_then | pe_order_restart; // custom_action_order(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL, // rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional, data_set); } } static pe_resource_t * compatible_replica_for_node(pe_resource_t *rsc_lh, pe_node_t *candidate, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(candidate != NULL, return NULL); get_bundle_variant_data(bundle_data, rsc); crm_trace("Looking for compatible child from %s for %s on %s", rsc_lh->id, rsc->id, candidate->details->uname); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (is_child_compatible(replica->container, candidate, filter, current)) { crm_trace("Pairing %s with %s on %s", rsc_lh->id, replica->container->id, candidate->details->uname); return replica->container; } } crm_trace("Can't pair %s with %s", rsc_lh->id, rsc->id); return NULL; } static pe_resource_t * compatible_replica(pe_resource_t *rsc_lh, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { GListPtr scratch = NULL; pe_resource_t *pair = NULL; pe_node_t *active_node_lh = NULL; active_node_lh = rsc_lh->fns->location(rsc_lh, NULL, current); if (active_node_lh) { return compatible_replica_for_node(rsc_lh, active_node_lh, rsc, filter, current); } scratch = g_hash_table_get_values(rsc_lh->allowed_nodes); scratch = sort_nodes_by_weight(scratch, NULL, data_set); for (GListPtr gIter = scratch; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = compatible_replica_for_node(rsc_lh, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", rsc_lh->id, (rsc? rsc->id : "none")); done: g_list_free(scratch); return pair; } void pcmk__bundle_rsc_colocation_lh(pe_resource_t *rsc, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ CRM_ASSERT(FALSE); } int copies_per_node(pe_resource_t * rsc) { /* Strictly speaking, there should be a 'copies_per_node' addition * to the resource function table and each case would be a * function. However that would be serious overkill to return an * int. In fact, it seems to me that both function tables * could/should be replaced by resources.{c,h} full of * rsc_{some_operation} functions containing a switch as below * which calls out to functions named {variant}_{some_operation} * as needed. */ switch(rsc->variant) { case pe_unknown: return 0; case pe_native: case pe_group: return 1; case pe_clone: { const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); return crm_parse_int(max_clones_node, "1"); } case pe_container: { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); return data->nreplicas_per_host; } } return 0; } void pcmk__bundle_rsc_colocation_rh(pe_resource_t *rsc_lh, pe_resource_t *rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set) { GListPtr allocated_rhs = NULL; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(rsc_lh != NULL, pe_err("rsc_lh was NULL for %s", constraint->id); return); CRM_CHECK(rsc != NULL, pe_err("rsc was NULL for %s", constraint->id); return); CRM_ASSERT(rsc_lh->variant == pe_native); if (constraint->score == 0) { return; } if (is_set(rsc->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc, "%s is still provisional", rsc->id); return; } else if(constraint->rsc_lh->variant > pe_group) { pe_resource_t *rh_child = compatible_replica(rsc_lh, rsc, RSC_ROLE_UNKNOWN, FALSE, data_set); if (rh_child) { pe_rsc_debug(rsc, "Pairing %s with %s", rsc_lh->id, rh_child->id); rsc_lh->cmds->rsc_colocation_lh(rsc_lh, rh_child, constraint, data_set); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", rsc_lh->id, rsc->id); assign_node(rsc_lh, NULL, TRUE); } else { pe_rsc_debug(rsc, "Cannot pair %s with instance of %s", rsc_lh->id, rsc->id); } return; } get_bundle_variant_data(bundle_data, rsc); pe_rsc_trace(rsc, "Processing constraint %s: %s -> %s %d", constraint->id, rsc_lh->id, rsc->id, constraint->score); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (constraint->score < INFINITY) { replica->container->cmds->rsc_colocation_rh(rsc_lh, replica->container, constraint, data_set); } else { pe_node_t *chosen = replica->container->fns->location(replica->container, NULL, FALSE); if ((chosen == NULL) || is_set_recursive(replica->container, pe_rsc_block, TRUE)) { continue; } if ((constraint->role_rh >= RSC_ROLE_MASTER) && (replica->child == NULL)) { continue; } if ((constraint->role_rh >= RSC_ROLE_MASTER) && (replica->child->next_role < RSC_ROLE_MASTER)) { continue; } pe_rsc_trace(rsc, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); allocated_rhs = g_list_prepend(allocated_rhs, chosen); } } if (constraint->score >= INFINITY) { node_list_exclude(rsc_lh->allowed_nodes, allocated_rhs, FALSE); } g_list_free(allocated_rhs); } enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action, pe_node_t *node) { GListPtr containers = NULL; enum pe_action_flags flags = 0; pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, action->rsc); if(data->child) { enum action_tasks task = get_complex_task(data->child, action->task, TRUE); switch(task) { case no_action: case action_notify: case action_notified: case action_promote: case action_promoted: case action_demote: case action_demoted: return summary_action_flags(action, data->child->children, node); default: break; } } containers = get_container_list(action->rsc); flags = summary_action_flags(action, containers, node); g_list_free(containers); return flags; } pe_resource_t * find_compatible_child_by_node(pe_resource_t * local_child, pe_node_t * local_node, pe_resource_t * rsc, enum rsc_role_e filter, gboolean current) { GListPtr gIter = NULL; GListPtr children = NULL; if (local_node == NULL) { crm_err("Can't colocate unrunnable child %s with %s", local_child->id, rsc->id); return NULL; } crm_trace("Looking for compatible child from %s for %s on %s", local_child->id, rsc->id, local_node->details->uname); children = get_containers_or_children(rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if(is_child_compatible(child_rsc, local_node, filter, current)) { crm_trace("Pairing %s with %s on %s", local_child->id, child_rsc->id, local_node->details->uname); return child_rsc; } } crm_trace("Can't pair %s with %s", local_child->id, rsc->id); if(children != rsc->children) { g_list_free(children); } return NULL; } static pe__bundle_replica_t * replica_for_container(pe_resource_t *rsc, pe_resource_t *container, pe_node_t *node) { if (rsc->variant == pe_container) { pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, rsc); for (GList *gIter = data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->child && (container == replica->container) && (node->details == replica->node->details)) { return replica; } } } return NULL; } static enum pe_graph_flags multi_update_interleave_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { GListPtr gIter = NULL; GListPtr children = NULL; gboolean current = FALSE; enum pe_graph_flags changed = pe_graph_none; /* Fix this - lazy */ if (pcmk__ends_with(first->uuid, "_stopped_0") || pcmk__ends_with(first->uuid, "_demoted_0")) { current = TRUE; } children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = gIter->data; pe_resource_t *first_child = find_compatible_child(then_child, first->rsc, RSC_ROLE_UNKNOWN, current, data_set); if (first_child == NULL && current) { crm_trace("Ignore"); } else if (first_child == NULL) { crm_debug("No match found for %s (%d / %s / %s)", then_child->id, current, first->uuid, then->uuid); /* Me no like this hack - but what else can we do? * * If there is no-one active or about to be active * on the same node as then_child, then they must * not be allowed to start */ if (type & (pe_order_runnable_left | pe_order_implies_then) /* Mandatory */ ) { pe_rsc_info(then->rsc, "Inhibiting %s from being active", then_child->id); if(assign_node(then_child, NULL, TRUE)) { changed |= pe_graph_updated_then; } } } else { pe_action_t *first_action = NULL; pe_action_t *then_action = NULL; enum action_tasks task = clone_child_action(first); const char *first_task = task2text(task); pe__bundle_replica_t *first_replica = NULL; pe__bundle_replica_t *then_replica = NULL; first_replica = replica_for_container(first->rsc, first_child, node); if (strstr(first->task, "stop") && first_replica && first_replica->child) { /* Except for 'stopped' we should be looking at the * in-container resource, actions for the child will * happen later and are therefor more likely to align * with the user's intent. */ first_action = find_first_action(first_replica->child->actions, NULL, task2text(task), node); } else { first_action = find_first_action(first_child->actions, NULL, task2text(task), node); } then_replica = replica_for_container(then->rsc, then_child, node); if (strstr(then->task, "mote") && then_replica && then_replica->child) { /* Promote/demote actions will never be found for the * container resource, look in the child instead * * Alternatively treat: * 'XXXX then promote YYYY' as 'XXXX then start container for YYYY', and * 'demote XXXX then stop YYYY' as 'stop container for XXXX then stop YYYY' */ then_action = find_first_action(then_replica->child->actions, NULL, then->task, node); } else { then_action = find_first_action(then_child->actions, NULL, then->task, node); } if (first_action == NULL) { if (is_not_set(first_child->flags, pe_rsc_orphan) - && !pcmk__str_eq(first_task, RSC_STOP, pcmk__str_none) - && !pcmk__str_eq(first_task, RSC_DEMOTE, pcmk__str_none)) { + && !pcmk__str_any_of(first_task, RSC_STOP, RSC_DEMOTE, NULL)) { crm_err("Internal error: No action found for %s in %s (first)", first_task, first_child->id); } else { crm_trace("No action found for %s in %s%s (first)", first_task, first_child->id, is_set(first_child->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); } continue; } /* We're only interested if 'then' is neither stopping nor being demoted */ if (then_action == NULL) { if (is_not_set(then_child->flags, pe_rsc_orphan) - && !pcmk__str_eq(then->task, RSC_STOP, pcmk__str_none) - && !pcmk__str_eq(then->task, RSC_DEMOTE, pcmk__str_none)) { + && !pcmk__str_any_of(then->task, RSC_STOP, RSC_DEMOTE, NULL)) { crm_err("Internal error: No action found for %s in %s (then)", then->task, then_child->id); } else { crm_trace("No action found for %s in %s%s (then)", then->task, then_child->id, is_set(then_child->flags, pe_rsc_orphan) ? " (ORPHAN)" : ""); } continue; } if (order_actions(first_action, then_action, type)) { crm_debug("Created constraint for %s (%d) -> %s (%d) %.6x", first_action->uuid, is_set(first_action->flags, pe_action_optional), then_action->uuid, is_set(then_action->flags, pe_action_optional), type); changed |= (pe_graph_updated_first | pe_graph_updated_then); } if(first_action && then_action) { changed |= then_child->cmds->update_actions(first_action, then_action, node, first_child->cmds->action_flags(first_action, node), filter, type, data_set); } else { crm_err("Nothing found either for %s (%p) or %s (%p) %s", first_child->id, first_action, then_child->id, then_action, task2text(task)); } } } if(children != then->rsc->children) { g_list_free(children); } return changed; } static bool can_interleave_actions(pe_action_t *first, pe_action_t *then) { bool interleave = FALSE; pe_resource_t *rsc = NULL; const char *interleave_s = NULL; if(first->rsc == NULL || then->rsc == NULL) { crm_trace("Not interleaving %s with %s (both must be resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc == then->rsc) { crm_trace("Not interleaving %s with %s (must belong to different resources)", first->uuid, then->uuid); return FALSE; } else if(first->rsc->variant < pe_clone || then->rsc->variant < pe_clone) { crm_trace("Not interleaving %s with %s (both sides must be clones or bundles)", first->uuid, then->uuid); return FALSE; } if (pcmk__ends_with(then->uuid, "_stop_0") || pcmk__ends_with(then->uuid, "_demote_0")) { rsc = first->rsc; } else { rsc = then->rsc; } interleave_s = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERLEAVE); interleave = crm_is_true(interleave_s); crm_trace("Interleave %s -> %s: %s (based on %s)", first->uuid, then->uuid, interleave ? "yes" : "no", rsc->id); return interleave; } enum pe_graph_flags pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set) { enum pe_graph_flags changed = pe_graph_none; crm_trace("%s -> %s", first->uuid, then->uuid); if(can_interleave_actions(first, then)) { changed = multi_update_interleave_actions(first, then, node, flags, filter, type, data_set); } else if(then->rsc) { GListPtr gIter = NULL; GListPtr children = NULL; // Handle the 'primitive' ordering case changed |= native_update_actions(first, then, node, flags, filter, type, data_set); // Now any children (or containers in the case of a bundle) children = get_containers_or_children(then->rsc); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_child = (pe_resource_t *) gIter->data; enum pe_graph_flags then_child_changed = pe_graph_none; pe_action_t *then_child_action = find_first_action(then_child->actions, NULL, then->task, node); if (then_child_action) { enum pe_action_flags then_child_flags = then_child->cmds->action_flags(then_child_action, node); if (is_set(then_child_flags, pe_action_runnable)) { then_child_changed |= then_child->cmds->update_actions(first, then_child_action, node, flags, filter, type, data_set); } changed |= then_child_changed; if (then_child_changed & pe_graph_updated_then) { for (GListPtr lpc = then_child_action->actions_after; lpc != NULL; lpc = lpc->next) { pe_action_wrapper_t *next = (pe_action_wrapper_t *) lpc->data; update_action(next->action, data_set); } } } } if(children != then->rsc->children) { g_list_free(children); } } return changed; } void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; if (replica->container) { replica->container->cmds->rsc_location(replica->container, constraint); } if (replica->ip) { replica->ip->cmds->rsc_location(replica->ip, constraint); } } if (bundle_data->child && ((constraint->role_filter == RSC_ROLE_SLAVE) || (constraint->role_filter == RSC_ROLE_MASTER))) { bundle_data->child->cmds->rsc_location(bundle_data->child, constraint); bundle_data->child->rsc_location = g_list_prepend(bundle_data->child->rsc_location, constraint); } } void pcmk__bundle_expand(pe_resource_t *rsc, pe_working_set_t * data_set) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); if (bundle_data->child) { bundle_data->child->cmds->expand(bundle_data->child, data_set); } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->remote && replica->container && pe__bundle_needs_remote_name(replica->remote)) { /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that * run pacemaker-remoted inside, without needing a separate IP for * the container. This is done by configuring the inner remote's * connection host as the magic string "#uname", then * replacing it with the underlying host when needed. */ xmlNode *nvpair = get_xpath_object("//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']", replica->remote->xml, LOG_ERR); const char *calculated_addr = NULL; calculated_addr = pe__add_bundle_remote_name(replica->remote, nvpair, "value"); if (calculated_addr) { crm_trace("Set address for bundle connection %s to bundle host %s", replica->remote->id, calculated_addr); g_hash_table_replace(replica->remote->parameters, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR), strdup(calculated_addr)); } else { /* The only way to get here is if the remote connection is * neither currently running nor scheduled to run. That means we * won't be doing any operations that require addr (only start * requires it; we additionally use it to compare digests when * unpacking status, promote, and migrate_from history, but * that's already happened by this point). */ crm_info("Unable to determine address for bundle %s remote connection", rsc->id); } } if (replica->ip) { replica->ip->cmds->expand(replica->ip, data_set); } if (replica->container) { replica->container->cmds->expand(replica->container, data_set); } if (replica->remote) { replica->remote->cmds->expand(replica->remote, data_set); } } } gboolean pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t * data_set) { bool any_created = FALSE; pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return FALSE); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip) { any_created |= replica->ip->cmds->create_probe(replica->ip, node, complete, force, data_set); } if (replica->child && (node->details == replica->node->details)) { any_created |= replica->child->cmds->create_probe(replica->child, node, complete, force, data_set); } if (replica->container) { bool created = replica->container->cmds->create_probe(replica->container, node, complete, force, data_set); if(created) { any_created = TRUE; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. * * Partly this is to ensure that nreplicas_per_host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ for (GList *tIter = bundle_data->replicas; tIter && (bundle_data->nreplicas_per_host == 1); tIter = tIter->next) { pe__bundle_replica_t *other = tIter->data; if ((other != replica) && (other != NULL) && (other->container != NULL)) { custom_action_order(replica->container, pcmk__op_key(replica->container->id, RSC_STATUS, 0), NULL, other->container, pcmk__op_key(other->container->id, RSC_START, 0), NULL, pe_order_optional|pe_order_same_node, data_set); } } } } if (replica->container && replica->remote && replica->remote->cmds->create_probe(replica->remote, node, complete, force, data_set)) { /* Do not probe the remote resource until we know where the * container is running. This is required for REMOTE_CONTAINER_HACK * to correctly probe remote resources. */ char *probe_uuid = pcmk__op_key(replica->remote->id, RSC_STATUS, 0); pe_action_t *probe = find_first_action(replica->remote->actions, probe_uuid, NULL, node); free(probe_uuid); if (probe) { any_created = TRUE; crm_trace("Ordering %s probe on %s", replica->remote->id, node->details->uname); custom_action_order(replica->container, pcmk__op_key(replica->container->id, RSC_START, 0), NULL, replica->remote, NULL, probe, pe_order_probe, data_set); } } } return any_created; } void pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml) { } void pcmk__bundle_log_actions(pe_resource_t *rsc, pe_working_set_t *data_set, gboolean terminal) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pe__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica); if (replica->ip) { LogActions(replica->ip, data_set, terminal); } if (replica->container) { LogActions(replica->container, data_set, terminal); } if (replica->remote) { LogActions(replica->remote, data_set, terminal); } if (replica->child) { LogActions(replica->child, data_set, terminal); } } } diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index 07f931c2dd..e68eca90e3 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,1510 +1,1509 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #define VARIANT_CLONE 1 #include gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set); static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all); static gint sort_rsc_id(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; long num1, num2; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* * Sort clone instances numerically by instance number, so instance :10 * comes after :9. */ num1 = strtol(strrchr(resource1->id, ':') + 1, NULL, 10); num2 = strtol(strrchr(resource2->id, ':') + 1, NULL, 10); if (num1 < num2) { return -1; } else if (num1 > num2) { return 1; } return 0; } static pe_node_t * parent_node_instance(const pe_resource_t * rsc, pe_node_t * node) { pe_node_t *ret = NULL; if (node != NULL && rsc->parent) { ret = pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id); } else if(node != NULL) { ret = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); } return ret; } static gboolean did_fail(const pe_resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_failed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (did_fail(child_rsc)) { return TRUE; } } return FALSE; } gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set) { int rc = 0; pe_node_t *node1 = NULL; pe_node_t *node2 = NULL; pe_node_t *current_node1 = NULL; pe_node_t *current_node2 = NULL; unsigned int nnodes1 = 0; unsigned int nnodes2 = 0; gboolean can1 = TRUE; gboolean can2 = TRUE; const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; CRM_ASSERT(resource1 != NULL); CRM_ASSERT(resource2 != NULL); /* allocation order: * - active instances * - instances running on nodes with the least copies * - active instances on nodes that can't support them or are to be fenced * - failed instances * - inactive instances */ current_node1 = pe__find_active_on(resource1, &nnodes1, NULL); current_node2 = pe__find_active_on(resource2, &nnodes2, NULL); if (nnodes1 && nnodes2) { if (nnodes1 < nnodes2) { crm_trace("%s < %s: running_on", resource1->id, resource2->id); return -1; } else if (nnodes1 > nnodes2) { crm_trace("%s > %s: running_on", resource1->id, resource2->id); return 1; } } node1 = current_node1; node2 = current_node2; if (node1) { pe_node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource1->id); node1 = NULL; can1 = FALSE; } } if (node2) { pe_node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id); if (match == NULL || match->weight < 0) { crm_trace("%s: current location is unavailable", resource2->id); node2 = NULL; can2 = FALSE; } } if (can1 != can2) { if (can1) { crm_trace("%s < %s: availability of current location", resource1->id, resource2->id); return -1; } crm_trace("%s > %s: availability of current location", resource1->id, resource2->id); return 1; } if (resource1->priority < resource2->priority) { crm_trace("%s < %s: priority", resource1->id, resource2->id); return 1; } else if (resource1->priority > resource2->priority) { crm_trace("%s > %s: priority", resource1->id, resource2->id); return -1; } if (node1 == NULL && node2 == NULL) { crm_trace("%s == %s: not active", resource1->id, resource2->id); return 0; } if (node1 != node2) { if (node1 == NULL) { crm_trace("%s > %s: active", resource1->id, resource2->id); return 1; } else if (node2 == NULL) { crm_trace("%s < %s: active", resource1->id, resource2->id); return -1; } } can1 = can_run_resources(node1); can2 = can_run_resources(node2); if (can1 != can2) { if (can1) { crm_trace("%s < %s: can", resource1->id, resource2->id); return -1; } crm_trace("%s > %s: can", resource1->id, resource2->id); return 1; } node1 = parent_node_instance(resource1, node1); node2 = parent_node_instance(resource2, node2); if (node1 != NULL && node2 == NULL) { crm_trace("%s < %s: not allowed", resource1->id, resource2->id); return -1; } else if (node1 == NULL && node2 != NULL) { crm_trace("%s > %s: not allowed", resource1->id, resource2->id); return 1; } if (node1 == NULL || node2 == NULL) { crm_trace("%s == %s: not allowed", resource1->id, resource2->id); return 0; } if (node1->count < node2->count) { crm_trace("%s < %s: count", resource1->id, resource2->id); return -1; } else if (node1->count > node2->count) { crm_trace("%s > %s: count", resource1->id, resource2->id); return 1; } can1 = did_fail(resource1); can2 = did_fail(resource2); if (can1 != can2) { if (can1) { crm_trace("%s > %s: failed", resource1->id, resource2->id); return 1; } crm_trace("%s < %s: failed", resource1->id, resource2->id); return -1; } if (node1 && node2) { int lpc = 0; int max = 0; pe_node_t *n = NULL; GListPtr gIter = NULL; GListPtr list1 = NULL; GListPtr list2 = NULL; GHashTable *hash1 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); GHashTable *hash2 = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); n = pe__copy_node(current_node1); g_hash_table_insert(hash1, (gpointer) n->details->id, n); n = pe__copy_node(current_node2); g_hash_table_insert(hash2, (gpointer) n->details->id, n); if(resource1->parent) { for (gIter = resource1->parent->rsc_cons; gIter; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } crm_trace("Applying %s to %s", constraint->id, resource1->id); hash1 = pcmk__native_merge_weights(constraint->rsc_rh, resource1->id, hash1, constraint->node_attribute, constraint->score / (float) INFINITY, 0); } for (gIter = resource1->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } crm_trace("Applying %s to %s", constraint->id, resource1->id); hash1 = pcmk__native_merge_weights(constraint->rsc_lh, resource1->id, hash1, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_positive); } } if(resource2->parent) { for (gIter = resource2->parent->rsc_cons; gIter; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; crm_trace("Applying %s to %s", constraint->id, resource2->id); hash2 = pcmk__native_merge_weights(constraint->rsc_rh, resource2->id, hash2, constraint->node_attribute, constraint->score / (float) INFINITY, 0); } for (gIter = resource2->parent->rsc_cons_lhs; gIter; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; crm_trace("Applying %s to %s", constraint->id, resource2->id); hash2 = pcmk__native_merge_weights(constraint->rsc_lh, resource2->id, hash2, constraint->node_attribute, constraint->score / (float) INFINITY, pe_weights_positive); } } /* Current location score */ node1 = g_hash_table_lookup(hash1, current_node1->details->id); node2 = g_hash_table_lookup(hash2, current_node2->details->id); if (node1->weight < node2->weight) { if (node1->weight < 0) { crm_trace("%s > %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = -1; goto out; } else { crm_trace("%s < %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = 1; goto out; } } else if (node1->weight > node2->weight) { crm_trace("%s > %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight); rc = -1; goto out; } /* All location scores */ list1 = g_hash_table_get_values(hash1); list2 = g_hash_table_get_values(hash2); list1 = sort_nodes_by_weight(list1, current_node1, data_set); list2 = sort_nodes_by_weight(list2, current_node2, data_set); max = g_list_length(list1); if (max < g_list_length(list2)) { max = g_list_length(list2); } for (; lpc < max; lpc++) { node1 = g_list_nth_data(list1, lpc); node2 = g_list_nth_data(list2, lpc); if (node1 == NULL) { crm_trace("%s < %s: colocated score NULL", resource1->id, resource2->id); rc = 1; break; } else if (node2 == NULL) { crm_trace("%s > %s: colocated score NULL", resource1->id, resource2->id); rc = -1; break; } if (node1->weight < node2->weight) { crm_trace("%s < %s: colocated score", resource1->id, resource2->id); rc = 1; break; } else if (node1->weight > node2->weight) { crm_trace("%s > %s: colocated score", resource1->id, resource2->id); rc = -1; break; } } /* Order by reverse uname - same as sort_node_weight() does? */ out: g_hash_table_destroy(hash1); /* Free mem */ g_hash_table_destroy(hash2); /* Free mem */ g_list_free(list1); g_list_free(list2); if (rc != 0) { return rc; } } rc = strcmp(resource1->id, resource2->id); crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id); return rc; } static pe_node_t * can_run_instance(pe_resource_t * rsc, pe_node_t * node, int limit) { pe_node_t *local_node = NULL; if (node == NULL && rsc->allowed_nodes) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) { can_run_instance(rsc, local_node, limit); } return NULL; } if (!node) { /* make clang analyzer happy */ goto bail; } else if (can_run_resources(node) == FALSE) { goto bail; } else if (is_set(rsc->flags, pe_rsc_orphan)) { goto bail; } local_node = parent_node_instance(rsc, node); if (local_node == NULL) { crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname); goto bail; } else if (local_node->weight < 0) { common_update_score(rsc, node->details->id, local_node->weight); pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.", rsc->id, node->details->uname); } else if (local_node->count < limit) { pe_rsc_trace(rsc, "%s can run on %s (already running %d)", rsc->id, node->details->uname, local_node->count); return local_node; } else { pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)", rsc->id, node->details->uname, local_node->count, limit); } bail: if (node) { common_update_score(rsc, node->details->id, -INFINITY); } return NULL; } static pe_node_t * allocate_instance(pe_resource_t *rsc, pe_node_t *prefer, gboolean all_coloc, int limit, pe_working_set_t *data_set) { pe_node_t *chosen = NULL; GHashTable *backup = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)", rsc->id, (prefer? prefer->details->uname: "none"), (all_coloc? "all" : "some")); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->fns->location(rsc, NULL, FALSE); } else if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } /* Only include positive colocation preferences of dependent resources * if not every node will get a copy of the clone */ append_parent_colocation(rsc->parent, rsc, all_coloc); if (prefer) { pe_node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (local_prefer == NULL || local_prefer->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id, prefer->details->uname); return NULL; } } can_run_instance(rsc, NULL, limit); backup = pcmk__copy_node_table(rsc->allowed_nodes); pe_rsc_trace(rsc, "Allocating instance %s", rsc->id); chosen = rsc->cmds->allocate(rsc, prefer, data_set); if (chosen && prefer && (chosen->details != prefer->details)) { crm_info("Not pre-allocating %s to %s because %s is better", rsc->id, prefer->details->uname, chosen->details->uname); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = backup; native_deallocate(rsc); chosen = NULL; backup = NULL; } if (chosen) { pe_node_t *local_node = parent_node_instance(rsc, chosen); if (local_node) { local_node->count++; } else if (is_set(rsc->flags, pe_rsc_managed)) { /* what to do? we can't enforce per-node limits in this case */ pcmk__config_err("%s not found in %s (list of %d)", chosen->details->id, rsc->parent->id, g_hash_table_size(rsc->parent->allowed_nodes)); } } if(backup) { g_hash_table_destroy(backup); } return chosen; } static void append_parent_colocation(pe_resource_t * rsc, pe_resource_t * child, gboolean all) { GListPtr gIter = NULL; gIter = rsc->rsc_cons; for (; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *cons = (rsc_colocation_t *) gIter->data; if (cons->score == 0) { continue; } if (all || cons->score < 0 || cons->score == INFINITY) { child->rsc_cons = g_list_prepend(child->rsc_cons, cons); } } gIter = rsc->rsc_cons_lhs; for (; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *cons = (rsc_colocation_t *) gIter->data; if (cons->score == 0) { continue; } if (all || cons->score < 0) { child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons); } } } void distribute_children(pe_resource_t *rsc, GListPtr children, GListPtr nodes, int max, int per_host_max, pe_working_set_t * data_set); void distribute_children(pe_resource_t *rsc, GListPtr children, GListPtr nodes, int max, int per_host_max, pe_working_set_t * data_set) { int loop_max = 0; int allocated = 0; int available_nodes = 0; /* count now tracks the number of clones currently allocated */ for(GListPtr nIter = nodes; nIter != NULL; nIter = nIter->next) { pe_node_t *node = nIter->data; node->count = 0; if (can_run_resources(node)) { available_nodes++; } } if(available_nodes) { loop_max = max / available_nodes; } if (loop_max < 1) { loop_max = 1; } pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)", max, rsc->id, available_nodes, per_host_max, loop_max); /* Pre-allocate as many instances as we can to their current location */ for (GListPtr gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (child->running_on && is_set(child->flags, pe_rsc_provisional) && is_not_set(child->flags, pe_rsc_failed)) { pe_node_t *child_node = pe__current_node(child); pe_node_t *local_node = parent_node_instance(child, child_node); pe_rsc_trace(rsc, "Checking pre-allocation of %s to %s (%d remaining of %d)", child->id, child_node->details->uname, max - allocated, max); if (can_run_resources(child_node) == FALSE || child_node->weight < 0) { pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s", child_node->details->uname, child->id); } else if(local_node && local_node->count >= loop_max) { pe_rsc_trace(rsc, "Not pre-allocating because %s already allocated optimal instances", child_node->details->uname); } else if (allocate_instance(child, child_node, max < available_nodes, per_host_max, data_set)) { pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id, child_node->details->uname); allocated++; } } } pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max); for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (child->running_on != NULL) { pe_node_t *child_node = pe__current_node(child); pe_node_t *local_node = parent_node_instance(child, child_node); if (local_node == NULL) { crm_err("%s is running on %s which isn't allowed", child->id, child_node->details->uname); } } if (is_not_set(child->flags, pe_rsc_provisional)) { } else if (allocated >= max) { pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max); resource_location(child, NULL, -INFINITY, "clone:limit_reached", data_set); } else { if (allocate_instance(child, NULL, max < available_nodes, per_host_max, data_set)) { allocated++; } } } pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d", allocated, rsc->id, max); } pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *prefer, pe_working_set_t *data_set) { GListPtr nodes = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return NULL; } else if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } if (is_set(rsc->flags, pe_rsc_promotable)) { apply_master_prefs(rsc); } set_bit(rsc->flags, pe_rsc_allocating); /* this information is used by sort_clone_instance() when deciding in which * order to allocate clone instances */ for (GListPtr gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } pe_rsc_trace(rsc, "%s: Allocating %s first", rsc->id, constraint->rsc_rh->id); constraint->rsc_rh->cmds->allocate(constraint->rsc_rh, prefer, data_set); } for (GListPtr gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (constraint->score == 0) { continue; } rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY, (pe_weights_rollback | pe_weights_positive)); } pe__show_node_weights(!show_scores, rsc, __FUNCTION__, rsc->allowed_nodes); nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = sort_nodes_by_weight(nodes, NULL, data_set); rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set); distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set); g_list_free(nodes); if (is_set(rsc->flags, pe_rsc_promotable)) { pcmk__set_instance_roles(rsc, data_set); } clear_bit(rsc->flags, pe_rsc_provisional); clear_bit(rsc->flags, pe_rsc_allocating); pe_rsc_trace(rsc, "Done allocating %s", rsc->id); return NULL; } static void clone_update_pseudo_status(pe_resource_t * rsc, gboolean * stopping, gboolean * starting, gboolean * active) { GListPtr gIter = NULL; if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; clone_update_pseudo_status(child, stopping, starting, active); } return; } CRM_ASSERT(active != NULL); CRM_ASSERT(starting != NULL); CRM_ASSERT(stopping != NULL); if (rsc->running_on) { *active = TRUE; } gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (*starting && *stopping) { return; } else if (is_set(action->flags, pe_action_optional)) { pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid); continue; } else if (is_set(action->flags, pe_action_pseudo) == FALSE && is_set(action->flags, pe_action_runnable) == FALSE) { pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid); continue; } else if (pcmk__str_eq(RSC_STOP, action->task, pcmk__str_casei)) { pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid); *stopping = TRUE; } else if (pcmk__str_eq(RSC_START, action->task, pcmk__str_casei)) { if (is_set(action->flags, pe_action_runnable) == FALSE) { pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d", action->uuid, is_set(action->flags, pe_action_runnable), is_set(action->flags, pe_action_pseudo)); } else { pe_rsc_trace(rsc, "Starting due to: %s", action->uuid); pe_rsc_trace(rsc, "%s run=%d, pseudo=%d", action->uuid, is_set(action->flags, pe_action_runnable), is_set(action->flags, pe_action_pseudo)); *starting = TRUE; } } } } static pe_action_t * find_rsc_action(pe_resource_t *rsc, const char *task, gboolean active_only, GList **list) { pe_action_t *match = NULL; GListPtr possible = NULL; GListPtr active = NULL; possible = pe__resource_actions(rsc, NULL, task, FALSE); if (active_only) { GListPtr gIter = possible; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE) { active = g_list_prepend(active, op); } } if (active && pcmk__list_of_1(active)) { match = g_list_nth_data(active, 0); } if (list) { *list = active; active = NULL; } } else if (possible && pcmk__list_of_1(possible)) { match = g_list_nth_data(possible, 0); } if (list) { *list = possible; possible = NULL; } if (possible) { g_list_free(possible); } if (active) { g_list_free(active); } return match; } static void child_ordering_constraints(pe_resource_t * rsc, pe_working_set_t * data_set) { pe_action_t *stop = NULL; pe_action_t *start = NULL; pe_action_t *last_stop = NULL; pe_action_t *last_start = NULL; GListPtr gIter = NULL; gboolean active_only = TRUE; /* change to false to get the old behavior */ clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->ordered == FALSE) { return; } /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; stop = find_rsc_action(child, RSC_STOP, active_only, NULL); if (stop) { if (last_stop) { /* child/child relative stop */ order_actions(stop, last_stop, pe_order_optional); } last_stop = stop; } start = find_rsc_action(child, RSC_START, active_only, NULL); if (start) { if (last_start) { /* child/child relative start */ order_actions(last_start, start, pe_order_optional); } last_start = start; } } } void clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set); child_ordering_constraints(rsc, data_set); if (is_set(rsc->flags, pe_rsc_promotable)) { create_promotable_actions(rsc, data_set); } } void clone_create_pseudo_actions( pe_resource_t * rsc, GListPtr children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set) { gboolean child_active = FALSE; gboolean child_starting = FALSE; gboolean child_stopping = FALSE; gboolean allow_dependent_migrations = TRUE; pe_action_t *stop = NULL; pe_action_t *stopped = NULL; pe_action_t *start = NULL; pe_action_t *started = NULL; pe_rsc_trace(rsc, "Creating actions for %s", rsc->id); for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean starting = FALSE; gboolean stopping = FALSE; child_rsc->cmds->create_actions(child_rsc, data_set); clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active); if (stopping && starting) { allow_dependent_migrations = FALSE; } child_stopping |= stopping; child_starting |= starting; } /* start */ start = create_pseudo_resource_op(rsc, RSC_START, !child_starting, TRUE, data_set); started = create_pseudo_resource_op(rsc, RSC_STARTED, !child_starting, FALSE, data_set); started->priority = INFINITY; if (child_active || child_starting) { update_action_flags(started, pe_action_runnable, __FUNCTION__, __LINE__); } if (start_notify != NULL && *start_notify == NULL) { *start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set); } /* stop */ stop = create_pseudo_resource_op(rsc, RSC_STOP, !child_stopping, TRUE, data_set); stopped = create_pseudo_resource_op(rsc, RSC_STOPPED, !child_stopping, TRUE, data_set); stopped->priority = INFINITY; if (allow_dependent_migrations) { update_action_flags(stop, pe_action_migrate_runnable, __FUNCTION__, __LINE__); } if (stop_notify != NULL && *stop_notify == NULL) { *stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set); if (start_notify && *start_notify && *stop_notify) { order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional); } } } void clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_resource_t *last_rsc = NULL; GListPtr gIter; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id); new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set); new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set); new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set); if (is_set(rsc->flags, pe_rsc_promotable)) { new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set); new_rsc_order(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set); } if (clone_data->ordered) { /* we have to maintain a consistent sorted child list when building order constraints */ rsc->children = g_list_sort(rsc->children, sort_rsc_id); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->internal_constraints(child_rsc, data_set); order_start_start(rsc, child_rsc, pe_order_runnable_left | pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { order_start_start(last_rsc, child_rsc, pe_order_optional); } order_stop_stop(rsc, child_rsc, pe_order_implies_first_printed); new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed, data_set); if (clone_data->ordered && last_rsc) { order_stop_stop(child_rsc, last_rsc, pe_order_optional); } last_rsc = child_rsc; } if (is_set(rsc->flags, pe_rsc_promotable)) { promotable_constraints(rsc, data_set); } } bool assign_node(pe_resource_t * rsc, pe_node_t * node, gboolean force) { bool changed = FALSE; if (rsc->children) { for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; changed |= assign_node(child_rsc, node, force); } return changed; } if (rsc->allocated_to != NULL) { changed = true; } native_assign_node(rsc, NULL, node, force); return changed; } gboolean is_child_compatible(pe_resource_t *child_rsc, pe_node_t * local_node, enum rsc_role_e filter, gboolean current) { pe_node_t *node = NULL; enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current); CRM_CHECK(child_rsc && local_node, return FALSE); if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { /* We only want instances that haven't failed */ node = child_rsc->fns->location(child_rsc, NULL, current); } if (filter != RSC_ROLE_UNKNOWN && next_role != filter) { crm_trace("Filtered %s", child_rsc->id); return FALSE; } if (node && (node->details == local_node->details)) { return TRUE; } else if (node) { crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname, local_node->details->uname); } else { crm_trace("%s - not allocated %d", child_rsc->id, current); } return FALSE; } pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set) { pe_resource_t *pair = NULL; GListPtr gIter = NULL; GListPtr scratch = NULL; pe_node_t *local_node = NULL; local_node = local_child->fns->location(local_child, NULL, current); if (local_node) { return find_compatible_child_by_node(local_child, local_node, rsc, filter, current); } scratch = g_hash_table_get_values(local_child->allowed_nodes); scratch = sort_nodes_by_weight(scratch, NULL, data_set); gIter = scratch; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pair = find_compatible_child_by_node(local_child, node, rsc, filter, current); if (pair) { goto done; } } pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id); done: g_list_free(scratch); return pair; } void clone_rsc_colocation_lh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { /* -- Never called -- * * Instead we add the colocation constraints to the child and call from there */ CRM_ASSERT(FALSE); } void clone_rsc_colocation_rh(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, rsc_colocation_t *constraint, pe_working_set_t *data_set) { GListPtr gIter = NULL; gboolean do_interleave = FALSE; const char *interleave_s = NULL; CRM_CHECK(constraint != NULL, return); CRM_CHECK(rsc_lh != NULL, pe_err("rsc_lh was NULL for %s", constraint->id); return); CRM_CHECK(rsc_rh != NULL, pe_err("rsc_rh was NULL for %s", constraint->id); return); CRM_CHECK(rsc_lh->variant == pe_native, return); if (constraint->score == 0) { return; } pe_rsc_trace(rsc_rh, "Processing constraint %s: %s -> %s %d", constraint->id, rsc_lh->id, rsc_rh->id, constraint->score); if (is_set(rsc_rh->flags, pe_rsc_promotable)) { if (is_set(rsc_rh->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id); return; } else if (constraint->role_rh == RSC_ROLE_UNKNOWN) { pe_rsc_trace(rsc_rh, "Handling %s as a clone colocation", constraint->id); } else { promotable_colocation_rh(rsc_lh, rsc_rh, constraint, data_set); return; } } /* only the LHS side needs to be labeled as interleave */ interleave_s = g_hash_table_lookup(constraint->rsc_lh->meta, XML_RSC_ATTR_INTERLEAVE); if(crm_is_true(interleave_s) && constraint->rsc_lh->variant > pe_group) { // TODO: Do we actually care about multiple RH copies sharing a LH copy anymore? if (copies_per_node(constraint->rsc_lh) != copies_per_node(constraint->rsc_rh)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", constraint->rsc_lh->id, constraint->rsc_rh->id); } else { do_interleave = TRUE; } } if (is_set(rsc_rh->flags, pe_rsc_provisional)) { pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id); return; } else if (do_interleave) { pe_resource_t *rh_child = NULL; rh_child = find_compatible_child(rsc_lh, rsc_rh, RSC_ROLE_UNKNOWN, FALSE, data_set); if (rh_child) { pe_rsc_debug(rsc_rh, "Pairing %s with %s", rsc_lh->id, rh_child->id); rsc_lh->cmds->rsc_colocation_lh(rsc_lh, rh_child, constraint, data_set); } else if (constraint->score >= INFINITY) { crm_notice("Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id); assign_node(rsc_lh, NULL, TRUE); } else { pe_rsc_debug(rsc_rh, "Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id); } return; } else if (constraint->score >= INFINITY) { GListPtr rhs = NULL; gIter = rsc_rh->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; pe_node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE); if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) { pe_rsc_trace(rsc_rh, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight); rhs = g_list_prepend(rhs, chosen); } } node_list_exclude(rsc_lh->allowed_nodes, rhs, FALSE); g_list_free(rhs); return; } gIter = rsc_rh->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint, data_set); } } enum action_tasks clone_child_action(pe_action_t * action) { enum action_tasks result = no_action; pe_resource_t *child = (pe_resource_t *) action->rsc->children->data; - if (pcmk__str_eq(action->task, "notify", pcmk__str_casei) - || pcmk__str_eq(action->task, "notified", pcmk__str_casei)) { + if (pcmk__strcase_any_of(action->task, "notify", "notified", NULL)) { /* Find the action we're notifying about instead */ int stop = 0; char *key = action->uuid; int lpc = strlen(key); for (; lpc > 0; lpc--) { if (key[lpc] == '_' && stop == 0) { stop = lpc; } else if (key[lpc] == '_') { char *task_mutable = NULL; lpc++; task_mutable = strdup(key + lpc); task_mutable[stop - lpc] = 0; crm_trace("Extracted action '%s' from '%s'", task_mutable, key); result = get_complex_task(child, task_mutable, TRUE); free(task_mutable); break; } } } else { result = get_complex_task(child, action->task, TRUE); } return result; } enum pe_action_flags summary_action_flags(pe_action_t * action, GListPtr children, pe_node_t * node) { GListPtr gIter = NULL; gboolean any_runnable = FALSE; gboolean check_runnable = TRUE; enum action_tasks task = clone_child_action(action); enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo); const char *task_s = task2text(task); for (gIter = children; gIter != NULL; gIter = gIter->next) { pe_action_t *child_action = NULL; pe_resource_t *child = (pe_resource_t *) gIter->data; child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node); pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id, node ? node->details->uname : "none", child_action?child_action->uuid:"NA"); if (child_action) { enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node); if (is_set(flags, pe_action_optional) && is_set(child_flags, pe_action_optional) == FALSE) { pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid, child_action->uuid); flags = crm_clear_bit(__FUNCTION__, __LINE__, action->rsc->id, flags, pe_action_optional); pe_clear_action_bit(action, pe_action_optional); } if (is_set(child_flags, pe_action_runnable)) { any_runnable = TRUE; } } } if (check_runnable && any_runnable == FALSE) { pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid); flags = crm_clear_bit(__FUNCTION__, __LINE__, action->rsc->id, flags, pe_action_runnable); if (node == NULL) { pe_clear_action_bit(action, pe_action_runnable); } } return flags; } enum pe_action_flags clone_action_flags(pe_action_t * action, pe_node_t * node) { return summary_action_flags(action, action->rsc->children, node); } void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint) { GListPtr gIter = rsc->children; pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id); native_rsc_location(rsc, constraint); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->rsc_location(child_rsc, constraint); } } void clone_expand(pe_resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; rsc->cmds->action_flags(op, NULL); } if (clone_data->start_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify); expand_notification_data(rsc, clone_data->start_notify, data_set); create_notifications(rsc, clone_data->start_notify, data_set); } if (clone_data->stop_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify); expand_notification_data(rsc, clone_data->stop_notify, data_set); create_notifications(rsc, clone_data->stop_notify, data_set); } if (clone_data->promote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify); expand_notification_data(rsc, clone_data->promote_notify, data_set); create_notifications(rsc, clone_data->promote_notify, data_set); } if (clone_data->demote_notify) { collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify); expand_notification_data(rsc, clone_data->demote_notify, data_set); create_notifications(rsc, clone_data->demote_notify, data_set); } /* Now that the notifcations have been created we can expand the children */ gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } native_expand(rsc, data_set); /* The notifications are in the graph now, we can destroy the notify_data */ free_notification_data(clone_data->demote_notify); clone_data->demote_notify = NULL; free_notification_data(clone_data->stop_notify); clone_data->stop_notify = NULL; free_notification_data(clone_data->start_notify); clone_data->start_notify = NULL; free_notification_data(clone_data->promote_notify); clone_data->promote_notify = NULL; } // Check whether a resource or any of its children is known on node static bool rsc_known_on(const pe_resource_t *rsc, const pe_node_t *node) { if (rsc->children) { for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; if (rsc_known_on(child, node)) { return TRUE; } } } else if (rsc->known_on) { GHashTableIter iter; pe_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->known_on); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (node->details == known_node->details) { return TRUE; } } } return FALSE; } // Look for an instance of clone that is known on node static pe_resource_t * find_instance_on(const pe_resource_t *clone, const pe_node_t *node) { for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; if (rsc_known_on(child, node)) { return child; } } return NULL; } // For unique clones, probe each instance separately static gboolean probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { gboolean any_created = FALSE; for (GList *child_iter = rsc->children; child_iter != NULL; child_iter = child_iter->next) { pe_resource_t *child = (pe_resource_t *) child_iter->data; any_created |= child->cmds->create_probe(child, node, complete, force, data_set); } return any_created; } // For anonymous clones, only a single instance needs to be probed static gboolean probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set) { // First, check if we probed an instance on this node last time pe_resource_t *child = find_instance_on(rsc, node); // Otherwise, check if we plan to start an instance on this node if (child == NULL) { for (GList *child_iter = rsc->children; child_iter && !child; child_iter = child_iter->next) { pe_node_t *local_node = NULL; pe_resource_t *child_rsc = (pe_resource_t *) child_iter->data; if (child_rsc) { /* make clang analyzer happy */ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE); if (local_node && (local_node->details == node->details)) { child = child_rsc; } } } } // Otherwise, use the first clone instance if (child == NULL) { child = rsc->children->data; } CRM_ASSERT(child); return child->cmds->create_probe(child, node, complete, force, data_set); } gboolean clone_create_probe(pe_resource_t * rsc, pe_node_t * node, pe_action_t * complete, gboolean force, pe_working_set_t * data_set) { gboolean any_created = FALSE; CRM_ASSERT(rsc); rsc->children = g_list_sort(rsc->children, sort_rsc_id); if (rsc->children == NULL) { pe_warn("Clone %s has no children", rsc->id); return FALSE; } if (rsc->exclusive_discover) { pe_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on * * remove the node from allowed_nodes so that the * notification contains only nodes that we might ever run * on */ g_hash_table_remove(rsc->allowed_nodes, node->details->id); /* Bit of a shortcut - might as well take it */ return FALSE; } } if (is_set(rsc->flags, pe_rsc_unique)) { any_created = probe_unique_clone(rsc, node, complete, force, data_set); } else { any_created = probe_anonymous_clone(rsc, node, complete, force, data_set); } return any_created; } void clone_append_meta(pe_resource_t * rsc, xmlNode * xml) { char *name = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); name = crm_meta_name(XML_RSC_ATTR_UNIQUE); crm_xml_add(xml, name, is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); free(name); name = crm_meta_name(XML_RSC_ATTR_NOTIFY); crm_xml_add(xml, name, is_set(rsc->flags, pe_rsc_notify) ? "true" : "false"); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX); crm_xml_add_int(xml, name, clone_data->clone_max); free(name); name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX); crm_xml_add_int(xml, name, clone_data->clone_node_max); free(name); if (is_set(rsc->flags, pe_rsc_promotable)) { name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(XML_RSC_ATTR_MASTER_MAX); crm_xml_add_int(xml, name, clone_data->promoted_max); free(name); name = crm_meta_name(XML_RSC_ATTR_MASTER_NODEMAX); crm_xml_add_int(xml, name, clone_data->promoted_node_max); free(name); } } diff --git a/lib/pacemaker/pcmk_sched_utils.c b/lib/pacemaker/pcmk_sched_utils.c index 6193ad5f74..4453939d64 100644 --- a/lib/pacemaker/pcmk_sched_utils.c +++ b/lib/pacemaker/pcmk_sched_utils.c @@ -1,753 +1,752 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // lrmd_event_data_t #include pe__location_t * rsc2node_new(const char *id, pe_resource_t *rsc, int node_weight, const char *discover_mode, pe_node_t *foo_node, pe_working_set_t *data_set) { pe__location_t *new_con = NULL; if (rsc == NULL || id == NULL) { pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc); return NULL; } else if (foo_node == NULL) { CRM_CHECK(node_weight == 0, return NULL); } new_con = calloc(1, sizeof(pe__location_t)); if (new_con != NULL) { new_con->id = strdup(id); new_con->rsc_lh = rsc; new_con->node_list_rh = NULL; new_con->role_filter = RSC_ROLE_UNKNOWN; if (pcmk__str_eq(discover_mode, "always", pcmk__str_null_matches | pcmk__str_casei)) { new_con->discover_mode = pe_discover_always; } else if (pcmk__str_eq(discover_mode, "never", pcmk__str_casei)) { new_con->discover_mode = pe_discover_never; } else if (pcmk__str_eq(discover_mode, "exclusive", pcmk__str_casei)) { new_con->discover_mode = pe_discover_exclusive; rsc->exclusive_discover = TRUE; } else { pe_err("Invalid %s value %s in location constraint", XML_LOCATION_ATTR_DISCOVERY, discover_mode); } if (foo_node != NULL) { pe_node_t *copy = pe__copy_node(foo_node); copy->weight = node_weight; new_con->node_list_rh = g_list_prepend(NULL, copy); } data_set->placement_constraints = g_list_prepend(data_set->placement_constraints, new_con); rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con); } return new_con; } gboolean can_run_resources(const pe_node_t * node) { if (node == NULL) { return FALSE; } #if 0 if (node->weight < 0) { return FALSE; } #endif if (node->details->online == FALSE || node->details->shutdown || node->details->unclean || node->details->standby || node->details->maintenance) { crm_trace("%s: online=%d, unclean=%d, standby=%d, maintenance=%d", node->details->uname, node->details->online, node->details->unclean, node->details->standby, node->details->maintenance); return FALSE; } return TRUE; } /*! * \internal * \brief Copy a hash table of node objects * * \param[in] nodes Hash table to copy * * \return New copy of nodes (or NULL if nodes is NULL) */ GHashTable * pcmk__copy_node_table(GHashTable *nodes) { GHashTable *new_table = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (nodes == NULL) { return NULL; } new_table = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { pe_node_t *new_node = pe__copy_node(node); g_hash_table_insert(new_table, (gpointer) new_node->details->id, new_node); } return new_table; } /*! * \internal * \brief Copy a list of node objects * * \param[in] list List to copy * \param[in] reset Set copies' scores to 0 * * \return New list of shallow copies of nodes in original list */ GList * pcmk__copy_node_list(const GList *list, bool reset) { GList *result = NULL; for (const GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *new_node = NULL; pe_node_t *this_node = (pe_node_t *) gIter->data; new_node = pe__copy_node(this_node); if (reset) { new_node->weight = 0; } result = g_list_prepend(result, new_node); } return result; } struct node_weight_s { pe_node_t *active; pe_working_set_t *data_set; }; /* return -1 if 'a' is more preferred * return 1 if 'b' is more preferred */ static gint sort_node_weight(gconstpointer a, gconstpointer b, gpointer data) { const pe_node_t *node1 = (const pe_node_t *)a; const pe_node_t *node2 = (const pe_node_t *)b; struct node_weight_s *nw = data; int node1_weight = 0; int node2_weight = 0; int result = 0; if (a == NULL) { return 1; } if (b == NULL) { return -1; } node1_weight = node1->weight; node2_weight = node2->weight; if (can_run_resources(node1) == FALSE) { node1_weight = -INFINITY; } if (can_run_resources(node2) == FALSE) { node2_weight = -INFINITY; } if (node1_weight > node2_weight) { crm_trace("%s (%d) > %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return -1; } if (node1_weight < node2_weight) { crm_trace("%s (%d) < %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); return 1; } crm_trace("%s (%d) == %s (%d) : weight", node1->details->uname, node1_weight, node2->details->uname, node2_weight); if (pcmk__str_eq(nw->data_set->placement_strategy, "minimal", pcmk__str_casei)) { goto equal; } if (pcmk__str_eq(nw->data_set->placement_strategy, "balanced", pcmk__str_casei)) { result = compare_capacity(node1, node2); if (result < 0) { crm_trace("%s > %s : capacity (%d)", node1->details->uname, node2->details->uname, result); return -1; } else if (result > 0) { crm_trace("%s < %s : capacity (%d)", node1->details->uname, node2->details->uname, result); return 1; } } /* now try to balance resources across the cluster */ if (node1->details->num_resources < node2->details->num_resources) { crm_trace("%s (%d) > %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (node1->details->num_resources > node2->details->num_resources) { crm_trace("%s (%d) < %s (%d) : resources", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } if (nw->active && nw->active->details == node1->details) { crm_trace("%s (%d) > %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return -1; } else if (nw->active && nw->active->details == node2->details) { crm_trace("%s (%d) < %s (%d) : active", node1->details->uname, node1->details->num_resources, node2->details->uname, node2->details->num_resources); return 1; } equal: crm_trace("%s = %s", node1->details->uname, node2->details->uname); return strcmp(node1->details->uname, node2->details->uname); } GList * sort_nodes_by_weight(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set) { struct node_weight_s nw = { active_node, data_set }; return g_list_sort_with_data(nodes, sort_node_weight, &nw); } void native_deallocate(pe_resource_t * rsc) { if (rsc->allocated_to) { pe_node_t *old = rsc->allocated_to; crm_info("Deallocating %s from %s", rsc->id, old->details->uname); set_bit(rsc->flags, pe_rsc_provisional); rsc->allocated_to = NULL; old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc); old->details->num_resources--; /* old->count--; */ calculate_utilization(old->details->utilization, rsc->utilization, TRUE); free(old); } } gboolean native_assign_node(pe_resource_t * rsc, GListPtr nodes, pe_node_t * chosen, gboolean force) { CRM_ASSERT(rsc->variant == pe_native); if (force == FALSE && chosen != NULL) { bool unset = FALSE; if(chosen->weight < 0) { unset = TRUE; // Allow the graph to assume that the remote resource will come up } else if (!can_run_resources(chosen) && !pe__is_guest_node(chosen)) { unset = TRUE; } if(unset) { crm_debug("All nodes for resource %s are unavailable" ", unclean or shutting down (%s: %d, %d)", rsc->id, chosen->details->uname, can_run_resources(chosen), chosen->weight); rsc->next_role = RSC_ROLE_STOPPED; chosen = NULL; } } /* todo: update the old node for each resource to reflect its * new resource count */ native_deallocate(rsc); clear_bit(rsc->flags, pe_rsc_provisional); if (chosen == NULL) { GListPtr gIter = NULL; char *rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); crm_debug("Could not allocate a node for %s", rsc->id); rsc->next_role = RSC_ROLE_STOPPED; for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *op = (pe_action_t *) gIter->data; const char *interval_ms_s = g_hash_table_lookup(op->meta, XML_LRM_ATTR_INTERVAL_MS); crm_debug("Processing %s", op->uuid); if(pcmk__str_eq(RSC_STOP, op->task, pcmk__str_casei)) { update_action_flags(op, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__); } else if(pcmk__str_eq(RSC_START, op->task, pcmk__str_casei)) { update_action_flags(op, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); /* set_bit(rsc->flags, pe_rsc_block); */ } else if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { if(pcmk__str_eq(rc_inactive, g_hash_table_lookup(op->meta, XML_ATTR_TE_TARGET_RC), pcmk__str_casei)) { /* This is a recurring monitor for the stopped state, leave it alone */ } else { /* Normal monitor operation, cancel it */ update_action_flags(op, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__); } } } free(rc_inactive); return FALSE; } crm_debug("Assigning %s to %s", chosen->details->uname, rsc->id); rsc->allocated_to = pe__copy_node(chosen); chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc); chosen->details->num_resources++; chosen->count++; calculate_utilization(chosen->details->utilization, rsc->utilization, FALSE); dump_rsc_utilization((show_utilization? LOG_STDOUT : LOG_TRACE), __FUNCTION__, rsc, chosen); return TRUE; } void log_action(unsigned int log_level, const char *pre_text, pe_action_t * action, gboolean details) { const char *node_uname = NULL; const char *node_uuid = NULL; if (action == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (is_set(action->flags, pe_action_pseudo)) { node_uname = NULL; node_uuid = NULL; } else if (action->node != NULL) { node_uname = action->node->details->uname; node_uuid = action->node->details->id; } else { node_uname = ""; node_uuid = NULL; } switch (text2task(action->task)) { case stonith_node: case shutdown_crm: crm_trace("%s%s%sAction %d: %s%s%s%s%s%s", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", is_set(action->flags, pe_action_pseudo) ? "Pseudo " : is_set(action->flags, pe_action_optional) ? "Optional " : is_set(action->flags, pe_action_runnable) ? is_set(action->flags, pe_action_processed) ? "" : "(Provisional) " : "!!Non-Startable!! ", action->id, action->uuid, node_uname ? "\ton " : "", node_uname ? node_uname : "", node_uuid ? "\t\t(" : "", node_uuid ? node_uuid : "", node_uuid ? ")" : ""); break; default: crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", is_set(action->flags, pe_action_optional) ? "Optional " : is_set(action->flags, pe_action_pseudo) ? "Pseudo " : is_set(action->flags, pe_action_runnable) ? is_set(action->flags, pe_action_processed) ? "" : "(Provisional) " : "!!Non-Startable!! ", action->id, action->uuid, action->rsc ? action->rsc->id : "", node_uname ? "\ton " : "", node_uname ? node_uname : "", node_uuid ? "\t\t(" : "", node_uuid ? node_uuid : "", node_uuid ? ")" : ""); break; } if (details) { GListPtr gIter = NULL; crm_trace("\t\t====== Preceding Actions"); gIter = action->actions_before; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *other = (pe_action_wrapper_t *) gIter->data; log_action(log_level + 1, "\t\t", other->action, FALSE); } crm_trace("\t\t====== Subsequent Actions"); gIter = action->actions_after; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *other = (pe_action_wrapper_t *) gIter->data; log_action(log_level + 1, "\t\t", other->action, FALSE); } crm_trace("\t\t====== End"); } else { crm_trace("\t\t(before=%d, after=%d)", g_list_length(action->actions_before), g_list_length(action->actions_after)); } } gboolean can_run_any(GHashTable * nodes) { GHashTableIter iter; pe_node_t *node = NULL; if (nodes == NULL) { return FALSE; } g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (can_run_resources(node) && node->weight >= 0) { return TRUE; } } return FALSE; } pe_action_t * create_pseudo_resource_op(pe_resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set) { pe_action_t *action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL, optional, TRUE, data_set); update_action_flags(action, pe_action_pseudo, __FUNCTION__, __LINE__); update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__); if(runnable) { update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__); } return action; } /*! * \internal * \brief Create an executor cancel op * * \param[in] rsc Resource of action to cancel * \param[in] task Name of action to cancel * \param[in] interval_ms Interval of action to cancel * \param[in] node Node of action to cancel * \param[in] data_set Working set of cluster * * \return Created op */ pe_action_t * pe_cancel_op(pe_resource_t *rsc, const char *task, guint interval_ms, pe_node_t *node, pe_working_set_t *data_set) { pe_action_t *cancel_op; char *interval_ms_s = crm_strdup_printf("%u", interval_ms); // @TODO dangerous if possible to schedule another action with this key char *key = pcmk__op_key(rsc->id, task, interval_ms); cancel_op = custom_action(rsc, key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(cancel_op->task); cancel_op->task = strdup(RSC_CANCEL); free(cancel_op->cancel_task); cancel_op->cancel_task = strdup(task); add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s); free(interval_ms_s); return cancel_op; } /*! * \internal * \brief Create a shutdown op for a scheduler transition * * \param[in] node Node being shut down * \param[in] data_set Working set of cluster * * \return Created op */ pe_action_t * sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set) { char *shutdown_id = crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN, node->details->uname); pe_action_t *shutdown_op = custom_action(NULL, shutdown_id, CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); crm_notice("Scheduling shutdown of node %s", node->details->uname); shutdown_constraints(node, shutdown_op, data_set); add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); return shutdown_op; } static char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { CRM_CHECK(transition_key != NULL, return NULL); return crm_strdup_printf("%d:%d;%s", op_status, op_rc, transition_key); } static void append_digest(lrmd_event_data_t *op, xmlNode *update, const char *version, const char *magic, int level) { /* this will enable us to later determine that the * resource's parameters have changed and we should force * a restart */ char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); pcmk__filter_op_for_digest(args_xml); digest = calculate_operation_digest(args_xml, version); #if 0 if (level < get_crm_log_level() && op->interval_ms == 0 && pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) { char *digest_source = dump_xml_unformatted(args_xml); do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n", digest, ID(update), magic, digest_source); free(digest_source); } #endif crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); free_xml(args_xml); free(digest); } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" /*! * \internal * \brief Create XML for resource operation history update * * \param[in,out] parent Parent XML node to add to * \param[in,out] op Operation event data * \param[in] caller_version DC feature set * \param[in] target_rc Expected result of operation * \param[in] node Name of node on which operation was performed * \param[in] origin Arbitrary description of update source * \param[in] level A log message will be logged at this level * * \return Newly created XML node for history update */ xmlNode * pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op, const char *caller_version, int target_rc, const char *node, const char *origin, int level) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; CRM_CHECK(op != NULL, return NULL); do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)", origin, op->rsc_id, op->op_type, services_lrm_status_str(op->op_status), op->interval_ms); crm_trace("DC version: %s", caller_version); task = op->op_type; /* Record a successful reload as a start, and a failed reload as a monitor, * to make life easier for the scheduler when determining the current state. */ if (pcmk__str_eq(task, "reload", pcmk__str_none)) { if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } key = pcmk__op_key(op->rsc_id, task, op->interval_ms); if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = pcmk__notify_key(op->rsc_id, n_type, n_task); if (op->op_status != PCMK_LRM_OP_PENDING) { /* Ignore notify errors. * * @TODO It might be better to keep the correct result here, and * ignore it in process_graph_event(). */ op->op_status = PCMK_LRM_OP_DONE; op->rc = 0; } } else if (did_rsc_op_fail(op, target_rc)) { op_id = pcmk__op_key(op->rsc_id, "last_failure", 0); if (op->interval_ms == 0) { // Ensure 'last' gets updated, in case record-pending is true op_id_additional = pcmk__op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval_ms > 0) { op_id = strdup(key); } else { op_id = pcmk__op_key(op->rsc_id, "last", 0); } again: xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id); if (xml_op == NULL) { xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for: " PCMK__OP_FMT " %d from %s", op->rsc_id, op->op_type, op->interval_ms, op->call_id, origin); local_user_data = pcmk__transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if(magic == NULL) { magic = generate_transition_magic(op->user_data, op->op_status, op->rc); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason); crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */ crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms); if (compare_version("2.1", caller_version) <= 0) { if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_trace("Timing data (" PCMK__OP_FMT "): last=%u change=%u exec=%u queue=%u", op->rsc_id, op->op_type, op->interval_ms, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); if (op->interval_ms == 0) { crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE, (long long) op->t_run); // @COMPAT last-run is deprecated crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_RUN, (long long) op->t_run); } else if(op->t_rcchange) { /* last-run is not accurate for recurring ops */ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE, (long long) op->t_rcchange); } else { /* ...but is better than nothing otherwise */ crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE, (long long) op->t_run); } crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); } } - if (pcmk__str_eq(op->op_type, CRMD_ACTION_MIGRATE, pcmk__str_none) - || pcmk__str_eq(op->op_type, CRMD_ACTION_MIGRATED, pcmk__str_none)) { + if (pcmk__str_any_of(op->op_type, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) { /* * Record migrate_source and migrate_target always for migrate ops. */ const char *name = XML_LRM_ATTR_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = XML_LRM_ATTR_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } append_digest(op, xml_op, caller_version, magic, LOG_DEBUG); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c index 4889272168..c774cb637c 100644 --- a/lib/pengine/clone.c +++ b/lib/pengine/clone.c @@ -1,1190 +1,1189 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #define VARIANT_CLONE 1 #include "./variant.h" void pe__force_anon(const char *standard, pe_resource_t *rsc, const char *rid, pe_working_set_t *data_set) { if (pe_rsc_is_clone(rsc)) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_warn("Ignoring " XML_RSC_ATTR_UNIQUE " for %s because %s resources " "such as %s can be used only as anonymous clones", rsc->id, standard, rid); clone_data->clone_node_max = 1; clone_data->clone_max = QB_MIN(clone_data->clone_max, g_list_length(data_set->nodes)); } } pe_resource_t * find_clone_instance(pe_resource_t * rsc, const char *sub_id, pe_working_set_t * data_set) { char *child_id = NULL; pe_resource_t *child = NULL; const char *child_base = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); child_base = ID(clone_data->xml_obj_child); child_id = crm_strdup_printf("%s:%s", child_base, sub_id); child = pe_find_resource(rsc->children, child_id); free(child_id); return child; } pe_resource_t * pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set) { gboolean as_orphan = FALSE; char *inc_num = NULL; char *inc_max = NULL; pe_resource_t *child_rsc = NULL; xmlNode *child_copy = NULL; clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); CRM_CHECK(clone_data->xml_obj_child != NULL, return FALSE); if (clone_data->total_clones >= clone_data->clone_max) { // If we've already used all available instances, this is an orphan as_orphan = TRUE; } // Allocate instance numbers in numerical order (starting at 0) inc_num = crm_itoa(clone_data->total_clones); inc_max = crm_itoa(clone_data->clone_max); child_copy = copy_xml(clone_data->xml_obj_child); crm_xml_add(child_copy, XML_RSC_ATTR_INCARNATION, inc_num); if (common_unpack(child_copy, &child_rsc, rsc, data_set) == FALSE) { pe_err("Failed unpacking resource %s", crm_element_value(child_copy, XML_ATTR_ID)); child_rsc = NULL; goto bail; } /* child_rsc->globally_unique = rsc->globally_unique; */ CRM_ASSERT(child_rsc); clone_data->total_clones += 1; pe_rsc_trace(child_rsc, "Setting clone attributes for: %s", child_rsc->id); rsc->children = g_list_append(rsc->children, child_rsc); if (as_orphan) { set_bit_recursive(child_rsc, pe_rsc_orphan); } add_hash_param(child_rsc->meta, XML_RSC_ATTR_INCARNATION_MAX, inc_max); pe_rsc_trace(rsc, "Added %s instance %s", rsc->id, child_rsc->id); bail: free(inc_num); free(inc_max); return child_rsc; } gboolean clone_unpack(pe_resource_t * rsc, pe_working_set_t * data_set) { int lpc = 0; xmlNode *a_child = NULL; xmlNode *xml_obj = rsc->xml; clone_variant_data_t *clone_data = NULL; const char *ordered = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ORDERED); const char *max_clones = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_MAX); const char *max_clones_node = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_NODEMAX); pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); clone_data = calloc(1, sizeof(clone_variant_data_t)); rsc->variant_opaque = clone_data; if (is_set(rsc->flags, pe_rsc_promotable)) { const char *promoted_max = NULL; const char *promoted_node_max = NULL; promoted_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTED_MAX); if (promoted_max == NULL) { // @COMPAT deprecated since 2.0.0 promoted_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_MASTER_MAX); } promoted_node_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROMOTED_NODEMAX); if (promoted_node_max == NULL) { // @COMPAT deprecated since 2.0.0 promoted_node_max = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_MASTER_NODEMAX); } clone_data->promoted_max = crm_parse_int(promoted_max, "1"); clone_data->promoted_node_max = crm_parse_int(promoted_node_max, "1"); } // Implied by calloc() /* clone_data->xml_obj_child = NULL; */ clone_data->clone_node_max = crm_parse_int(max_clones_node, "1"); if (max_clones) { clone_data->clone_max = crm_parse_int(max_clones, "1"); } else if (pcmk__list_of_multiple(data_set->nodes)) { clone_data->clone_max = g_list_length(data_set->nodes); } else { clone_data->clone_max = 1; /* Handy during crm_verify */ } clone_data->ordered = crm_is_true(ordered); if ((rsc->flags & pe_rsc_unique) == 0 && clone_data->clone_node_max > 1) { pcmk__config_err("Ignoring " XML_RSC_ATTR_PROMOTED_MAX " for %s " "because anonymous clones support only one instance " "per node", rsc->id); clone_data->clone_node_max = 1; } pe_rsc_trace(rsc, "Options for %s", rsc->id); pe_rsc_trace(rsc, "\tClone max: %d", clone_data->clone_max); pe_rsc_trace(rsc, "\tClone node max: %d", clone_data->clone_node_max); pe_rsc_trace(rsc, "\tClone is unique: %s", is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); pe_rsc_trace(rsc, "\tClone is promotable: %s", is_set(rsc->flags, pe_rsc_promotable) ? "true" : "false"); // Clones may contain a single group or primitive for (a_child = __xml_first_child_element(xml_obj); a_child != NULL; a_child = __xml_next_element(a_child)) { - if (pcmk__str_eq((const char *)a_child->name, XML_CIB_TAG_RESOURCE, pcmk__str_none) - || pcmk__str_eq((const char *)a_child->name, XML_CIB_TAG_GROUP, pcmk__str_none)) { + if (pcmk__str_any_of((const char *)a_child->name, XML_CIB_TAG_RESOURCE, XML_CIB_TAG_GROUP, NULL)) { clone_data->xml_obj_child = a_child; break; } } if (clone_data->xml_obj_child == NULL) { pcmk__config_err("%s has nothing to clone", rsc->id); return FALSE; } /* * Make clones ever so slightly sticky by default * * This helps ensure clone instances are not shuffled around the cluster * for no benefit in situations when pre-allocation is not appropriate */ if (g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_STICKINESS) == NULL) { add_hash_param(rsc->meta, XML_RSC_ATTR_STICKINESS, "1"); } /* This ensures that the globally-unique value always exists for children to * inherit when being unpacked, as well as in resource agents' environment. */ add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, is_set(rsc->flags, pe_rsc_unique) ? XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE); if (clone_data->clone_max <= 0) { /* Create one child instance so that unpack_find_resource() will hook up * any orphans up to the parent correctly. */ if (pe__create_clone_child(rsc, data_set) == NULL) { return FALSE; } } else { // Create a child instance for each available instance number for (lpc = 0; lpc < clone_data->clone_max; lpc++) { if (pe__create_clone_child(rsc, data_set) == NULL) { return FALSE; } } } pe_rsc_trace(rsc, "Added %d children to resource %s...", clone_data->clone_max, rsc->id); return TRUE; } gboolean clone_active(pe_resource_t * rsc, gboolean all) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean child_active = child_rsc->fns->active(child_rsc, all); if (all == FALSE && child_active) { return TRUE; } else if (all && child_active == FALSE) { return FALSE; } } if (all) { return TRUE; } else { return FALSE; } } static void short_print(char *list, const char *prefix, const char *type, const char *suffix, long options, void *print_data) { if(suffix == NULL) { suffix = ""; } if (list) { if (options & pe_print_html) { status_print("
  • "); } status_print("%s%s: [%s ]%s", prefix, type, list, suffix); if (options & pe_print_html) { status_print("
  • \n"); } else if (options & pe_print_suppres_nl) { /* nothing */ } else if ((options & pe_print_printf) || (options & pe_print_ncurses)) { status_print("\n"); } } } static const char * configured_role_str(pe_resource_t * rsc) { const char *target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if ((target_role == NULL) && rsc->children && rsc->children->data) { target_role = g_hash_table_lookup(((pe_resource_t*)rsc->children->data)->meta, XML_RSC_ATTR_TARGET_ROLE); } return target_role; } static enum rsc_role_e configured_role(pe_resource_t * rsc) { const char *target_role = configured_role_str(rsc); if (target_role) { return text2role(target_role); } return RSC_ROLE_UNKNOWN; } static void clone_print_xml(pe_resource_t * rsc, const char *pre_text, long options, void *print_data) { char *child_text = crm_strdup_printf("%s ", pre_text); const char *target_role = configured_role_str(rsc); GListPtr gIter = rsc->children; status_print("%sid); status_print("multi_state=\"%s\" ", is_set(rsc->flags, pe_rsc_promotable)? "true" : "false"); status_print("unique=\"%s\" ", is_set(rsc->flags, pe_rsc_unique) ? "true" : "false"); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false"); status_print("failure_ignored=\"%s\" ", is_set(rsc->flags, pe_rsc_failure_ignored) ? "true" : "false"); if (target_role) { status_print("target_role=\"%s\" ", target_role); } status_print(">\n"); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; child_rsc->fns->print(child_rsc, child_text, options, print_data); } status_print("%s\n", pre_text); free(child_text); } bool is_set_recursive(pe_resource_t * rsc, long long flag, bool any) { GListPtr gIter; bool all = !any; if(is_set(rsc->flags, flag)) { if(any) { return TRUE; } } else if(all) { return FALSE; } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { if(is_set_recursive(gIter->data, flag, any)) { if(any) { return TRUE; } } else if(all) { return FALSE; } } if(all) { return TRUE; } return FALSE; } void clone_print(pe_resource_t * rsc, const char *pre_text, long options, void *print_data) { char *list_text = NULL; char *child_text = NULL; char *stopped_list = NULL; GListPtr master_list = NULL; GListPtr started_list = NULL; GListPtr gIter = rsc->children; clone_variant_data_t *clone_data = NULL; int active_instances = 0; if (pre_text == NULL) { pre_text = " "; } if (options & pe_print_xml) { clone_print_xml(rsc, pre_text, options, print_data); return; } get_clone_variant_data(clone_data, rsc); child_text = crm_strdup_printf("%s ", pre_text); status_print("%sClone Set: %s [%s]%s%s%s", pre_text ? pre_text : "", rsc->id, ID(clone_data->xml_obj_child), is_set(rsc->flags, pe_rsc_promotable) ? " (promotable)" : "", is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); if (options & pe_print_html) { status_print("\n
      \n"); } else if ((options & pe_print_log) == 0) { status_print("\n"); } for (; gIter != NULL; gIter = gIter->next) { gboolean print_full = FALSE; pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE); if (options & pe_print_clone_details) { print_full = TRUE; } if (is_set(rsc->flags, pe_rsc_unique)) { // Print individual instance when unique (except stopped orphans) if (partially_active || is_not_set(rsc->flags, pe_rsc_orphan)) { print_full = TRUE; } // Everything else in this block is for anonymous clones } else if (is_set(options, pe_print_pending) && (child_rsc->pending_task != NULL) && strcmp(child_rsc->pending_task, "probe")) { // Print individual instance when non-probe action is pending print_full = TRUE; } else if (partially_active == FALSE) { // List stopped instances when requested (except orphans) if (is_not_set(child_rsc->flags, pe_rsc_orphan) && is_not_set(options, pe_print_clone_active)) { stopped_list = pcmk__add_word(stopped_list, child_rsc->id); } } else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE) || is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE || is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) { // Print individual instance when active orphaned/unmanaged/failed print_full = TRUE; } else if (child_rsc->fns->active(child_rsc, TRUE)) { // Instance of fully active anonymous clone pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE); if (location) { // Instance is active on a single node enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE); if (location->details->online == FALSE && location->details->unclean) { print_full = TRUE; } else if (a_role > RSC_ROLE_SLAVE) { master_list = g_list_append(master_list, location); } else { started_list = g_list_append(started_list, location); } } else { /* uncolocated group - bleh */ print_full = TRUE; } } else { // Instance of partially active anonymous clone print_full = TRUE; } if (print_full) { if (options & pe_print_html) { status_print("
    • \n"); } child_rsc->fns->print(child_rsc, child_text, options, print_data); if (options & pe_print_html) { status_print("
    • \n"); } } } /* Masters */ master_list = g_list_sort(master_list, sort_node_uname); for (gIter = master_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; list_text = pcmk__add_word(list_text, host->details->uname); active_instances++; } short_print(list_text, child_text, "Masters", NULL, options, print_data); g_list_free(master_list); free(list_text); list_text = NULL; /* Started/Slaves */ started_list = g_list_sort(started_list, sort_node_uname); for (gIter = started_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; list_text = pcmk__add_word(list_text, host->details->uname); active_instances++; } if (is_set(rsc->flags, pe_rsc_promotable)) { enum rsc_role_e role = configured_role(rsc); if(role == RSC_ROLE_SLAVE) { short_print(list_text, child_text, "Slaves (target-role)", NULL, options, print_data); } else { short_print(list_text, child_text, "Slaves", NULL, options, print_data); } } else { short_print(list_text, child_text, "Started", NULL, options, print_data); } g_list_free(started_list); free(list_text); list_text = NULL; if (is_not_set(options, pe_print_clone_active)) { const char *state = "Stopped"; enum rsc_role_e role = configured_role(rsc); if (role == RSC_ROLE_STOPPED) { state = "Stopped (disabled)"; } if (is_not_set(rsc->flags, pe_rsc_unique) && (clone_data->clone_max > active_instances)) { GListPtr nIter; GListPtr list = g_hash_table_get_values(rsc->allowed_nodes); /* Custom stopped list for non-unique clones */ free(stopped_list); stopped_list = NULL; if (list == NULL) { /* Clusters with symmetrical=false haven't calculated allowed_nodes yet * If we've not probed for them yet, the Stopped list will be empty */ list = g_hash_table_get_values(rsc->known_on); } list = g_list_sort(list, sort_node_uname); for (nIter = list; nIter != NULL; nIter = nIter->next) { pe_node_t *node = (pe_node_t *)nIter->data; if (pe_find_node(rsc->running_on, node->details->uname) == NULL) { stopped_list = pcmk__add_word(stopped_list, node->details->uname); } } g_list_free(list); } short_print(stopped_list, child_text, state, NULL, options, print_data); free(stopped_list); } if (options & pe_print_html) { status_print("
    \n"); } free(child_text); } PCMK__OUTPUT_ARGS("clone", "unsigned int", "pe_resource_t *", "GListPtr", "GListPtr") int pe__clone_xml(pcmk__output_t *out, va_list args) { unsigned int options = va_arg(args, unsigned int); pe_resource_t *rsc = va_arg(args, pe_resource_t *); GListPtr only_node = va_arg(args, GListPtr); GListPtr only_rsc = va_arg(args, GListPtr); GListPtr gIter = rsc->children; int rc = pcmk_rc_no_output; gboolean printed_header = FALSE; gboolean print_everything = TRUE; if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { return rc; } print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) { continue; } if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) { continue; } if (!printed_header) { printed_header = TRUE; rc = pe__name_and_nvpairs_xml(out, true, "clone", 7 , "id", rsc->id , "multi_state", BOOL2STR(is_set(rsc->flags, pe_rsc_promotable)) , "unique", BOOL2STR(is_set(rsc->flags, pe_rsc_unique)) , "managed", BOOL2STR(is_set(rsc->flags, pe_rsc_managed)) , "failed", BOOL2STR(is_set(rsc->flags, pe_rsc_failed)) , "failure_ignored", BOOL2STR(is_set(rsc->flags, pe_rsc_failure_ignored)) , "target_role", configured_role_str(rsc)); CRM_ASSERT(rc == pcmk_rc_ok); } out->message(out, crm_map_element_name(child_rsc->xml), options, child_rsc, only_node, only_rsc); } if (printed_header) { pcmk__output_xml_pop_parent(out); } return rc; } PCMK__OUTPUT_ARGS("clone", "unsigned int", "pe_resource_t *", "GListPtr", "GListPtr") int pe__clone_html(pcmk__output_t *out, va_list args) { unsigned int options = va_arg(args, unsigned int); pe_resource_t *rsc = va_arg(args, pe_resource_t *); GListPtr only_node = va_arg(args, GListPtr); GListPtr only_rsc = va_arg(args, GListPtr); char *list_text = NULL; char *stopped_list = NULL; GListPtr master_list = NULL; GListPtr started_list = NULL; GListPtr gIter = rsc->children; clone_variant_data_t *clone_data = NULL; int active_instances = 0; int rc = pcmk_rc_no_output; gboolean print_everything = TRUE; get_clone_variant_data(clone_data, rsc); if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { return rc; } print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); out->begin_list(out, NULL, NULL, "Clone Set: %s [%s]%s%s%s", rsc->id, ID(clone_data->xml_obj_child), is_set(rsc->flags, pe_rsc_promotable) ? " (promotable)" : "", is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); rc = pcmk_rc_ok; for (; gIter != NULL; gIter = gIter->next) { gboolean print_full = FALSE; pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE); if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) { continue; } if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) { continue; } if (options & pe_print_clone_details) { print_full = TRUE; } if (is_set(rsc->flags, pe_rsc_unique)) { // Print individual instance when unique (except stopped orphans) if (partially_active || is_not_set(rsc->flags, pe_rsc_orphan)) { print_full = TRUE; } // Everything else in this block is for anonymous clones } else if (is_set(options, pe_print_pending) && (child_rsc->pending_task != NULL) && strcmp(child_rsc->pending_task, "probe")) { // Print individual instance when non-probe action is pending print_full = TRUE; } else if (partially_active == FALSE) { // List stopped instances when requested (except orphans) if (is_not_set(child_rsc->flags, pe_rsc_orphan) && is_not_set(options, pe_print_clone_active)) { stopped_list = pcmk__add_word(stopped_list, child_rsc->id); } } else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE) || is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE || is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) { // Print individual instance when active orphaned/unmanaged/failed print_full = TRUE; } else if (child_rsc->fns->active(child_rsc, TRUE)) { // Instance of fully active anonymous clone pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE); if (location) { // Instance is active on a single node enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE); if (location->details->online == FALSE && location->details->unclean) { print_full = TRUE; } else if (a_role > RSC_ROLE_SLAVE) { master_list = g_list_append(master_list, location); } else { started_list = g_list_append(started_list, location); } } else { /* uncolocated group - bleh */ print_full = TRUE; } } else { // Instance of partially active anonymous clone print_full = TRUE; } if (print_full) { GListPtr all = NULL; /* Print every resource that's a child of this clone. */ all = g_list_prepend(all, strdup("*")); out->message(out, crm_map_element_name(child_rsc->xml), options, child_rsc, only_node, all); g_list_free_full(all, free); } } if (is_set(options, pe_print_clone_details)) { free(stopped_list); out->end_list(out); return pcmk_rc_ok; } /* Masters */ master_list = g_list_sort(master_list, sort_node_uname); for (gIter = master_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; if (!pcmk__str_in_list(only_node, host->details->uname)) { continue; } list_text = pcmk__add_word(list_text, host->details->uname); active_instances++; } if (list_text != NULL) { out->list_item(out, NULL, " Masters: [%s ]", list_text); g_list_free(master_list); free(list_text); list_text = NULL; } /* Started/Slaves */ started_list = g_list_sort(started_list, sort_node_uname); for (gIter = started_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; if (!pcmk__str_in_list(only_node, host->details->uname)) { continue; } list_text = pcmk__add_word(list_text, host->details->uname); active_instances++; } if (list_text != NULL) { if (is_set(rsc->flags, pe_rsc_promotable)) { enum rsc_role_e role = configured_role(rsc); if(role == RSC_ROLE_SLAVE) { out->list_item(out, NULL, " Slaves (target-role): [%s ]", list_text); } else { out->list_item(out, NULL, " Slaves: [%s ]", list_text); } } else { out->list_item(out, NULL, " Started: [%s ]", list_text); } g_list_free(started_list); free(list_text); list_text = NULL; } if (is_not_set(options, pe_print_clone_active)) { const char *state = "Stopped"; enum rsc_role_e role = configured_role(rsc); if (role == RSC_ROLE_STOPPED) { state = "Stopped (disabled)"; } if (is_not_set(rsc->flags, pe_rsc_unique) && (clone_data->clone_max > active_instances)) { GListPtr nIter; GListPtr list = g_hash_table_get_values(rsc->allowed_nodes); /* Custom stopped list for non-unique clones */ free(stopped_list); stopped_list = NULL; if (list == NULL) { /* Clusters with symmetrical=false haven't calculated allowed_nodes yet * If we've not probed for them yet, the Stopped list will be empty */ list = g_hash_table_get_values(rsc->known_on); } list = g_list_sort(list, sort_node_uname); for (nIter = list; nIter != NULL; nIter = nIter->next) { pe_node_t *node = (pe_node_t *)nIter->data; if (pe_find_node(rsc->running_on, node->details->uname) == NULL && pcmk__str_in_list(only_node, node->details->uname)) { stopped_list = pcmk__add_word(stopped_list, node->details->uname); } } g_list_free(list); } if (stopped_list != NULL) { out->list_item(out, NULL, " %s: [%s ]", state, stopped_list); free(stopped_list); } } out->end_list(out); return rc; } PCMK__OUTPUT_ARGS("clone", "unsigned int", "pe_resource_t *", "GListPtr", "GListPtr") int pe__clone_text(pcmk__output_t *out, va_list args) { unsigned int options = va_arg(args, unsigned int); pe_resource_t *rsc = va_arg(args, pe_resource_t *); GListPtr only_node = va_arg(args, GListPtr); GListPtr only_rsc = va_arg(args, GListPtr); char *list_text = NULL; char *stopped_list = NULL; GListPtr master_list = NULL; GListPtr started_list = NULL; GListPtr gIter = rsc->children; clone_variant_data_t *clone_data = NULL; int active_instances = 0; int rc = pcmk_rc_no_output; gboolean print_everything = TRUE; get_clone_variant_data(clone_data, rsc); if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { return rc; } print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); out->begin_list(out, NULL, NULL, "Clone Set: %s [%s]%s%s%s", rsc->id, ID(clone_data->xml_obj_child), is_set(rsc->flags, pe_rsc_promotable) ? " (promotable)" : "", is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); rc = pcmk_rc_ok; for (; gIter != NULL; gIter = gIter->next) { gboolean print_full = FALSE; pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; gboolean partially_active = child_rsc->fns->active(child_rsc, FALSE); if (pcmk__rsc_filtered_by_node(child_rsc, only_node)) { continue; } if (child_rsc->fns->is_filtered(child_rsc, only_rsc, print_everything)) { continue; } if (options & pe_print_clone_details) { print_full = TRUE; } if (is_set(rsc->flags, pe_rsc_unique)) { // Print individual instance when unique (except stopped orphans) if (partially_active || is_not_set(rsc->flags, pe_rsc_orphan)) { print_full = TRUE; } // Everything else in this block is for anonymous clones } else if (is_set(options, pe_print_pending) && (child_rsc->pending_task != NULL) && strcmp(child_rsc->pending_task, "probe")) { // Print individual instance when non-probe action is pending print_full = TRUE; } else if (partially_active == FALSE) { // List stopped instances when requested (except orphans) if (is_not_set(child_rsc->flags, pe_rsc_orphan) && is_not_set(options, pe_print_clone_active)) { stopped_list = pcmk__add_word(stopped_list, child_rsc->id); } } else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE) || is_set_recursive(child_rsc, pe_rsc_managed, FALSE) == FALSE || is_set_recursive(child_rsc, pe_rsc_failed, TRUE)) { // Print individual instance when active orphaned/unmanaged/failed print_full = TRUE; } else if (child_rsc->fns->active(child_rsc, TRUE)) { // Instance of fully active anonymous clone pe_node_t *location = child_rsc->fns->location(child_rsc, NULL, TRUE); if (location) { // Instance is active on a single node enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, TRUE); if (location->details->online == FALSE && location->details->unclean) { print_full = TRUE; } else if (a_role > RSC_ROLE_SLAVE) { master_list = g_list_append(master_list, location); } else { started_list = g_list_append(started_list, location); } } else { /* uncolocated group - bleh */ print_full = TRUE; } } else { // Instance of partially active anonymous clone print_full = TRUE; } if (print_full) { GListPtr all = NULL; /* Print every resource that's a child of this clone. */ all = g_list_prepend(all, strdup("*")); out->message(out, crm_map_element_name(child_rsc->xml), options, child_rsc, only_node, all); g_list_free_full(all, free); } } if (is_set(options, pe_print_clone_details)) { free(stopped_list); out->end_list(out); return pcmk_rc_ok; } /* Masters */ master_list = g_list_sort(master_list, sort_node_uname); for (gIter = master_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; if (!pcmk__str_in_list(only_node, host->details->uname)) { continue; } list_text = pcmk__add_word(list_text, host->details->uname); active_instances++; } if (list_text != NULL) { out->list_item(out, "Masters", "[%s ]", list_text); g_list_free(master_list); free(list_text); list_text = NULL; } /* Started/Slaves */ started_list = g_list_sort(started_list, sort_node_uname); for (gIter = started_list; gIter; gIter = gIter->next) { pe_node_t *host = gIter->data; if (!pcmk__str_in_list(only_node, host->details->uname)) { continue; } list_text = pcmk__add_word(list_text, host->details->uname); active_instances++; } if (list_text != NULL) { if (is_set(rsc->flags, pe_rsc_promotable)) { enum rsc_role_e role = configured_role(rsc); if(role == RSC_ROLE_SLAVE) { out->list_item(out, "Slaves (target-role)", "[%s ]", list_text); } else { out->list_item(out, "Slaves", "[%s ]", list_text); } } else { out->list_item(out, "Started", "[%s ]", list_text); } g_list_free(started_list); free(list_text); list_text = NULL; } if (is_not_set(options, pe_print_clone_active)) { const char *state = "Stopped"; enum rsc_role_e role = configured_role(rsc); if (role == RSC_ROLE_STOPPED) { state = "Stopped (disabled)"; } if (is_not_set(rsc->flags, pe_rsc_unique) && (clone_data->clone_max > active_instances)) { GListPtr nIter; GListPtr list = g_hash_table_get_values(rsc->allowed_nodes); /* Custom stopped list for non-unique clones */ free(stopped_list); stopped_list = NULL; if (list == NULL) { /* Clusters with symmetrical=false haven't calculated allowed_nodes yet * If we've not probed for them yet, the Stopped list will be empty */ list = g_hash_table_get_values(rsc->known_on); } list = g_list_sort(list, sort_node_uname); for (nIter = list; nIter != NULL; nIter = nIter->next) { pe_node_t *node = (pe_node_t *)nIter->data; if (pe_find_node(rsc->running_on, node->details->uname) == NULL && pcmk__str_in_list(only_node, node->details->uname)) { stopped_list = pcmk__add_word(stopped_list, node->details->uname); } } g_list_free(list); } if (stopped_list != NULL) { out->list_item(out, state, "[%s ]", stopped_list); free(stopped_list); } } out->end_list(out); return rc; } void clone_free(pe_resource_t * rsc) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); pe_rsc_trace(rsc, "Freeing %s", rsc->id); for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; CRM_ASSERT(child_rsc); pe_rsc_trace(child_rsc, "Freeing child %s", child_rsc->id); free_xml(child_rsc->xml); child_rsc->xml = NULL; /* There could be a saved unexpanded xml */ free_xml(child_rsc->orig_xml); child_rsc->orig_xml = NULL; child_rsc->fns->free(child_rsc); } g_list_free(rsc->children); if (clone_data) { CRM_ASSERT(clone_data->demote_notify == NULL); CRM_ASSERT(clone_data->stop_notify == NULL); CRM_ASSERT(clone_data->start_notify == NULL); CRM_ASSERT(clone_data->promote_notify == NULL); } common_free(rsc); } enum rsc_role_e clone_resource_state(const pe_resource_t * rsc, gboolean current) { enum rsc_role_e clone_role = RSC_ROLE_UNKNOWN; GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; enum rsc_role_e a_role = child_rsc->fns->state(child_rsc, current); if (a_role > clone_role) { clone_role = a_role; } } pe_rsc_trace(rsc, "%s role: %s", rsc->id, role2text(clone_role)); return clone_role; } /*! * \internal * \brief Check whether a clone has an instance for every node * * \param[in] rsc Clone to check * \param[in] data_set Cluster state */ bool pe__is_universal_clone(pe_resource_t *rsc, pe_working_set_t *data_set) { if (pe_rsc_is_clone(rsc)) { clone_variant_data_t *clone_data = NULL; get_clone_variant_data(clone_data, rsc); if (clone_data->clone_max == g_list_length(data_set->nodes)) { return TRUE; } } return FALSE; } gboolean pe__clone_is_filtered(pe_resource_t *rsc, GListPtr only_rsc, gboolean check_parent) { gboolean passes = FALSE; clone_variant_data_t *clone_data = NULL; if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc))) { passes = TRUE; } else { get_clone_variant_data(clone_data, rsc); passes = pcmk__str_in_list(only_rsc, ID(clone_data->xml_obj_child)); if (!passes) { for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; if (!child_rsc->fns->is_filtered(child_rsc, only_rsc, FALSE)) { passes = TRUE; break; } } } } return !passes; } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 91cea7d7dc..3ea6b2d038 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,4016 +1,4016 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed, pe_working_set_t *data_set); static void determine_remote_online_status(pe_working_set_t *data_set, pe_node_t *this_node); static void add_node_attrs(xmlNode *attrs, pe_node_t *node, bool overwrite, pe_working_set_t *data_set); static void determine_online_status(xmlNode *node_state, pe_node_t *this_node, pe_working_set_t *data_set); static void unpack_lrm_resources(pe_node_t *node, xmlNode *lrm_state, pe_working_set_t *data_set); // Bitmask for warnings we only want to print once uint32_t pe_wo = 0; static gboolean is_dangling_guest_node(pe_node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (pe__is_guest_or_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } /*! * \brief Schedule a fence action for a node * * \param[in,out] data_set Current working set of cluster * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider `priority-fencing-delay` */ void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (pe__is_guest_node(node)) { pe_resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { if (!is_set(rsc->flags, pe_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", node->details->uname, reason, rsc->id); } else { crm_warn("Guest node %s will be fenced " "(by recovering its guest resource %s): %s", node->details->uname, rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); set_bit(node->details->remote_rsc->flags, pe_rsc_stop); } else if (pe__is_remote_node(node)) { pe_resource_t *rsc = node->details->remote_rsc; if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", node->details->uname, reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; crm_warn("Remote node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply `priority-fencing-delay` for remote nodes pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", node->details->uname, pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean", reason); } else { crm_warn("Cluster node %s %s: %s", node->details->uname, pe_can_fence(data_set, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \ "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \ "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \ "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \ "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(unsigned long long flag, const char *xpath, pe_working_set_t *data_set) { xmlXPathObjectPtr result = NULL; if (is_not_set(data_set->flags, flag)) { result = xpath_search(data_set->input, xpath); if (result && (numXpathResults(result) > 0)) { set_bit(data_set->flags, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = crm_str_table_new(); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; data_set->config_hash = config_hash; pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Watchdog will be used via SBD if fencing is required " "and stonith-watchdog-timeout is nonzero"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set); value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = (int) crm_parse_interval_spec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); if (!strcmp(data_set->stonith_action, "poweroff")) { pe_warn_once(pe_wo_poweroff, "Support for stonith-action of 'poweroff' is deprecated " "and will be removed in a future release (use 'off' instead)"); data_set->stonith_action = "off"; } crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); value = pe_pref(data_set->config_hash, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); if (value) { data_set->priority_fencing_delay = crm_parse_interval_spec(value) / 1000; crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay); } set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_ignore; } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_freeze; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { data_set->no_quorum_policy = no_quorum_demote; } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { int do_panic = 0; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) { data_set->no_quorum_policy = no_quorum_suicide; } else { crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { pcmk__config_err("Resetting no-quorum-policy to 'stop' because " "fencing is disabled"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable resources and stop other resources"); break; case no_quorum_suicide: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount"); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing); } if (is_set(data_set->flags, pe_flag_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes"); } pcmk__score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); pcmk__score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); pcmk__score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock); crm_trace("Resources will%s be locked to cleanly shut down nodes", (is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not")); if (is_set(data_set->flags, pe_flag_shutdown_lock)) { value = pe_pref(data_set->config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000; crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock); } return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_secure); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data->digest_secure_calc); free(data); } pe_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { pe_node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } new_node = calloc(1, sizeof(pe_node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct pe_node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (pcmk__str_eq(type, "remote", pcmk__str_casei)) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) { new_node->details->type = node_member; } new_node->details->attrs = crm_str_table_new(); if (pe__is_guest_or_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("cluster")); } new_node->details->utilization = crm_str_table_new(); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS, pcmk__str_casei)) { continue; } for (attr = __xml_first_child_element(attr_set); attr != NULL; attr = __xml_next_element(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) { remote_name = value; } else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) { remote_server = value; } else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) { remote_port = value; } else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) { connect_timeout = value; } else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) { remote_allow_migrate=value; } else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (pe_find_resource(data->resources, remote_name) != NULL) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node) { if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (is_set(data_set->flags, pe_flag_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; pe_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE "> entry in configuration without id"); continue; } new_node = pe_create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data, new_node->details->utilization, NULL, FALSE, data_set); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); pe_create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(pe_resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) { pe_resource_t *container = pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; set_bit(container->flags, pe_rsc_is_container); container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found remote node %s defined by resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. */ if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc) { pe_node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); pe_rsc_trace(new_rsc, "Linking remote connection resource %s to node %s", new_rsc->id, remote_node->details->uname); remote_node->details->remote_rsc = new_rsc; if (new_rsc->container == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(data_set, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); } } static void destroy_tag(gpointer data) { pe_tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] data_set Where to put resource information * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when common_unpack() calls resource_location() */ gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { pe_resource_t *new_rsc = NULL; if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, pcmk__str_none)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set) && (new_rsc != NULL)) { data_set->resources = g_list_append(data_set->resources, new_rsc); pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", crm_element_name(xml_obj), crm_str(ID(xml_obj))); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option"); pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_tag); for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) { continue; } if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID, crm_element_name(xml_tag)); continue; } for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) { continue; } if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID, crm_element_name(xml_obj_ref), tag_id); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; pe_ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (pcmk__str_empty(ticket_id)) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } static void unpack_handle_remote_attrs(pe_node_t *this_node, xmlNode *state, pe_working_set_t * data_set) { const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; pe_resource_t *rsc = NULL; if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { return; } if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) { return; } crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname); this_node->details->remote_maintenance = crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0"); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (pe__shutdown_requested(this_node)) { crm_info("Node %s is shutting down", this_node->details->uname); this_node->details->shutdown = TRUE; if (rsc) { rsc->next_role = RSC_ROLE_STOPPED; } } if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || (rsc && !is_set(rsc->flags, pe_rsc_managed))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (pe__is_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("Node %s has resource discovery disabled", this_node->details->uname); this_node->details->rsc_discovery_enabled = FALSE; } } } static bool unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) { bool changed = false; xmlNode *lrm_rsc = NULL; for (xmlNode *state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { const char *id = NULL; const char *uname = NULL; pe_node_t *this_node = NULL; bool process = FALSE; if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (this_node->details->unpacked) { crm_trace("Node %s was already processed", id); continue; } else if (!pe__is_guest_or_remote_node(this_node) && is_set(data_set->flags, pe_flag_stonith_enabled)) { // A redundant test, but preserves the order for regression tests process = TRUE; } else if (pe__is_guest_or_remote_node(this_node)) { bool check = FALSE; pe_resource_t *rsc = this_node->details->remote_rsc; if(fence) { check = TRUE; } else if(rsc == NULL) { /* Not ready yet */ } else if (pe__is_guest_node(this_node) && rsc->role == RSC_ROLE_STARTED && rsc->container->role == RSC_ROLE_STARTED) { /* Both the connection and its containing resource need to be * known to be up before we process resources running in it. */ check = TRUE; crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED); } else if (!pe__is_guest_node(this_node) && ((rsc->role == RSC_ROLE_STARTED) || is_set(data_set->flags, pe_flag_shutdown_lock))) { check = TRUE; crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED); } if (check) { determine_remote_online_status(data_set, this_node); unpack_handle_remote_attrs(this_node, state, data_set); process = TRUE; } } else if (this_node->details->online) { process = TRUE; } else if (fence) { process = TRUE; } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) { process = TRUE; } if(process) { crm_trace("Processing lrm resource entries on %shealthy%s node: %s", fence?"un":"", (pe__is_guest_or_remote_node(this_node)? " remote" : ""), this_node->details->uname); changed = TRUE; this_node->details->unpacked = TRUE; lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } return changed; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; pe_node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, free, destroy_ticket); } for (state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { unpack_tickets_state((xmlNode *) state, data_set); } else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { xmlNode *attrs = NULL; const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { pcmk__config_warn("Ignoring recorded node status for '%s' " "because no longer in configuration", uname); continue; } else if (pe__is_guest_or_remote_node(this_node)) { /* online state for remote nodes is determined by the * rsc state after all the unpacking is done. we do however * need to mark whether or not the node has been fenced as this plays * a role during unpacking cluster node resource state */ this_node->details->remote_was_fenced = crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0"); continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (is_not_set(data_set->flags, pe_flag_have_quorum) && this_node->details->online && (data_set->no_quorum_policy == no_quorum_suicide)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ pe_fence_node(data_set, this_node, "cluster does not have quorum", FALSE); } } } while(unpack_node_loop(status, FALSE, data_set)) { crm_trace("Start another loop"); } // Now catch any nodes we didn't see unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (data_set->stop_needed != NULL) { for (GList *item = data_set->stop_needed; item; item = item->next) { pe_resource_t *container = item->data; pe_node_t *node = pe__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(data_set->stop_needed); data_set->stop_needed = NULL; } for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *this_node = gIter->data; if (this_node == NULL) { continue; } else if (!pe__is_guest_or_remote_node(this_node)) { continue; } else if(this_node->details->unpacked) { continue; } determine_remote_online_status(data_set, this_node); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, pe_node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei)) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, pe_node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; bool crmd_online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = pe_node_attribute_raw(this_node, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); crmd_online = pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei); if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); /* Slightly different criteria since we can't shut down a dead peer */ online = crmd_online; } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE); } else if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_casei)) { pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria", FALSE); } else if (do_terminate == FALSE && pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_casei)) { if (crm_is_true(in_cluster) || crmd_online) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_casei) && crm_is_true(in_cluster) == FALSE && !crmd_online) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { // Consider `priority-fencing-delay` for lost nodes pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE); } else if (!crmd_online) { pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "termination was requested", FALSE); } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { crm_info("Node %s is active", this_node->details->uname); } else if (pcmk__strcase_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static void determine_remote_online_status(pe_working_set_t * data_set, pe_node_t * this_node) { pe_resource_t *rsc = this_node->details->remote_rsc; pe_resource_t *container = NULL; pe_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && pcmk__list_of_1(rsc->running_on)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); } static void determine_online_status(xmlNode * node_state, pe_node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); CRM_CHECK(this_node != NULL, return); this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!pcmk__str_empty(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = calloc(base_name_len + 3, sizeof(char)); CRM_ASSERT(zero); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static pe_resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { pe_resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { pe_node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history */ static pe_resource_t * create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id, pe_node_t *node, pe_working_set_t *data_set) { pe_resource_t *top = pe__create_clone_child(parent, data_set); // find_rsc() because we might be a cloned group pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of clone-max instances); * (3) a newly created orphan (i.e. clone-max instances are already active). * * \param[in] data_set Cluster information * \param[in] node Node on which to check for instance * \param[in] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (without instance) */ static pe_resource_t * find_anonymous_clone(pe_working_set_t * data_set, pe_node_t * node, pe_resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; pe_resource_t *rsc = NULL; pe_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; pe_resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and * (3) when we re-run calculations on the same data set as part of a * simulation. */ child->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (((pe_node_t *)locations->data)->details == node->details) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if globally-unique is switched from true to * false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->running_on) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } else { pe_rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pe_rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && is_not_set(child->flags, pe_rsc_block)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); /* ... but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance && inactive_instance->pending_node && (inactive_instance->pending_node->details != node->details)) { inactive_instance = NULL; } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has "requires" set to "quorum" or "nothing", and we don't * have a clone instance for every node, we don't want to consume a valid * instance number for unclean nodes. Such instances may appear to be active * according to the history, but should be considered inactive, so we can * start an instance elsewhere. Treat such instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pe__is_guest_node(node) && !pe__is_universal_clone(parent, data_set)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, data_set); pe_rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static pe_resource_t * unpack_find_resource(pe_working_set_t * data_set, pe_node_t * node, const char *rsc_id, xmlNode * rsc_entry) { pe_resource_t *rsc = NULL; pe_resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when clone-max=0, we create * a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); pe_resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->variant > pe_native) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pe_rsc_is_anon_clone(parent)) { if (pe_rsc_is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei) && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : "")); } return rsc; } static pe_resource_t * process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t * data_set) { pe_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pe_rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set); } return rsc; } static void process_rsc_state(pe_resource_t * rsc, pe_node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { pe_node_t *tmpnode = NULL; char *reason = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { pe_resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { pe_node_t *n = pe__copy_node(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && node->details->maintenance == FALSE && is_set(rsc->flags, pe_rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pe__is_guest_node(node)) { set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (pe__is_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason, FALSE); } free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_demote: set_bit(rsc->flags, pe_rsc_failed); demote_action(rsc, node, FALSE); break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(data_set, node, reason, FALSE); free(reason); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); if (rsc->container && pe_rsc_is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. */ data_set->stop_needed = g_list_prepend(data_set->stop_needed, rsc->container); } else if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); } if (tmpnode && pe__is_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(data_set, tmpnode, "remote connection is unrecoverable", FALSE); } } /* require the stop action regardless if fencing is occurring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { rsc->next_role = RSC_ROLE_STOPPED; } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { pcmk__config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { pcmk__config_warn("Resource '%s' must be stopped manually on " "%s because cluster is configured not to " "stop active orphans", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); switch (on_fail) { case action_fail_ignore: break; case action_fail_demote: case action_fail_block: set_bit(rsc->flags, pe_rsc_failed); break; default: set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); break; } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP, FALSE); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { pe_action_t *stop = (pe_action_t *) gIter->data; stop->flags |= pe_action_optional; } g_list_free(possible_matches); } } /* create active recurring operations as optional */ static void process_recurring(pe_node_t * node, pe_resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = ID(rsc_op); counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (pcmk__str_eq(status, "-1", pcmk__str_casei)) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; } else if (pcmk__strcase_any_of(task, CRMD_ACTION_START, CRMD_ACTION_MIGRATED, NULL)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); - if (pcmk__str_eq(rc, "0", pcmk__str_casei) || pcmk__str_eq(rc, "8", pcmk__str_casei)) { + if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } } else if (pcmk__strcase_any_of(task, CRMD_ACTION_PROMOTE, CRMD_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } // If resource history entry has shutdown lock, remember lock node and time static void unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { time_t lock_time = 0; // When lock started (i.e. node shutdown time) if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { if ((data_set->shutdown_lock > 0) && (get_effective_time(data_set) > (lock_time + data_set->shutdown_lock))) { pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, node->details->uname); pe__clear_resource_history(rsc, node, data_set); } else { rsc->lock_node = node; rsc->lock_time = lock_time; } } } static pe_resource_t * unpack_lrm_rsc_state(pe_node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = action_fail_ignore; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_prepend(op_list, rsc_op); } } if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { rsc = process_orphan_resource(rsc_entry, node, data_set); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node if (is_set(data_set->flags, pe_flag_shutdown_lock)) { unpack_shutdown_lock(rsc_entry, rsc, node, data_set); } /* process operations */ saved_role = rsc->role; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { pe_resource_t *rsc; pe_resource_t *container; const char *rsc_id; const char *container_id; if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } static void unpack_lrm_resources(pe_node_t *node, xmlNode *lrm_rsc_list, pe_working_set_t *data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { pe_resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } } static void set_active(pe_resource_t * rsc) { pe_resource_t *top = uber_parent(rsc); if (top && is_set(top->flags, pe_rsc_promotable)) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { pe_node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, bool success_only, pe_working_set_t *data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; xmlNode *xml = NULL; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? */ if (source && pcmk__str_eq(op, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && pcmk__str_eq(op, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); xml = get_xpath_object(xpath, data_set->input, LOG_DEBUG); if (xml && success_only) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_LRM_OP_ERROR; crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status); if ((rc != PCMK_OCF_OK) || (status != PCMK_LRM_OP_DONE)) { return NULL; } } return xml; } static int pe__call_id(xmlNode *op_xml) { int id = 0; if (op_xml) { crm_element_value_int(op_xml, XML_LRM_ATTR_CALLID, &id); } return id; } /*! * \brief Check whether a stop happened on the same node after some event * * \param[in] rsc Resource being checked * \param[in] node Node being checked * \param[in] xml_op Event that stop is being compared to * \param[in] data_set Cluster working set * * \return TRUE if stop happened after event, FALSE otherwise * * \note This is really unnecessary, but kept as a safety mechanism. We * currently don't save more than one successful event in history, so this * only matters when processing really old CIB files that we don't * technically support anymore, or as preparation for logging an extended * history in the future. */ static bool stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->uname, NULL, TRUE, data_set); return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op))); } static void unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { /* A successful migration sequence is: * migrate_to on source node * migrate_from on target node * stop on source node * * If a migrate_to is followed by a stop, the entire migration (successful * or failed) is complete, and we don't care what happened on the target. * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from failed, make sure the resource gets * stopped on both source and target (if up). * * If the migrate_to and migrate_from both succeeded (which also implies the * resource is no longer running on the source), but there is no stop, the * migration is considered to be "dangling". Schedule a stop on the source * in this case. */ int from_rc = 0; int from_status = 0; pe_node_t *target_node = NULL; pe_node_t *source_node = NULL; xmlNode *migrate_from = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); if (stop_happened_after(rsc, node, xml_op, data_set)) { return; } // Clones are not allowed to migrate, so role can't be master rsc->role = RSC_ROLE_STARTED; target_node = pe_find_node(data_set->nodes, target); source_node = pe_find_node(data_set->nodes, source); // Check whether there was a migrate_from action on the target migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, source, FALSE, data_set); if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { /* The migrate_to and migrate_from both succeeded, so mark the migration * as "dangling". This will be used to schedule a stop action on the * source without affecting the target. */ pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), source); rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed if (target_node && target_node->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, target_node->details->online); native_add_running(rsc, target_node, data_set); } } else { // Pending, or complete but erased if (target_node && target_node->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, target_node->details->online); native_add_running(rsc, target_node, data_set); if (source_node && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ rsc->partial_migration_target = target_node; rsc->partial_migration_source = source_node; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); set_bit(rsc->flags, pe_rsc_stop); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } static void unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { int target_stop_id = 0; int target_migrate_from_id = 0; xmlNode *target_stop = NULL; xmlNode *target_migrate_from = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be master. */ rsc->role = RSC_ROLE_STARTED; // Check for stop on the target target_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, target, NULL, TRUE, data_set); target_stop_id = pe__call_id(target_stop); // Check for migrate_from on the target target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, source, TRUE, data_set); target_migrate_from_id = pe__call_id(target_migrate_from); if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) { /* There was no stop on the source, or a stop that happened before a * migrate_from, so assume the resource is still active on the target * (if it is up). */ pe_node_t *target_node = pe_find_node(data_set->nodes, target); pe_rsc_trace(rsc, "stop (%d) + migrate_from (%d)", target_stop_id, target_migrate_from_id); if (target_node && target_node->details->online) { native_add_running(rsc, target_node, data_set); } } else if (target_migrate_from == NULL) { /* We know there was a stop on the target, but there may not have been a * migrate_from (the stop could have happened before migrate_from was * scheduled or attempted). * * That means this could be a "dangling" migration. But first, check * whether there is a newer migrate_from or start on the source node -- * it's possible the failed migration was followed by a successful * full restart or migration in the reverse direction, in which case we * don't want to force it to stop. */ xmlNode *source_migrate_from = NULL; xmlNode *source_start = NULL; int source_migrate_to_id = pe__call_id(xml_op); source_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, source, NULL, TRUE, data_set); if (pe__call_id(source_migrate_from) > source_migrate_to_id) { return; } source_start = find_lrm_op(rsc->id, CRMD_ACTION_START, source, NULL, TRUE, data_set); if (pe__call_id(source_start) > source_migrate_to_id) { return; } // Mark node as having dangling migration so we can force a stop later rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } static void unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, pe_working_set_t *data_set) { xmlNode *source_stop = NULL; xmlNode *source_migrate_to = NULL; const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); // Sanity check CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be master. */ rsc->role = RSC_ROLE_STARTED; // Check for a stop on the source source_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, source, NULL, TRUE, data_set); // Check for a migrate_to on the source source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, source, target, TRUE, data_set); if ((source_stop == NULL) || (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) { /* There was no stop on the source, or a stop that happened before * migrate_to, so assume the resource is still active on the source (if * it is up). */ pe_node_t *source_node = pe_find_node(data_set->nodes, source); if (source_node && source_node->details->online) { native_add_running(rsc, source_node, data_set); } } } static void record_failed_op(xmlNode *op, const pe_node_t *node, const pe_resource_t *rsc, pe_working_set_t *data_set) { xmlNode *xIter = NULL; const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); if (node->details->online == FALSE) { return; } for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if(pcmk__str_eq(op_key, key, pcmk__str_casei) && pcmk__str_eq(uname, node->details->uname, pcmk__str_casei)) { crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname); return; } } crm_trace("Adding entry %s on %s", op_key, node->details->uname); crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id); add_node_copy(data_set->failed, op); } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static const char * last_change_str(xmlNode *xml_op) { time_t when; const char *when_s = NULL; if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &when) == pcmk_ok) { when_s = pcmk__epoch2str(&when); if (when_s) { // Skip day of week to make message shorter when_s = strchr(when_s, ' '); if (when_s) { ++when_s; } } } return ((when_s && *when_s)? when_s : "unknown time"); } /*! * \internal * \brief Compare two on-fail values * * \param[in] first One on-fail value to compare * \param[in] second The other on-fail value to compare * * \return A negative number if second is more severe than first, zero if they * are equal, or a positive number if first is more severe than second. * \note This is only needed until the action_fail_response values can be * renumbered at the next API compatibility break. */ static int cmp_on_fail(enum action_fail_response first, enum action_fail_response second) { switch (first) { case action_fail_demote: switch (second) { case action_fail_ignore: return 1; case action_fail_demote: return 0; default: return -1; } break; case action_fail_reset_remote: switch (second) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: return 1; case action_fail_reset_remote: return 0; default: return -1; } break; case action_fail_restart_container: switch (second) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: case action_fail_reset_remote: return 1; case action_fail_restart_container: return 0; default: return -1; } break; default: break; } switch (second) { case action_fail_demote: return (first == action_fail_ignore)? -1 : 1; case action_fail_reset_remote: switch (first) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: return -1; default: return 1; } break; case action_fail_restart_container: switch (first) { case action_fail_ignore: case action_fail_demote: case action_fail_recover: case action_fail_reset_remote: return -1; default: return 1; } break; default: break; } return first - second; } static void unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; bool is_probe = false; pe_action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); CRM_ASSERT(rsc); CRM_CHECK(task != NULL, return); *last_failure = xml_op; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) { is_probe = true; } if (exit_reason == NULL) { exit_reason = ""; } if (is_not_set(data_set->flags, pe_flag_symmetric_cluster) && (rc == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, (is_probe? "probe" : task), rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); } else { crm_warn("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " rc=%d id=%s", services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, (is_probe? "probe" : task), rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); if (is_probe && (rc != PCMK_OCF_OK) && (rc != PCMK_OCF_NOT_RUNNING) && (rc != PCMK_OCF_RUNNING_MASTER)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the resource-discovery option for location constraints", rsc->id, node->details->uname); } record_failed_op(xml_op, node, rsc, data_set); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if (cmp_on_fail(*on_fail, action->on_fail) < 0) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (!strcmp(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (!strcmp(task, CRMD_ACTION_MIGRATE)) { unpack_migrate_to_failure(rsc, node, xml_op, data_set); } else if (!strcmp(task, CRMD_ACTION_MIGRATED)) { unpack_migrate_from_failure(rsc, node, xml_op, data_set); } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (!strcmp(task, CRMD_ACTION_DEMOTE)) { if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; rsc->next_role = RSC_ROLE_STOPPED; } else if(rc == PCMK_OCF_NOT_RUNNING) { rsc->role = RSC_ROLE_STOPPED; } else { /* Staying in master role would put the scheduler and controller * into a loop. Setting slave role is not dangerous because the * resource will be stopped as part of recovery, and any master * promotion will be ordered after that stop. */ rsc->role = RSC_ROLE_SLAVE; } } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; pe_resource_t *fail_rsc = rsc; if (fail_rsc->parent) { pe_resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_clone(parent) && is_not_set(parent->flags, pe_rsc_unique)) { /* For clone resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_notice("%s will not be started under current conditions", fail_rsc->id); /* make sure it doesn't come up again */ if (fail_rsc->allowed_nodes != NULL) { g_hash_table_destroy(fail_rsc->allowed_nodes); } fail_rsc->allowed_nodes = pe__node_list2table(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } /*! * \internal * \brief Remap operation status based on action result * * Given an action result, determine an appropriate operation status for the * purposes of responding to the action (the status provided by the executor is * not directly usable since the executor does not know what was expected). * * \param[in,out] rsc Resource that operation history entry is for * \param[in] rc Actual return code of operation * \param[in] target_rc Expected return code of operation * \param[in] node Node where operation was executed * \param[in] xml_op Operation history entry XML from CIB status * \param[in,out] on_fail What should be done about the result * \param[in] data_set Current cluster working set * * \return Operation status based on return code and action info * \note This may update the resource's current and next role. */ static int determine_op_status( pe_resource_t *rsc, int rc, int target_rc, pe_node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { guint interval_ms = 0; bool is_probe = false; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); CRM_ASSERT(rsc); CRM_CHECK(task != NULL, return PCMK_LRM_OP_ERROR); if (exit_reason == NULL) { exit_reason = ""; } crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) { is_probe = true; task = "probe"; } if (target_rc < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the * target_rc in the transition key, which (along with the similar case * of a corrupted transition key in the CIB) will be reported to this * function as -1. Pacemaker 2.0+ does not support rolling upgrades from * those versions or processing of saved CIB files from those versions, * so we do not need to care much about this case. */ result = PCMK_LRM_OP_ERROR; crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", key, node->details->uname); } else if (target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", key, node->details->uname, target_rc, services_ocf_exitcode_str(target_rc), rc, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason); } switch (rc) { case PCMK_OCF_OK: if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Probe found %s active on %s at %s", rsc->id, node->details->uname, last_change_str(xml_op)); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe && (rc != target_rc)) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Probe found %s active and promoted on %s at %s", rsc->id, node->details->uname, last_change_str(xml_op)); } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_DEGRADED_MASTER: case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_UNIMPLEMENT_FEATURE: if (interval_ms > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } // fall through case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: if (!pe_can_fence(data_set, node) && !strcmp(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " CRM_XS " rc=%d id=%s", rsc->id, task, node->details->uname, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, last_change_str(xml_op), rc, ID(xml_op)); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating unknown exit status %d from %s of %s " "on %s at %s as failure", rc, task, rsc->id, node->details->uname, last_change_str(xml_op)); result = PCMK_LRM_OP_ERROR; } break; } return result; } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(xmlNode *xml_op, const char *task, pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { if (!strcmp(task, "start") || !strcmp(task, "monitor")) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. */ pe__add_param_check(xml_op, rsc, node, pe_check_last_failure, data_set); } else { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); switch (digest_data->rc) { case RSC_DIGEST_UNKNOWN: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, get_op_key(xml_op), node->details->id); break; case RSC_DIGEST_MATCH: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn, pe_working_set_t *data_set) { pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id); if (remote_node) { pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, FALSE, data_set); order_actions(fence, action, pe_order_implies_then); } } static bool should_ignore_failure_timeout(pe_resource_t *rsc, xmlNode *xml_op, const char *task, guint interval_ms, bool is_last_failure, pe_working_set_t *data_set) { /* Clearing failures of recurring monitors has special concerns. The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if (rsc->remote_reconnect_ms && is_set(data_set->flags, pe_flag_stonith_enabled) && (interval_ms != 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && !remote_node->details->remote_was_fenced) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * * \param[in] rsc Resource that operation happened to * \param[in] node Node that operation happened on * \param[in] rc Actual result of operation * \param[in] xml_op Operation history entry XML * \param[in] data_set Current working set * * \return TRUE if operation history entry is expired, FALSE otherwise */ static bool check_operation_expiry(pe_resource_t *rsc, pe_node_t *node, int rc, xmlNode *xml_op, pe_working_set_t *data_set) { bool expired = FALSE; bool is_last_failure = pcmk__ends_with(ID(xml_op), "_last_failure_0"); time_t last_run = 0; guint interval_ms = 0; int unexpired_fail_count = 0; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *clear_reason = NULL; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if ((rsc->failure_timeout > 0) && (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0)) { // Resource has a failure-timeout, and history entry has a timestamp time_t now = get_effective_time(data_set); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + rsc->failure_timeout)) && !should_ignore_failure_timeout(rsc, xml_op, task, interval_ms, is_last_failure, data_set)) { expired = TRUE; } // Does the resource as a whole have an unexpired fail count? unexpired_fail_count = pe_get_failcount(node, rsc, &last_failure, pe_fc_effective, xml_op, data_set); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" " last-failure@%lld", ID(xml_op), (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, rsc->failure_timeout, (long long) last_failure); last_failure += rsc->failure_timeout + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, data_set); } } if (expired) { if (pe_get_failcount(node, rsc, NULL, pe_fc_default, xml_op, data_set)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ expired = FALSE; } } else if (is_last_failure && rsc->remote_reconnect_ms) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(xml_op, task, rsc, node, data_set)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { // Schedule clearing of the fail count pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason, data_set); if (is_set(data_set->flags, pe_flag_stonith_enabled) && rsc->remote_reconnect_ms) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_loop() is done). */ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", task, rsc->id); order_after_remote_fencing(clear_op, rsc, data_set); } } if (expired && (interval_ms == 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: // Don't expire probes that return these values expired = FALSE; break; } } return expired; } int pe__target_rc_from_xml(xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } static enum action_fail_response get_action_on_fail(pe_resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { enum action_fail_response result = action_fail_recover; pe_action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, const char * task, int rc, xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); CRM_ASSERT(xml_op); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { if (last_failure) { const char *op_key = get_op_key(xml_op); const char *last_failure_key = get_op_key(last_failure); if (pcmk__str_eq(op_key, last_failure_key, pcmk__str_casei)) { clear_past_failure = TRUE; } } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) { rsc->role = RSC_ROLE_STOPPED; clear_past_failure = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) { if (*on_fail == action_fail_demote) { // Demote clears an error only if on-fail=demote clear_past_failure = TRUE; } rsc->role = RSC_ROLE_SLAVE; } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { unpack_migrate_to_success(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_demote: case action_fail_recover: case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_reset_remote: if (rsc->remote_reconnect_ms == 0) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; } } } /*! * \internal * \brief Remap informational monitor results to usual values * * Certain OCF result codes are for providing extended information to the * user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by pacemaker. * * \param[in] rc Actual result of a monitor action * \param[in] xml_op Operation history XML * \param[in] node Node that operation happened on * \param[in] rsc Resource that operation happened to * \param[in] data_set Cluster working set * * \return Result code that pacemaker should use * * \note If the result is remapped, and the node is not shutting down or failed, * the operation will be recorded in the data set's list of failed * operations, to highlight it for the user. */ static int remap_monitor_rc(int rc, xmlNode *xml_op, const pe_node_t *node, const pe_resource_t *rsc, pe_working_set_t *data_set) { int remapped_rc = rc; switch (rc) { case PCMK_OCF_DEGRADED: remapped_rc = PCMK_OCF_OK; break; case PCMK_OCF_DEGRADED_MASTER: remapped_rc = PCMK_OCF_RUNNING_MASTER; break; default: break; } if (rc != remapped_rc) { crm_trace("Remapping monitor result %d to %d", rc, remapped_rc); if (!node->details->shutdown || node->details->online) { record_failed_op(xml_op, node, rsc, data_set); } } return remapped_rc; } static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *on_fail, pe_working_set_t *data_set) { int rc = 0; int task_id = 0; int target_rc = 0; int status = PCMK_LRM_OP_UNKNOWN; guint interval_ms = 0; const char *task = NULL; const char *task_key = NULL; const char *exit_reason = NULL; bool expired = FALSE; pe_resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc && node && xml_op, return); target_rc = pe__target_rc_from_xml(xml_op); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); if (exit_reason == NULL) { exit_reason = ""; } crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); CRM_CHECK(task != NULL, return); CRM_CHECK(status <= PCMK_LRM_OP_INVALID, return); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return); if (!strcmp(task, CRMD_ACTION_NOTIFY) || !strcmp(task, CRMD_ACTION_METADATA)) { /* safe to ignore these */ return; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean." " Further action depends on the value of the stop's on-fail attribute", node->details->uname, rsc->id); } /* It should be possible to call remap_monitor_rc() first then call * check_operation_expiry() only if rc != target_rc, because there should * never be a fail count without at least one unexpected result in the * resource history. That would be more efficient by avoiding having to call * check_operation_expiry() for expected results. * * However, we do have such configurations in the scheduler regression * tests, even if it shouldn't be possible with the current code. It's * probably a good idea anyway, but that would require updating the test * inputs to something currently possible. */ if ((status != PCMK_LRM_OP_NOT_INSTALLED) && check_operation_expiry(rsc, node, rc, xml_op, data_set)) { expired = TRUE; } if (!strcmp(task, CRMD_ACTION_STATUS)) { rc = remap_monitor_rc(rc, xml_op, node, rsc, data_set); } if (expired && (rc != target_rc)) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); if (interval_ms == 0) { crm_notice("Ignoring expired %s failure on %s " CRM_XS " actual=%d expected=%d magic=%s", task_key, node->details->uname, rc, target_rc, magic); goto done; } else if(node->details->online && node->details->unclean == FALSE) { /* Reschedule the recurring monitor. CancelXmlOp() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so check_action_definition() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has an op-digest (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). */ crm_notice("Rescheduling %s after failure expired on %s " CRM_XS " actual=%d expected=%d magic=%s", task_key, node->details->uname, rc, target_rc, magic); crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } /* If the executor reported an operation status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. */ if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); pe_rsc_trace(rsc, "Remapped %s status to %d", task_key, status); } switch (status) { case PCMK_LRM_OP_CANCELLED: // Should never happen pe_err("Resource history contains cancellation '%s' " "(%s of %s on %s at %s)", ID(xml_op), task, rsc->id, node->details->uname, last_change_str(xml_op)); break; case PCMK_LRM_OP_PENDING: if (!strcmp(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (!strcmp(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. */ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); pe_node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { if ((interval_ms != 0) || strcmp(task, CRMD_ACTION_STATUS)) { rsc->pending_task = strdup(task); rsc->pending_node = node; } else { /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, enable the below and the corresponding part of * native.c:native_pending_task(). */ #if 0 rsc->pending_task = strdup("probe"); rsc->pending_node = node; #endif } } break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", task, rsc->id, node->details->uname, last_change_str(xml_op), ID(xml_op)); update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " CRM_XS " status=%d rc=%d id=%s", task, rsc->id, node->details->uname, status, rc, ID(xml_op)); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_CONNECTED: if (pe__is_guest_or_remote_node(node) && is_set(node->details->remote_rsc->flags, pe_rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ set_bit(node->details->remote_rsc->flags, pe_rsc_failed); set_bit(node->details->remote_rsc->flags, pe_rsc_stop); } // fall through case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: case PCMK_LRM_OP_INVALID: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && !strcmp(task, CRMD_ACTION_STOP))) { crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s " "succeeded " CRM_XS " rc=%d id=%s", task, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rsc->id, node->details->uname, last_change_str(xml_op), rc, ID(xml_op)); update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); record_failed_op(xml_op, node, rsc, data_set); if ((failure_strategy == action_fail_restart_container) && cmp_on_fail(*on_fail, action_fail_recover) <= 0) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from restarting on %s because " "of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s", parent->id, node->details->uname, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rc, ID(xml_op)); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from restarting anywhere because " "of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s", parent->id, services_ocf_exitcode_str(rc), (*exit_reason? ": " : ""), exit_reason, rc, ID(xml_op)); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role)); } static void add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite, pe_working_set_t *data_set) { const char *cluster_name = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); if (pcmk__str_eq(node->details->id, data_set->dc_uuid, pcmk__str_casei)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data, node->details->attrs, NULL, overwrite, data_set); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = pe_node_attribute_raw(node, "site-name"); if (site_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(cluster_name)); } } } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); pe_node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child_element(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next_element(lrm_rsc)) { if (pcmk__str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index fc78b7a02a..3d61094e87 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2202 +1,2202 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk__ends_with_ext() #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" #define SUMMARY "Provides a summary of cluster's current state.\n\n" \ "Outputs varying levels of detail in a number of different formats." /* * Definitions indicating which items to print */ static unsigned int show; /* * Definitions indicating how to output */ static mon_output_format_t output_format = mon_output_unset; /* other globals */ static GIOChannel *io_channel = NULL; static GMainLoop *mainloop = NULL; static guint timer_id = 0; static mainloop_timer_t *refresh_timer = NULL; static pe_working_set_t *mon_data_set = NULL; static cib_t *cib = NULL; static stonith_t *st = NULL; static xmlNode *current_cib = NULL; static GError *error = NULL; static pcmk__common_args_t *args = NULL; static pcmk__output_t *out = NULL; static GOptionContext *context = NULL; static gchar **processed_args = NULL; static time_t last_refresh = 0; crm_trigger_t *refresh_trigger = NULL; static pcmk__supported_format_t formats[] = { #if CURSES_ENABLED CRM_MON_SUPPORTED_FORMAT_CURSES, #endif PCMK__SUPPORTED_FORMAT_HTML, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, CRM_MON_SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; /* Define exit codes for monitoring-compatible output * For nagios plugins, the possibilities are * OK=0, WARN=1, CRIT=2, and UNKNOWN=3 */ #define MON_STATUS_WARN CRM_EX_ERROR #define MON_STATUS_CRIT CRM_EX_INVALID_PARAM #define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE #define RECONNECT_MSECS 5000 struct { int reconnect_msec; gboolean daemonize; gboolean show_bans; char *pid_file; char *external_agent; char *external_recipient; char *neg_location_prefix; char *only_node; char *only_rsc; unsigned int mon_ops; GSList *user_includes_excludes; GSList *includes_excludes; } options = { .reconnect_msec = RECONNECT_MSECS, .mon_ops = mon_op_default }; static void clean_up_connections(void); static crm_exit_t clean_up(crm_exit_t exit_code); static void crm_diff_update(const char *event, xmlNode * msg); static gboolean mon_refresh_display(gpointer user_data); static int cib_connect(gboolean full); static void mon_st_callback_event(stonith_t * st, stonith_event_t * e); static void mon_st_callback_display(stonith_t * st, stonith_event_t * e); static void kick_refresh(gboolean data_updated); static unsigned int all_includes(mon_output_format_t fmt) { if (fmt == mon_output_monitor || fmt == mon_output_plain || fmt == mon_output_console) { return ~mon_show_options; } else { return mon_show_all; } } static unsigned int default_includes(mon_output_format_t fmt) { switch (fmt) { case mon_output_monitor: case mon_output_plain: case mon_output_console: return mon_show_stack | mon_show_dc | mon_show_times | mon_show_counts | mon_show_nodes | mon_show_resources | mon_show_failures; case mon_output_xml: case mon_output_legacy_xml: return all_includes(fmt); case mon_output_html: case mon_output_cgi: return mon_show_summary | mon_show_nodes | mon_show_resources | mon_show_failures; default: return 0; } } struct { const char *name; unsigned int bit; } sections[] = { { "attributes", mon_show_attributes }, { "bans", mon_show_bans }, { "counts", mon_show_counts }, { "dc", mon_show_dc }, { "failcounts", mon_show_failcounts }, { "failures", mon_show_failures }, { "fencing", mon_show_fencing_all }, { "fencing-failed", mon_show_fence_failed }, { "fencing-pending", mon_show_fence_pending }, { "fencing-succeeded", mon_show_fence_worked }, { "nodes", mon_show_nodes }, { "operations", mon_show_operations }, { "options", mon_show_options }, { "resources", mon_show_resources }, { "stack", mon_show_stack }, { "summary", mon_show_summary }, { "tickets", mon_show_tickets }, { "times", mon_show_times }, { NULL } }; static unsigned int find_section_bit(const char *name) { for (int i = 0; sections[i].name != NULL; i++) { if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) { return sections[i].bit; } } return 0; } static gboolean apply_exclude(const gchar *excludes, GError **error) { char **parts = NULL; parts = g_strsplit(excludes, ",", 0); for (char **s = parts; *s != NULL; s++) { unsigned int bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = 0; } else if (pcmk__str_eq(*s, "none", pcmk__str_none)) { show = all_includes(output_format); } else if (bit != 0) { show &= ~bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--exclude options: all, attributes, bans, counts, dc, " "failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, nodes, none, " "operations, options, resources, stack, summary, " "tickets, times"); return FALSE; } } g_strfreev(parts); return TRUE; } static gboolean apply_include(const gchar *includes, GError **error) { char **parts = NULL; parts = g_strsplit(includes, ",", 0); for (char **s = parts; *s != NULL; s++) { unsigned int bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = all_includes(output_format); } else if (pcmk__starts_with(*s, "bans")) { show |= mon_show_bans; if (options.neg_location_prefix != NULL) { free(options.neg_location_prefix); options.neg_location_prefix = NULL; } if (strlen(*s) > 4 && (*s)[4] == ':') { options.neg_location_prefix = strdup(*s+5); } - } else if (pcmk__str_eq(*s, "default", pcmk__str_none) || pcmk__str_eq(*s, "defaults", pcmk__str_none)) { + } else if (pcmk__str_any_of(*s, "default", "defaults", NULL)) { show |= default_includes(output_format); } else if (pcmk__str_eq(*s, "none", pcmk__str_none)) { show = 0; } else if (bit != 0) { show |= bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--include options: all, attributes, bans[:PREFIX], counts, dc, " "default, failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, nodes, none, operations, " "options, resources, stack, summary, tickets, times"); return FALSE; } } g_strfreev(parts); return TRUE; } static gboolean apply_include_exclude(GSList *lst, mon_output_format_t fmt, GError **error) { gboolean rc = TRUE; GSList *node = lst; /* Set the default of what to display here. Note that we OR everything to * show instead of set show directly because it could have already had some * settings applied to it in main. */ show |= default_includes(fmt); while (node != NULL) { char *s = node->data; if (pcmk__starts_with(s, "--include=")) { rc = apply_include(s+10, error); } else if (pcmk__starts_with(s, "-I=")) { rc = apply_include(s+3, error); } else if (pcmk__starts_with(s, "--exclude=")) { rc = apply_exclude(s+10, error); } else if (pcmk__starts_with(s, "-U=")) { rc = apply_exclude(s+3, error); } if (rc != TRUE) { break; } node = node->next; } return rc; } static gboolean user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s); return TRUE; } static gboolean include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.includes_excludes = g_slist_append(options.includes_excludes, s); return TRUE; } static gboolean as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("html"); output_format = mon_output_cgi; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } if (args->output_dest != NULL) { free(args->output_dest); args->output_dest = NULL; } if (optarg != NULL) { args->output_dest = strdup(optarg); } args->output_ty = strdup("html"); output_format = mon_output_html; umask(S_IWGRP | S_IWOTH); return TRUE; } static gboolean as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("text"); output_format = mon_output_monitor; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("xml"); output_format = mon_output_legacy_xml; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_atoi(optarg, "2"); switch (rc) { case 3: options.mon_ops |= mon_op_fence_full_history | mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing", data, err); case 2: options.mon_ops |= mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing", data, err); case 1: options.mon_ops |= mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err); case 0: options.mon_ops &= ~(mon_op_fence_history | mon_op_fence_connect); return include_exclude_cb("--exclude", "fencing", data, err); default: g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3"); return FALSE; } } static gboolean group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_group_by_node; return TRUE; } static gboolean hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--exclude", "summary", data, err); } static gboolean inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_inactive_resources; return TRUE; } static gboolean no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { output_format = mon_output_plain; return TRUE; } static gboolean one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_brief; return TRUE; } static gboolean print_clone_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_clone_detail; return TRUE; } static gboolean print_pending_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_pending; return TRUE; } static gboolean print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_timing; return include_exclude_cb("--include", "operations", data, err); } static gboolean reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_get_msec(optarg); if (rc == -1) { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg); return FALSE; } else { options.reconnect_msec = crm_parse_interval_spec(optarg); } return TRUE; } static gboolean show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "attributes", data, err); } static gboolean show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg != NULL) { char *s = crm_strdup_printf("bans:%s", optarg); gboolean rc = include_exclude_cb("--include", s, data, err); free(s); return rc; } else { return include_exclude_cb("--include", "bans", data, err); } } static gboolean show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "failcounts", data, err); } static gboolean show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "failcounts,operations", data, err); } static gboolean show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "tickets", data, err); } static gboolean use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { setenv("CIB_file", optarg, 1); options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean watch_fencing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_watch_fencing; return TRUE; } #define INDENT " " /* *INDENT-OFF* */ static GOptionEntry addl_entries[] = { { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb, "Update frequency (default is 5 seconds)", "TIMESPEC" }, { "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, one_shot_cb, "Display the cluster status once on the console and exit", NULL }, { "daemonize", 'd', 0, G_OPTION_ARG_NONE, &options.daemonize, "Run in the background as a daemon.\n" INDENT "Requires at least one of --output-to and --external-agent.", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file, "(Advanced) Daemon pid file location", "FILE" }, { "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent, "A program to run when resource operations take place", "FILE" }, { "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient, "A recipient for your program (assuming you want the program to send something to someone).", "RCPT" }, { "watch-fencing", 'W', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, watch_fencing_cb, "Listen for fencing events. For use with --external-agent.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb, NULL, NULL }, { NULL } }; static GOptionEntry display_entries[] = { { "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to include in the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to exclude from the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node, "When displaying information about nodes, show only what's related to the given\n" INDENT "node, or to all nodes tagged with the given tag", "NODE" }, { "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc, "When displaying information about resources, show only what's related to the given\n" INDENT "resource, or to all resources tagged with the given tag", "RSC" }, { "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb, "Group resources by node", NULL }, { "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb, "Display inactive resources", NULL }, { "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb, "Display resource fail counts", NULL }, { "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb, "Display resource operation history", NULL }, { "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb, "Display resource operation history with timing details", NULL }, { "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb, "Display cluster tickets", NULL }, { "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb, "Show fence history:\n" INDENT "0=off, 1=failures and pending (default without option),\n" INDENT "2=add successes (default without value for option),\n" INDENT "3=show full history without reduction to most recent of each flavor", "LEVEL" }, { "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb, "Display negative location constraints [optionally filtered by id prefix]", NULL }, { "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb, "Display node attributes", NULL }, { "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb, "Hide all headers", NULL }, { "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_clone_detail_cb, "Show more details (node IDs, individual clone instances)", NULL }, { "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb, "Brief output", NULL }, { "pending", 'j', G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_pending_cb, "Display pending state if 'record-pending' is enabled", NULL }, { "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_simple_cb, "Display the cluster status once as a simple one line output (suitable for nagios)", NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { { "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb, "Write cluster status to the named HTML file.\n" INDENT "Use --output-as=html --output-to=FILE instead.", "FILE" }, { "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb, "Write cluster status as XML to stdout. This will enable one-shot mode.\n" INDENT "Use --output-as=xml instead.", NULL }, { "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb, "Disable the use of ncurses.\n" INDENT "Use --output-as=text instead.", NULL }, { "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb, "Web mode with output suitable for CGI (preselected when run as *.cgi).\n" INDENT "Use --output-as=html --html-cgi instead.", NULL }, { NULL } }; /* *INDENT-ON* */ static gboolean mon_timer_popped(gpointer data) { int rc = pcmk_ok; #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif if (timer_id > 0) { g_source_remove(timer_id); timer_id = 0; } print_as(output_format, "Reconnecting...\n"); rc = cib_connect(TRUE); if (rc != pcmk_ok) { timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL); } return FALSE; } static void do_mon_cib_connection_destroy(gpointer user_data, bool is_error) { if (is_error) { out->err(out, "Connection to the cluster-daemons terminated"); } else { out->info(out, "Connection to the cluster-daemons terminated"); } if (refresh_timer != NULL) { /* we'll trigger a refresh after reconnect */ mainloop_timer_stop(refresh_timer); } if (timer_id) { /* we'll trigger a new reconnect-timeout at the end */ g_source_remove(timer_id); timer_id = 0; } if (st) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY); if (st->state != stonith_disconnected) { st->cmds->disconnect(st); } } if (cib) { cib->cmds->signoff(cib); timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL); } return; } static void mon_cib_connection_destroy_regular(gpointer user_data) { do_mon_cib_connection_destroy(user_data, false); } static void mon_cib_connection_destroy_error(gpointer user_data) { do_mon_cib_connection_destroy(user_data, true); } /* * Mainloop signal handler. */ static void mon_shutdown(int nsig) { clean_up(CRM_EX_OK); } #if CURSES_ENABLED static sighandler_t ncurses_winch_handler; static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); mainloop_set_trigger(refresh_trigger); } not_done--; } #endif static int cib_connect(gboolean full) { int rc = pcmk_ok; static gboolean need_pass = TRUE; CRM_CHECK(cib != NULL, return -EINVAL); if (getenv("CIB_passwd") != NULL) { need_pass = FALSE; } if (is_set(options.mon_ops, mon_op_fence_connect) && st == NULL) { st = stonith_api_new(); } if (is_set(options.mon_ops, mon_op_fence_connect) && st != NULL && st->state == stonith_disconnected) { rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); if (is_set(options.mon_ops, mon_op_watch_fencing)) { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_event); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event); } else { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_display); st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display); } } } if (cib->state != cib_connected_query && cib->state != cib_connected_command) { crm_trace("Connecting to the CIB"); /* Hack: the CIB signon will print the prompt for a password if needed, * but to stderr. If we're in curses, show it on the screen instead. * * @TODO Add a password prompt (maybe including input) function to * pcmk__output_t and use it in libcib. */ if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) { need_pass = FALSE; print_as(output_format, "Password:"); } rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != pcmk_ok) { out->err(out, "Could not connect to the CIB: %s", pcmk_strerror(rc)); return rc; } rc = cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); if (rc == pcmk_ok) { mon_refresh_display(&output_format); } if (rc == pcmk_ok && full) { if (rc == pcmk_ok) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy_regular); if (rc == -EPROTONOSUPPORT) { print_as (output_format, "Notification setup not supported, won't be able to reconnect after failure"); if (output_format == mon_output_console) { sleep(2); } rc = pcmk_ok; } } if (rc == pcmk_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != pcmk_ok) { out->err(out, "Notification setup failed, could not monitor CIB actions"); clean_up_connections(); } } } return rc; } #if CURSES_ENABLED static const char * get_option_desc(char c) { const char *desc = "No help available"; for (GOptionEntry *entry = display_entries; entry != NULL; entry++) { if (entry->short_name == c) { desc = entry->description; break; } } return desc; } #define print_option_help(output_format, option, condition) \ out->info(out, "%c %c: \t%s", ((condition)? '*': ' '), option, get_option_desc(option)); static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data) { int c; gboolean config_mode = FALSE; while (1) { /* Get user input */ c = getchar(); switch (c) { case 'm': if (is_not_set(show, mon_show_fencing_all)) { options.mon_ops |= mon_op_fence_history; options.mon_ops |= mon_op_fence_connect; if (st == NULL) { mon_cib_connection_destroy_regular(NULL); } } if (is_set(show, mon_show_fence_failed) || is_set(show, mon_show_fence_pending) || is_set(show, mon_show_fence_worked)) { show &= ~mon_show_fencing_all; } else { show |= mon_show_fencing_all; } break; case 'c': show ^= mon_show_tickets; break; case 'f': show ^= mon_show_failcounts; break; case 'n': options.mon_ops ^= mon_op_group_by_node; break; case 'o': show ^= mon_show_operations; if (is_not_set(show, mon_show_operations)) { options.mon_ops &= ~mon_op_print_timing; } break; case 'r': options.mon_ops ^= mon_op_inactive_resources; break; case 'R': options.mon_ops ^= mon_op_print_clone_detail; break; case 't': options.mon_ops ^= mon_op_print_timing; if (is_set(options.mon_ops, mon_op_print_timing)) { show |= mon_show_operations; } break; case 'A': show ^= mon_show_attributes; break; case 'L': show ^= mon_show_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (is_set(show, mon_show_stack) || is_set(show, mon_show_dc) || is_set(show, mon_show_times) || is_set(show, mon_show_counts)) { show &= ~mon_show_summary; } else { show |= mon_show_summary; } /* Regardless, we don't show options in console mode. */ show &= ~mon_show_options; break; case 'b': options.mon_ops ^= mon_op_print_brief; break; case 'j': options.mon_ops ^= mon_op_print_pending; break; case '?': config_mode = TRUE; break; default: goto refresh; } if (!config_mode) goto refresh; blank_screen(); out->info(out, "%s", "Display option change mode\n"); print_option_help(out, 'c', is_set(show, mon_show_tickets)); print_option_help(out, 'f', is_set(show, mon_show_failcounts)); print_option_help(out, 'n', is_set(options.mon_ops, mon_op_group_by_node)); print_option_help(out, 'o', is_set(show, mon_show_operations)); print_option_help(out, 'r', is_set(options.mon_ops, mon_op_inactive_resources)); print_option_help(out, 't', is_set(options.mon_ops, mon_op_print_timing)); print_option_help(out, 'A', is_set(show, mon_show_attributes)); print_option_help(out, 'L', is_set(show,mon_show_bans)); print_option_help(out, 'D', is_not_set(show, mon_show_summary)); print_option_help(out, 'R', is_set(options.mon_ops, mon_op_print_clone_detail)); print_option_help(out, 'b', is_set(options.mon_ops, mon_op_print_brief)); print_option_help(out, 'j', is_set(options.mon_ops, mon_op_print_pending)); print_option_help(out, 'm', is_set(show, mon_show_fencing_all)); out->info(out, "%s", "\nToggle fields via field letter, type any other key to return"); } refresh: mon_refresh_display(NULL); return TRUE; } #endif // Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag static void avoid_zombies(void) { struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); if (sigemptyset(&sa.sa_mask) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno)); return; } sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART|SA_NOCLDWAIT; if (sigaction(SIGCHLD, &sa, NULL) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno)); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; const char *description = "Notes:\n\n" "If this program is called as crm_mon.cgi, --output-as=html --html-cgi will\n" "automatically be added to the command line arguments.\n\n" "Time Specification:\n\n" "The TIMESPEC in any command line option can be specified in many different\n" "formats. It can be just an integer number of seconds, a number plus units\n" "(ms/msec/us/usec/s/sec/m/min/h/hr), or an ISO 8601 period specification.\n\n" "Output Control:\n\n" "By default, a certain list of sections are written to the output destination.\n" "The default varies based on the output format - XML includes everything, while\n" "other output formats will display less. This list can be modified with the\n" "--include and --exclude command line options. Each option may be given multiple\n" "times on the command line, and each can give a comma-separated list of sections.\n" "The options are applied to the default set, from left to right as seen on the\n" "command line. For a list of valid sections, pass --include=list or --exclude=list.\n\n" "Examples:\n\n" "Display the cluster status on the console with updates as they occur:\n\n" "\tcrm_mon\n\n" "Display the cluster status on the console just once then exit:\n\n" "\tcrm_mon -1\n\n" "Display your cluster status, group resources by node, and include inactive resources in the list:\n\n" "\tcrm_mon --group-by-node --inactive\n\n" "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:\n\n" "\tcrm_mon --daemonize --output-as html --output-to /path/to/docroot/filename.html\n\n" "Start crm_mon and export the current cluster status as XML to stdout, then exit:\n\n" "\tcrm_mon --output-as xml\n\n"; context = pcmk__build_arg_context(args, "console (default), html, text, xml", group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "display", "Display Options:", "Show display options", display_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } /* If certain format options were specified, we want to set some extra * options. We can just process these like they were given on the * command line. */ static void add_output_args(void) { GError *err = NULL; if (output_format == mon_output_plain) { if (!pcmk__force_args(context, &err, "%s --text-fancy", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_cgi) { if (!pcmk__force_args(context, &err, "%s --html-cgi", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_xml) { if (!pcmk__force_args(context, &err, "%s --xml-simple-list", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_legacy_xml) { output_format = mon_output_xml; if (!pcmk__force_args(context, &err, "%s --xml-legacy", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } } /* Which output format to use could come from two places: The --as-xml * style arguments we gave in deprecated_entries above, or the formatted output * arguments added by pcmk__register_formats. If the latter were used, * output_format will be mon_output_unset. * * Call the callbacks as if those older style arguments were provided so * the various things they do get done. */ static void reconcile_output_format(pcmk__common_args_t *args) { gboolean retval = TRUE; GError *err = NULL; if (output_format != mon_output_unset) { return; } if (pcmk__str_eq(args->output_ty, "html", pcmk__str_casei)) { char *dest = NULL; if (args->output_dest != NULL) { dest = strdup(args->output_dest); } retval = as_html_cb("h", dest, NULL, &err); free(dest); } else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_casei)) { retval = no_curses_cb("N", NULL, NULL, &err); } else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_casei)) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("xml"); output_format = mon_output_xml; options.mon_ops |= mon_op_one_shot; } else if (is_set(options.mon_ops, mon_op_one_shot)) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("text"); output_format = mon_output_plain; } else { /* Neither old nor new arguments were given, so set the default. */ if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("console"); output_format = mon_output_console; } if (!retval) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } int main(int argc, char **argv) { int rc = pcmk_ok; GOptionGroup *output_group = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); options.pid_file = strdup("/tmp/ClusterMon.pid"); crm_log_cli_init("crm_mon"); // Avoid needing to wait for subprocesses forked for -E/--external-agent avoid_zombies(); if (pcmk__ends_with_ext(argv[0], ".cgi")) { output_format = mon_output_cgi; options.mon_ops |= mon_op_one_shot; } processed_args = pcmk__cmdline_preproc(argv, "ehimpxEILU"); fence_history_cb("--fence-history", "1", NULL, NULL); /* Set an HTML title regardless of what format we will eventually use. This can't * be done in add_output_args. That function is called after command line * arguments are processed in the next block, which means it'll override whatever * title the user provides. Doing this here means the user can give their own * title on the command line. */ if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"", g_get_prgname())) { return clean_up(CRM_EX_USAGE); } if (!g_option_context_parse_strv(context, &processed_args, &error)) { return clean_up(CRM_EX_USAGE); } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!args->version) { if (args->quiet) { include_exclude_cb("--exclude", "times", NULL, NULL); } if (is_set(options.mon_ops, mon_op_watch_fencing)) { fence_history_cb("--fence-history", "0", NULL, NULL); options.mon_ops |= mon_op_fence_connect; } /* create the cib-object early to be able to do further * decisions based on the cib-source */ cib = cib_new(); if (cib == NULL) { rc = -EINVAL; } else { switch (cib->variant) { case cib_native: /* cib & fencing - everything available */ break; case cib_file: /* Don't try to connect to fencing as we * either don't have a running cluster or * the fencing-information would possibly * not match the cib data from a file. * As we don't expect cib-updates coming * in enforce one-shot. */ fence_history_cb("--fence-history", "0", NULL, NULL); options.mon_ops |= mon_op_one_shot; break; case cib_remote: /* updates coming in but no fencing */ fence_history_cb("--fence-history", "0", NULL, NULL); break; case cib_undefined: case cib_database: default: /* something is odd */ rc = -EINVAL; break; } } if (is_set(options.mon_ops, mon_op_one_shot)) { if (output_format == mon_output_console) { output_format = mon_output_plain; } } else if (options.daemonize) { if ((output_format == mon_output_console) || (output_format == mon_output_plain)) { output_format = mon_output_none; } crm_enable_stderr(FALSE); if (pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches | pcmk__str_casei) && !options.external_agent) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires at least one of --output-to and --external-agent"); return clean_up(CRM_EX_USAGE); } if (cib) { /* to be on the safe side don't have cib-object around * when we are forking */ cib_delete(cib); cib = NULL; crm_make_daemon(crm_system_name, TRUE, options.pid_file); cib = cib_new(); if (cib == NULL) { rc = -EINVAL; } /* otherwise assume we've got the same cib-object we've just destroyed * in our parent */ } } else if (output_format == mon_output_console) { #if CURSES_ENABLED crm_enable_stderr(FALSE); #else options.mon_ops |= mon_op_one_shot; output_format = mon_output_plain; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } } if (rc != pcmk_ok) { // Shouldn't really be possible g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source"); return clean_up(CRM_EX_ERROR); } reconcile_output_format(args); add_output_args(); if (args->version && output_format == mon_output_console) { /* Use the text output format here if we are in curses mode but were given * --version. Displaying version information uses printf, and then we * immediately exit. We don't want to initialize curses for that. */ rc = pcmk__output_new(&out, "text", args->output_dest, argv); } else { rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); } if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); return clean_up(CRM_EX_ERROR); } /* output_format MUST NOT BE CHANGED AFTER THIS POINT. */ /* Apply --include/--exclude flags we used internally. There's no error reporting * here because this would be a programming error. */ apply_include_exclude(options.includes_excludes, output_format, &error); /* And now apply any --include/--exclude flags the user gave on the command line. * These are done in a separate pass from the internal ones because we want to * make sure whatever the user specifies overrides whatever we do. */ if (!apply_include_exclude(options.user_includes_excludes, output_format, &error)) { return clean_up(CRM_EX_USAGE); } crm_mon_register_messages(out); pe__register_messages(out); stonith__register_messages(out); if (args->version) { out->version(out, false); return clean_up(CRM_EX_OK); } /* Extra sanity checks when in CGI mode */ if (output_format == mon_output_cgi) { if (cib && cib->variant == cib_file) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode used with CIB file"); return clean_up(CRM_EX_USAGE); } else if (options.external_agent != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with --external-agent"); return clean_up(CRM_EX_USAGE); } else if (options.daemonize == TRUE) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with -d"); return clean_up(CRM_EX_USAGE); } } if (output_format == mon_output_xml || output_format == mon_output_legacy_xml) { options.mon_ops |= mon_op_print_timing | mon_op_inactive_resources; } if ((output_format == mon_output_html || output_format == mon_output_cgi) && out->dest != stdout) { pcmk__html_add_header("meta", "http-equiv", "refresh", "content", crm_itoa(options.reconnect_msec/1000), NULL); } crm_info("Starting %s", crm_system_name); if (cib) { do { if (is_not_set(options.mon_ops, mon_op_one_shot)) { print_as(output_format ,"Waiting until cluster is available on this node ...\n"); } rc = cib_connect(is_not_set(options.mon_ops, mon_op_one_shot)); if (is_set(options.mon_ops, mon_op_one_shot)) { break; } else if (rc != pcmk_ok) { sleep(options.reconnect_msec / 1000); #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif } else { if (output_format == mon_output_html && out->dest != stdout) { printf("Writing html to %s ...\n", args->output_dest); } } } while (rc == -ENOTCONN); } if (rc != pcmk_ok) { if (output_format == mon_output_monitor) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s", pcmk_strerror(rc)); return clean_up(MON_STATUS_CRIT); } else { if (rc == -ENOTCONN) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc)); } } return clean_up(crm_errno2exit(rc)); } if (is_set(options.mon_ops, mon_op_one_shot)) { return clean_up(CRM_EX_OK); } mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (output_format == mon_output_console) { ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; io_channel = g_io_channel_unix_new(STDIN_FILENO); g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL); } #endif refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); if (io_channel != NULL) { g_io_channel_shutdown(io_channel, TRUE, NULL); } crm_info("Exiting %s", crm_system_name); return clean_up(CRM_EX_OK); } /*! * \internal * \brief Print one-line status suitable for use with monitoring software * * \param[in] data_set Working set of CIB state * \param[in] history List of stonith actions * * \note This function's output (and the return code when the program exits) * should conform to https://www.monitoring-plugins.org/doc/guidelines.html */ static void print_simple_status(pcmk__output_t *out, pe_working_set_t * data_set, stonith_history_t *history, unsigned int mon_ops) { GListPtr gIter = NULL; int nodes_online = 0; int nodes_standby = 0; int nodes_maintenance = 0; char *offline_nodes = NULL; gboolean no_dc = FALSE; gboolean offline = FALSE; if (data_set->dc_node == NULL) { mon_ops |= mon_op_has_warnings; no_dc = TRUE; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node->details->standby && node->details->online) { nodes_standby++; } else if (node->details->maintenance && node->details->online) { nodes_maintenance++; } else if (node->details->online) { nodes_online++; } else { char *s = crm_strdup_printf("offline node: %s", node->details->uname); /* coverity[leaked_storage] False positive */ offline_nodes = pcmk__add_word(offline_nodes, s); free(s); mon_ops |= mon_op_has_warnings; offline = TRUE; } } if (is_set(mon_ops, mon_op_has_warnings)) { out->info(out, "CLUSTER WARN:%s%s%s", no_dc ? " No DC" : "", no_dc && offline ? "," : "", offline ? offline_nodes : ""); free(offline_nodes); } else { char *nodes_standby_s = NULL; char *nodes_maint_s = NULL; if (nodes_standby > 0) { nodes_standby_s = crm_strdup_printf(", %d standby node%s", nodes_standby, pcmk__plural_s(nodes_standby)); } if (nodes_maintenance > 0) { nodes_maint_s = crm_strdup_printf(", %d maintenance node%s", nodes_maintenance, pcmk__plural_s(nodes_maintenance)); } out->info(out, "CLUSTER OK: %d node%s online%s%s, " "%d resource instance%s configured", nodes_online, pcmk__plural_s(nodes_online), nodes_standby_s != NULL ? nodes_standby_s : "", nodes_maint_s != NULL ? nodes_maint_s : "", data_set->ninstances, pcmk__plural_s(data_set->ninstances)); free(nodes_standby_s); free(nodes_maint_s); } /* coverity[leaked_storage] False positive */ } /*! * \internal * \brief Reduce the stonith-history * for successful actions we keep the last of every action-type & target * for failed actions we record as well who had failed * for actions in progress we keep full track * * \param[in] history List of stonith actions * */ static stonith_history_t * reduce_stonith_history(stonith_history_t *history) { stonith_history_t *new = history, *hp, *np; if (new) { hp = new->next; new->next = NULL; while (hp) { stonith_history_t *hp_next = hp->next; hp->next = NULL; for (np = new; ; np = np->next) { if ((hp->state == st_done) || (hp->state == st_failed)) { /* action not in progress */ if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei) && pcmk__str_eq(hp->action, np->action, pcmk__str_casei) && (hp->state == np->state) && ((hp->state == st_done) || pcmk__str_eq(hp->delegate, np->delegate, pcmk__str_casei))) { /* purge older hp */ stonith_history_free(hp); break; } } if (!np->next) { np->next = hp; break; } } hp = hp_next; } } return new; } static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = crm_itoa(rc); char *status_s = crm_itoa(status); char *target_rc_s = crm_itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (options.external_recipient) { setenv("CRM_notify_recipient", options.external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { crm_perror(LOG_ERR, "notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(options.external_agent, options.external_agent, NULL); exit(CRM_EX_ERROR); } crm_trace("Finished running custom notification program '%s'.", options.external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static void handle_rsc_op(xmlNode * xml, const char *node_id) { int rc = -1; int status = -1; int target_rc = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) { xmlNode *cIter; for(cIter = xml->children; cIter; cIter = cIter->next) { handle_rsc_op(cIter, node_id); } return; } id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); if (id == NULL) { /* Compatibility with <= 1.1.5 */ id = ID(rsc_op); } magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return; } if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); while (n != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(n), pcmk__str_casei)) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, XML_ATTR_UNAME); } if (node == NULL && n) { node = ID(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = pcmk_strerror(pcmk_ok); if (status == PCMK_LRM_OP_DONE && target_rc == rc) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_LRM_OP_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = services_lrm_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && options.external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(rsc); free(task); } static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger(refresh_trigger); return FALSE; } #define NODE_PATT "/lrm[@id=" static char * get_node_from_xpath(const char *xpath) { char *nodeid = NULL; char *tmp = strstr(xpath, NODE_PATT); if(tmp) { tmp += strlen(NODE_PATT); tmp += 1; nodeid = strdup(tmp); tmp = strstr(nodeid, "\'"); CRM_ASSERT(tmp); tmp[0] = 0; } return nodeid; } static void crm_diff_update_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) { const char *name = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); xmlNode *match = NULL; const char *node = NULL; if(op == NULL) { continue; } else if(strcmp(op, "create") == 0) { match = change->children; } else if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "delete") == 0) { continue; } else if(strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); } else if(strcmp(name, XML_TAG_CIB) == 0) { xmlNode *state = NULL; xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS); for (state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) { xmlNode *state = NULL; for (state = __xml_first_child_element(match); state != NULL; state = __xml_next_element(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) { node = crm_element_value(match, XML_ATTR_UNAME); if (node == NULL) { node = ID(match); } handle_rsc_op(match, node); } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { node = ID(match); handle_rsc_op(match, node); } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } } } static void crm_diff_update_v1(const char *event, xmlNode * msg) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op, NULL); } freeXpathObject(xpathObj); } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; static bool stale = FALSE; gboolean cib_updated = FALSE; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); print_dot(output_format); if (current_cib != NULL) { rc = xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; break; case pcmk_ok: cib_updated = TRUE; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); } if (options.external_agent) { int format = 0; crm_element_value_int(diff, "format", &format); switch(format) { case 1: crm_diff_update_v1(event, msg); break; case 2: crm_diff_update_v2(event, msg); break; default: crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { print_as(output_format, "--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; kick_refresh(cib_updated); } static gboolean mon_refresh_display(gpointer user_data) { xmlNode *cib_copy = copy_xml(current_cib); stonith_history_t *stonith_history = NULL; int history_rc = 0; last_refresh = time(NULL); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { if (cib) { cib->cmds->signoff(cib); } out->err(out, "Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation)); clean_up(CRM_EX_CONFIG); return FALSE; } /* get the stonith-history if there is evidence we need it */ while (is_set(options.mon_ops, mon_op_fence_history)) { if (st != NULL) { history_rc = st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120); if (history_rc != 0) { out->err(out, "Critical: Unable to get stonith-history"); mon_cib_connection_destroy_error(NULL); } else { stonith_history = stonith__sort_history(stonith_history); if (is_not_set(options.mon_ops, mon_op_fence_full_history) && output_format != mon_output_xml) { stonith_history = reduce_stonith_history(stonith_history); } break; /* all other cases are errors */ } } else { out->err(out, "Critical: No stonith-API"); } free_xml(cib_copy); out->err(out, "Reading stonith-history failed"); return FALSE; } if (mon_data_set == NULL) { mon_data_set = pe_new_working_set(); CRM_ASSERT(mon_data_set != NULL); } set_bit(mon_data_set->flags, pe_flag_no_compat); mon_data_set->input = cib_copy; cluster_status(mon_data_set); /* Unpack constraints if any section will need them * (tickets may be referenced in constraints but not granted yet, * and bans need negative location constraints) */ if (is_set(show, mon_show_bans) || is_set(show, mon_show_tickets)) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, mon_data_set->input); unpack_constraints(cib_constraints, mon_data_set); } switch (output_format) { case mon_output_html: case mon_output_cgi: if (print_html_status(out, mon_data_set, stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node, options.only_rsc) != 0) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_CANTCREAT, "Critical: Unable to output html file"); clean_up(CRM_EX_CANTCREAT); return FALSE; } break; case mon_output_legacy_xml: case mon_output_xml: print_xml_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node, options.only_rsc); break; case mon_output_monitor: print_simple_status(out, mon_data_set, stonith_history, options.mon_ops); if (is_set(options.mon_ops, mon_op_has_warnings)) { clean_up(MON_STATUS_WARN); return FALSE; } break; case mon_output_console: /* If curses is not enabled, this will just fall through to the plain * text case. */ #if CURSES_ENABLED blank_screen(); print_status(out, mon_data_set, stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node, options.only_rsc); refresh(); break; #endif case mon_output_plain: print_status(out, mon_data_set, stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node, options.only_rsc); break; case mon_output_unset: case mon_output_none: break; } if (options.daemonize) { out->reset(out); } stonith_history_free(stonith_history); stonith_history = NULL; pe_reset_working_set(mon_data_set); return TRUE; } static void mon_st_callback_event(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy_regular(NULL); } else if (options.external_agent) { char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)", e->operation, e->origin, e->target, pcmk_strerror(e->result), e->id); send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); free(desc); } } static void kick_refresh(gboolean data_updated) { static int updates = 0; time_t now = time(NULL); if (data_updated) { updates++; } if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } /* Refresh * - immediately if the last update was more than 5s ago * - every 10 cib-updates * - at most 2s after the last update */ if ((now - last_refresh) > (options.reconnect_msec / 1000)) { mainloop_set_trigger(refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else if(updates >= 10) { mainloop_set_trigger(refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } static void mon_st_callback_display(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy_regular(NULL); } else { print_dot(output_format); kick_refresh(TRUE); } } static void clean_up_connections(void) { if (cib != NULL) { cib->cmds->signoff(cib); cib_delete(cib); cib = NULL; } if (st != NULL) { if (st->state != stonith_disconnected) { st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY); st->cmds->disconnect(st); } stonith_api_delete(st); st = NULL; } } /* * De-init ncurses, disconnect from the CIB manager, disconnect fencing, * deallocate memory and show usage-message if requested. * * We don't actually return, but nominally returning crm_exit_t allows a usage * like "return clean_up(exit_code);" which helps static analysis understand the * code flow. */ static crm_exit_t clean_up(crm_exit_t exit_code) { /* Quitting crm_mon is much more complicated than it ought to be. */ /* (1) Close connections, free things, etc. */ clean_up_connections(); free(options.pid_file); free(options.neg_location_prefix); g_slist_free_full(options.includes_excludes, free); pe_free_working_set(mon_data_set); mon_data_set = NULL; g_strfreev(processed_args); /* (2) If this is abnormal termination and we're in curses mode, shut down * curses first. Any messages displayed to the screen before curses is shut * down will be lost because doing the shut down will also restore the * screen to whatever it looked like before crm_mon was started. */ if ((error != NULL || exit_code == CRM_EX_USAGE) && output_format == mon_output_console) { out->finish(out, exit_code, false, NULL); pcmk__output_free(out); out = NULL; } /* (3) If this is a command line usage related failure, print the usage * message. */ if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); } pcmk__free_arg_context(context); /* (4) If this is any kind of error, print the error out and exit. Make * sure to handle situations both before and after formatted output is * set up. We want errors to appear formatted if at all possible. */ if (error != NULL) { if (out != NULL) { out->err(out, "%s: %s", g_get_prgname(), error->message); out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } else { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); } g_clear_error(&error); crm_exit(exit_code); } /* (5) Print formatted output to the screen if we made it far enough in * crm_mon to be able to do so. */ if (out != NULL) { if (options.daemonize) { out->dest = freopen(NULL, "w", out->dest); CRM_ASSERT(out->dest != NULL); } out->finish(out, exit_code, true, NULL); pcmk__output_free(out); pcmk__unregister_formats(); } crm_exit(exit_code); } diff --git a/tools/crm_mon_print.c b/tools/crm_mon_print.c index 1bb6b01cd2..07c98ce469 100644 --- a/tools/crm_mon_print.c +++ b/tools/crm_mon_print.c @@ -1,1086 +1,1086 @@ /* * Copyright 2019-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" static void print_resources_heading(pcmk__output_t *out, unsigned int mon_ops); static void print_resources_closing(pcmk__output_t *out, unsigned int mon_ops); static int print_resources(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int print_opts, unsigned int mon_ops, gboolean brief_output, gboolean print_summary, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer); static int print_rsc_history(pcmk__output_t *out, pe_working_set_t *data_set, pe_node_t *node, xmlNode *rsc_entry, unsigned int mon_ops, GListPtr op_list); static int print_node_history(pcmk__output_t *out, pe_working_set_t *data_set, pe_node_t *node, xmlNode *node_state, gboolean operations, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc); static gboolean add_extra_info(pcmk__output_t *out, pe_node_t * node, GListPtr rsc_list, const char *attrname, int *expected_score); static void print_node_attribute(gpointer name, gpointer user_data); static int print_node_summary(pcmk__output_t *out, pe_working_set_t * data_set, gboolean operations, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer); static int print_cluster_tickets(pcmk__output_t *out, pe_working_set_t * data_set, gboolean print_spacer); static int print_neg_locations(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int mon_ops, const char *prefix, GListPtr only_rsc, gboolean print_spacer); static int print_node_attributes(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer); static int print_failed_actions(pcmk__output_t *out, pe_working_set_t *data_set, GListPtr only_node, gboolean print_spacer); static GListPtr build_uname_list(pe_working_set_t *data_set, const char *s) { GListPtr unames = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { /* Nothing was given so return a list of all node names. Or, '*' was * given. This would normally fall into the pe__unames_with_tag branch * where it will return an empty list. Catch it here instead. */ unames = g_list_prepend(unames, strdup("*")); } else { pe_node_t *node = pe_find_node(data_set->nodes, s); if (node) { /* The given string was a valid uname for a node. Return a * singleton list containing just that uname. */ unames = g_list_prepend(unames, strdup(s)); } else { /* The given string was not a valid uname. It's either a tag or * it's a typo or something. In the first case, we'll return a * list of all the unames of the nodes with the given tag. In the * second case, we'll return a NULL pointer and nothing will * get displayed. */ unames = pe__unames_with_tag(data_set, s); } } return unames; } static GListPtr build_rsc_list(pe_working_set_t *data_set, const char *s) { GListPtr resources = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { resources = g_list_prepend(resources, strdup("*")); } else { pe_resource_t *rsc = pe_find_resource_with_flags(data_set->resources, s, pe_find_renamed|pe_find_any); if (rsc) { /* A colon in the name we were given means we're being asked to filter * on a specific instance of a cloned resource. Put that exact string * into the filter list. Otherwise, use the printable ID of whatever * resource was found that matches what was asked for. */ if (strstr(s, ":") != NULL) { resources = g_list_prepend(resources, strdup(rsc->id)); } else { resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc))); } } else { /* The given string was not a valid resource name. It's either * a tag or it's a typo or something. See build_uname_list for * more detail. */ resources = pe__rscs_with_tag(data_set, s); } } return resources; } /*! * \internal * \brief Print resources section heading appropriate to options * * \param[in] out The output functions structure. * \param[in] mon_ops Bitmask of mon_op_*. */ static void print_resources_heading(pcmk__output_t *out, unsigned int mon_ops) { const char *heading; if (is_set(mon_ops, mon_op_group_by_node)) { /* Active resources have already been printed by node */ heading = is_set(mon_ops, mon_op_inactive_resources) ? "Inactive Resources" : NULL; } else if (is_set(mon_ops, mon_op_inactive_resources)) { heading = "Full List of Resources"; } else { heading = "Active Resources"; } /* Print section heading */ out->begin_list(out, NULL, NULL, "%s", heading); } /*! * \internal * \brief Print whatever resource section closing is appropriate * * \param[in] out The output functions structure. * \param[in] mon_ops Bitmask of mon_op_*. */ static void print_resources_closing(pcmk__output_t *out, unsigned int mon_ops) { const char *heading; /* What type of resources we did or did not display */ if (is_set(mon_ops, mon_op_group_by_node)) { heading = "inactive "; } else if (is_set(mon_ops, mon_op_inactive_resources)) { heading = ""; } else { heading = "active "; } out->list_item(out, NULL, "No %sresources", heading); } /*! * \internal * \brief Print whatever resource section(s) are appropriate * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] print_opts Bitmask of pe_print_*. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] brief_output Whether to display full or brief output. * \param[in] print_summary Whether to display a failure summary. */ static int print_resources(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int print_opts, unsigned int mon_ops, gboolean brief_output, gboolean print_summary, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer) { GListPtr rsc_iter; int rc = pcmk_rc_no_output; /* If we already showed active resources by node, and * we're not showing inactive resources, we have nothing to do */ if (is_set(mon_ops, mon_op_group_by_node) && is_not_set(mon_ops, mon_op_inactive_resources)) { return rc; } PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resources_heading(out, mon_ops); /* If we haven't already printed resources grouped by node, * and brief output was requested, print resource summary */ if (brief_output && is_not_set(mon_ops, mon_op_group_by_node)) { GListPtr rscs = pe__filter_rsc_list(data_set->resources, only_rsc); pe__rscs_brief_output(out, rscs, print_opts, is_set(mon_ops, mon_op_inactive_resources)); g_list_free(rscs); } /* For each resource, display it if appropriate */ for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { pe_resource_t *rsc = (pe_resource_t *) rsc_iter->data; int x; /* Complex resources may have some sub-resources active and some inactive */ gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); /* Skip inactive orphans (deleted but still in CIB) */ if (is_set(rsc->flags, pe_rsc_orphan) && !is_active) { continue; /* Skip active resources if we already displayed them by node */ } else if (is_set(mon_ops, mon_op_group_by_node)) { if (is_active) { continue; } /* Skip primitives already counted in a brief summary */ } else if (brief_output && (rsc->variant == pe_native)) { continue; /* Skip resources that aren't at least partially active, * unless we're displaying inactive resources */ } else if (!partially_active && is_not_set(mon_ops, mon_op_inactive_resources)) { continue; } else if (partially_active && !pe__rsc_running_on_any_node_in_list(rsc, only_node)) { continue; } /* Print this resource */ x = out->message(out, crm_map_element_name(rsc->xml), print_opts, rsc, only_node, only_rsc); if (x == pcmk_rc_ok) { rc = pcmk_rc_ok; } } if (print_summary && rc != pcmk_rc_ok) { print_resources_closing(out, mon_ops); } out->end_list(out); return pcmk_rc_ok; } static int failure_count(pe_working_set_t *data_set, pe_node_t *node, pe_resource_t *rsc, time_t *last_failure) { return rsc ? pe_get_failcount(node, rsc, last_failure, pe_fc_default, NULL, data_set) : 0; } static GListPtr get_operation_list(xmlNode *rsc_entry) { GListPtr op_list = NULL; xmlNode *rsc_op = NULL; for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { const char *task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); int op_rc_i = crm_parse_int(op_rc, "0"); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* Ignore notifies and some probes */ if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei) || (pcmk__str_eq(task, "probe", pcmk__str_casei) && (op_rc_i == 7))) { continue; } if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_append(op_list, rsc_op); } } op_list = g_list_sort(op_list, sort_op_by_callid); return op_list; } /*! * \internal * \brief Print resource operation/failure history * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] node Node that ran this resource. * \param[in] rsc_entry Root of XML tree describing resource status. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] op_list A list of operations to print. */ static int print_rsc_history(pcmk__output_t *out, pe_working_set_t *data_set, pe_node_t *node, xmlNode *rsc_entry, unsigned int mon_ops, GListPtr op_list) { GListPtr gIter = NULL; int rc = pcmk_rc_no_output; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); /* Print each operation */ for (gIter = op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); int op_rc_i = crm_parse_int(op_rc, "0"); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* If this is the first printed operation, print heading for resource */ if (rc == pcmk_rc_no_output) { time_t last_failure = 0; int failcount = failure_count(data_set, node, rsc, &last_failure); out->message(out, "resource-history", rsc, rsc_id, TRUE, failcount, last_failure, TRUE); rc = pcmk_rc_ok; } /* Print the operation */ out->message(out, "op-history", xml_op, task, interval_ms_s, op_rc_i, is_set(mon_ops, mon_op_print_timing)); } /* Free the list we created (no need to free the individual items) */ g_list_free(op_list); PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Print node operation/failure history * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] node_state Root of XML tree describing node status. * \param[in] operations Whether to print operations or just failcounts. * \param[in] mon_ops Bitmask of mon_op_*. */ static int print_node_history(pcmk__output_t *out, pe_working_set_t *data_set, pe_node_t *node, xmlNode *node_state, gboolean operations, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc) { xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; int rc = pcmk_rc_no_output; lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); /* Print history of each of the node's resources */ for (rsc_entry = __xml_first_child_element(lrm_rsc); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { continue; } /* We can't use is_filtered here to filter group resources. For is_filtered, * we have to decide whether to check the parent or not. If we check the * parent, all elements of a group will always be printed because that's how * is_filtered works for groups. If we do not check the parent, sometimes * this will filter everything out. * * For other resource types, is_filtered is okay. */ if (uber_parent(rsc)->variant == pe_group) { if (!pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) && !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)))) { continue; } } else { if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { continue; } } if (operations == FALSE) { time_t last_failure = 0; int failcount = failure_count(data_set, node, rsc, &last_failure); if (failcount <= 0) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, get_resource_display_options(mon_ops), FALSE, NULL, is_set(mon_ops, mon_op_print_clone_detail), is_set(mon_ops, mon_op_print_brief), is_set(mon_ops, mon_op_group_by_node), only_node, only_rsc); } out->message(out, "resource-history", rsc, rsc_id, FALSE, failcount, last_failure, FALSE); } else { GListPtr op_list = get_operation_list(rsc_entry); if (op_list == NULL) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, get_resource_display_options(mon_ops), FALSE, NULL, is_set(mon_ops, mon_op_print_clone_detail), is_set(mon_ops, mon_op_print_brief), is_set(mon_ops, mon_op_group_by_node), only_node, only_rsc); } print_rsc_history(out, data_set, node, rsc_entry, mon_ops, op_list); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Determine whether extended information about an attribute should be added. * * \param[in] out The output functions structure. * \param[in] node Node that ran this resource. * \param[in] rsc_list The list of resources for this node. * \param[in] attrname The attribute to find. * \param[out] expected_score The expected value for this attribute. * * \return TRUE if extended information should be printed, FALSE otherwise * \note Currently, extended information is only supported for ping/pingd * resources, for which a message will be printed if connectivity is lost * or degraded. */ static gboolean add_extra_info(pcmk__output_t *out, pe_node_t *node, GListPtr rsc_list, const char *attrname, int *expected_score) { GListPtr gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); const char *name = NULL; if (rsc->children != NULL) { if (add_extra_info(out, node, rsc->children, attrname, expected_score)) { return TRUE; } } - if (!pcmk__str_eq(type, "ping", pcmk__str_casei) && !pcmk__str_eq(type, "pingd", pcmk__str_casei)) { + if (!pcmk__strcase_any_of(type, "ping", "pingd", NULL)) { return FALSE; } name = g_hash_table_lookup(rsc->parameters, "name"); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (pcmk__str_eq(name, attrname, pcmk__str_casei)) { int host_list_num = 0; /* int value = crm_parse_int(attrvalue, "0"); */ const char *hosts = g_hash_table_lookup(rsc->parameters, "host_list"); const char *multiplier = g_hash_table_lookup(rsc->parameters, "multiplier"); if (hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } /* pingd multiplier is the same as the default value. */ *expected_score = host_list_num * crm_parse_int(multiplier, "1"); return TRUE; } } return FALSE; } /* structure for passing multiple user data to g_list_foreach() */ struct mon_attr_data { pcmk__output_t *out; pe_node_t *node; }; static void print_node_attribute(gpointer name, gpointer user_data) { const char *value = NULL; int expected_score = 0; gboolean add_extra = FALSE; struct mon_attr_data *data = (struct mon_attr_data *) user_data; value = pe_node_attribute_raw(data->node, name); add_extra = add_extra_info(data->out, data->node, data->node->details->running_rsc, name, &expected_score); /* Print attribute name and value */ data->out->message(data->out, "node-attribute", name, value, add_extra, expected_score); } /*! * \internal * \brief Print history for all nodes. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] operations Whether to print operations or just failcounts. * \param[in] mon_ops Bitmask of mon_op_*. */ static int print_node_summary(pcmk__output_t *out, pe_working_set_t * data_set, gboolean operations, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer) { xmlNode *node_state = NULL; xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); int rc = pcmk_rc_no_output; if (xmlChildElementCount(cib_status) == 0) { return rc; } /* Print each node in the CIB status */ for (node_state = __xml_first_child_element(cib_status); node_state != NULL; node_state = __xml_next_element(node_state)) { pe_node_t *node; if (!pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { continue; } node = pe_find_node_id(data_set->nodes, ID(node_state)); if (!node || !node->details || !node->details->online) { continue; } if (!pcmk__str_in_list(only_node, node->details->uname)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, operations ? "Operations" : "Migration Summary"); print_node_history(out, data_set, node, node_state, operations, mon_ops, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Print all tickets. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. */ static int print_cluster_tickets(pcmk__output_t *out, pe_working_set_t * data_set, gboolean print_spacer) { GHashTableIter iter; gpointer key, value; if (g_hash_table_size(data_set->tickets) == 0) { return pcmk_rc_no_output; } PCMK__OUTPUT_SPACER_IF(out, print_spacer); /* Print section heading */ out->begin_list(out, NULL, NULL, "Tickets"); /* Print each ticket */ g_hash_table_iter_init(&iter, data_set->tickets); while (g_hash_table_iter_next(&iter, &key, &value)) { pe_ticket_t *ticket = (pe_ticket_t *) value; out->message(out, "ticket", ticket); } /* Close section */ out->end_list(out); return pcmk_rc_ok; } /*! * \internal * \brief Print section for negative location constraints * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] prefix ID prefix to filter results by. */ static int print_neg_locations(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int mon_ops, const char *prefix, GListPtr only_rsc, gboolean print_spacer) { GListPtr gIter, gIter2; int rc = pcmk_rc_no_output; /* Print each ban */ for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *location = gIter->data; if (prefix != NULL && !g_str_has_prefix(location->id, prefix)) continue; if (!pcmk__str_in_list(only_rsc, rsc_printable_id(location->rsc_lh)) && !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(location->rsc_lh)))) { continue; } for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) { pe_node_t *node = (pe_node_t *) gIter2->data; if (node->weight < 0) { PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Negative Location Constraints"); out->message(out, "ban", node, location, is_set(mon_ops, mon_op_print_clone_detail)); } } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Print node attributes section * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] mon_ops Bitmask of mon_op_*. */ static int print_node_attributes(pcmk__output_t *out, pe_working_set_t *data_set, unsigned int mon_ops, GListPtr only_node, GListPtr only_rsc, gboolean print_spacer) { GListPtr gIter = NULL; int rc = pcmk_rc_no_output; /* Unpack all resource parameters (it would be more efficient to do this * only when needed for the first time in add_extra_info()) */ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { crm_mon_get_parameters(gIter->data, data_set); } /* Display each node's attributes */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { struct mon_attr_data data; data.out = out; data.node = (pe_node_t *) gIter->data; if (data.node && data.node->details && data.node->details->online) { GList *attr_list = NULL; GHashTableIter iter; gpointer key, value; g_hash_table_iter_init(&iter, data.node->details->attrs); while (g_hash_table_iter_next (&iter, &key, &value)) { attr_list = append_attr_list(attr_list, key); } if (attr_list == NULL) { continue; } if (!pcmk__str_in_list(only_node, data.node->details->uname)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node Attributes"); out->message(out, "node", data.node, get_resource_display_options(mon_ops), FALSE, NULL, is_set(mon_ops, mon_op_print_clone_detail), is_set(mon_ops, mon_op_print_brief), is_set(mon_ops, mon_op_group_by_node), only_node, only_rsc); g_list_foreach(attr_list, print_node_attribute, &data); g_list_free(attr_list); out->end_list(out); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } /*! * \internal * \brief Print a section for failed actions * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. */ static int print_failed_actions(pcmk__output_t *out, pe_working_set_t *data_set, GListPtr only_node, gboolean print_spacer) { xmlNode *xml_op = NULL; int rc = pcmk_rc_no_output; if (xmlChildElementCount(data_set->failed) == 0) { return rc; } for (xml_op = __xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { if (!pcmk__str_in_list(only_node, crm_element_value(xml_op, XML_ATTR_UNAME))) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Resource Actions"); out->message(out, "failed-action", xml_op); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } #define CHECK_RC(retcode, retval) \ if (retval == pcmk_rc_ok) { \ retcode = pcmk_rc_ok; \ } /*! * \internal * \brief Top-level printing function for text/curses output. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] stonith_history List of stonith actions. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] show Bitmask of mon_show_*. * \param[in] prefix ID prefix to filter results by. */ void print_status(pcmk__output_t *out, pe_working_set_t *data_set, stonith_history_t *stonith_history, unsigned int mon_ops, unsigned int show, char *prefix, char *only_node, char *only_rsc) { GListPtr unames = NULL; GListPtr resources = NULL; unsigned int print_opts = get_resource_display_options(mon_ops); int rc = pcmk_rc_no_output; CHECK_RC(rc, out->message(out, "cluster-summary", data_set, is_set(mon_ops, mon_op_print_clone_detail), is_set(show, mon_show_stack), is_set(show, mon_show_dc), is_set(show, mon_show_times), is_set(show, mon_show_counts), is_set(show, mon_show_options))); unames = build_uname_list(data_set, only_node); resources = build_rsc_list(data_set, only_rsc); if (is_set(show, mon_show_nodes) && unames) { PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); CHECK_RC(rc, out->message(out, "node-list", data_set->nodes, unames, resources, print_opts, is_set(mon_ops, mon_op_print_clone_detail), is_set(mon_ops, mon_op_print_brief), is_set(mon_ops, mon_op_group_by_node))); } /* Print resources section, if needed */ if (is_set(show, mon_show_resources)) { CHECK_RC(rc, print_resources(out, data_set, print_opts, mon_ops, is_set(mon_ops, mon_op_print_brief), TRUE, unames, resources, rc == pcmk_rc_ok)); } /* print Node Attributes section if requested */ if (is_set(show, mon_show_attributes)) { CHECK_RC(rc, print_node_attributes(out, data_set, mon_ops, unames, resources, rc == pcmk_rc_ok)); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (is_set(show, mon_show_operations) || is_set(show, mon_show_failcounts)) { CHECK_RC(rc, print_node_summary(out, data_set, is_set(show, mon_show_operations), mon_ops, unames, resources, rc == pcmk_rc_ok)); } /* If there were any failed actions, print them */ if (is_set(show, mon_show_failures) && xml_has_children(data_set->failed)) { CHECK_RC(rc, print_failed_actions(out, data_set, unames, rc == pcmk_rc_ok)); } /* Print failed stonith actions */ if (is_set(show, mon_show_fence_failed) && is_set(mon_ops, mon_op_fence_history)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, GINT_TO_POINTER(st_failed)); if (hp) { CHECK_RC(rc, out->message(out, "failed-fencing-history", hp, unames, is_set(mon_ops, mon_op_fence_full_history), rc == pcmk_rc_ok)); } } /* Print tickets if requested */ if (is_set(show, mon_show_tickets)) { CHECK_RC(rc, print_cluster_tickets(out, data_set, rc == pcmk_rc_ok)); } /* Print negative location constraints if requested */ if (is_set(show, mon_show_bans)) { CHECK_RC(rc, print_neg_locations(out, data_set, mon_ops, prefix, resources, rc == pcmk_rc_ok)); } /* Print stonith history */ if (is_set(mon_ops, mon_op_fence_history)) { if (is_set(show, mon_show_fence_worked)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, GINT_TO_POINTER(st_failed)); if (hp) { CHECK_RC(rc, out->message(out, "fencing-history", hp, unames, is_set(mon_ops, mon_op_fence_full_history), rc == pcmk_rc_ok)); } } else if (is_set(show, mon_show_fence_pending)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); if (hp) { CHECK_RC(rc, out->message(out, "pending-fencing-actions", hp, unames, is_set(mon_ops, mon_op_fence_full_history), rc == pcmk_rc_ok)); } } } g_list_free_full(unames, free); } /*! * \internal * \brief Top-level printing function for XML output. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] stonith_history List of stonith actions. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] show Bitmask of mon_show_*. * \param[in] prefix ID prefix to filter results by. */ void print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set, crm_exit_t history_rc, stonith_history_t *stonith_history, unsigned int mon_ops, unsigned int show, char *prefix, char *only_node, char *only_rsc) { GListPtr unames = NULL; GListPtr resources = NULL; unsigned int print_opts = get_resource_display_options(mon_ops); out->message(out, "cluster-summary", data_set, is_set(mon_ops, mon_op_print_clone_detail), is_set(show, mon_show_stack), is_set(show, mon_show_dc), is_set(show, mon_show_times), is_set(show, mon_show_counts), is_set(show, mon_show_options)); unames = build_uname_list(data_set, only_node); resources = build_rsc_list(data_set, only_rsc); /*** NODES ***/ if (is_set(show, mon_show_nodes)) { out->message(out, "node-list", data_set->nodes, unames, resources, print_opts, is_set(mon_ops, mon_op_print_clone_detail), is_set(mon_ops, mon_op_print_brief), is_set(mon_ops, mon_op_group_by_node)); } /* Print resources section, if needed */ if (is_set(show, mon_show_resources)) { print_resources(out, data_set, print_opts, mon_ops, FALSE, FALSE, unames, resources, FALSE); } /* print Node Attributes section if requested */ if (is_set(show, mon_show_attributes)) { print_node_attributes(out, data_set, mon_ops, unames, resources, FALSE); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (is_set(show, mon_show_operations) || is_set(show, mon_show_failcounts)) { print_node_summary(out, data_set, is_set(show, mon_show_operations), mon_ops, unames, resources, FALSE); } /* If there were any failed actions, print them */ if (is_set(show, mon_show_failures) && xml_has_children(data_set->failed)) { print_failed_actions(out, data_set, unames, FALSE); } /* Print stonith history */ if (is_set(show, mon_show_fencing_all) && is_set(mon_ops, mon_op_fence_history)) { out->message(out, "full-fencing-history", history_rc, stonith_history, unames, is_set(mon_ops, mon_op_fence_full_history), FALSE); } /* Print tickets if requested */ if (is_set(show, mon_show_tickets)) { print_cluster_tickets(out, data_set, FALSE); } /* Print negative location constraints if requested */ if (is_set(show, mon_show_bans)) { print_neg_locations(out, data_set, mon_ops, prefix, resources, FALSE); } g_list_free_full(unames, free); g_list_free_full(resources, free); } /*! * \internal * \brief Top-level printing function for HTML output. * * \param[in] out The output functions structure. * \param[in] data_set Cluster state to display. * \param[in] stonith_history List of stonith actions. * \param[in] mon_ops Bitmask of mon_op_*. * \param[in] show Bitmask of mon_show_*. * \param[in] prefix ID prefix to filter results by. */ int print_html_status(pcmk__output_t *out, pe_working_set_t *data_set, stonith_history_t *stonith_history, unsigned int mon_ops, unsigned int show, char *prefix, char *only_node, char *only_rsc) { GListPtr unames = NULL; GListPtr resources = NULL; unsigned int print_opts = get_resource_display_options(mon_ops); out->message(out, "cluster-summary", data_set, is_set(mon_ops, mon_op_print_clone_detail), is_set(show, mon_show_stack), is_set(show, mon_show_dc), is_set(show, mon_show_times), is_set(show, mon_show_counts), is_set(show, mon_show_options)); unames = build_uname_list(data_set, only_node); resources = build_rsc_list(data_set, only_rsc); /*** NODE LIST ***/ if (is_set(show, mon_show_nodes) && unames) { out->message(out, "node-list", data_set->nodes, unames, resources, print_opts, is_set(mon_ops, mon_op_print_clone_detail), is_set(mon_ops, mon_op_print_brief), is_set(mon_ops, mon_op_group_by_node)); } /* Print resources section, if needed */ if (is_set(show, mon_show_resources)) { print_resources(out, data_set, print_opts, mon_ops, is_set(mon_ops, mon_op_print_brief), TRUE, unames, resources, FALSE); } /* print Node Attributes section if requested */ if (is_set(show, mon_show_attributes)) { print_node_attributes(out, data_set, mon_ops, unames, resources, FALSE); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (is_set(show, mon_show_operations) || is_set(show, mon_show_failcounts)) { print_node_summary(out, data_set, is_set(show, mon_show_operations), mon_ops, unames, resources, FALSE); } /* If there were any failed actions, print them */ if (is_set(show, mon_show_failures) && xml_has_children(data_set->failed)) { print_failed_actions(out, data_set, unames, FALSE); } /* Print failed stonith actions */ if (is_set(show, mon_show_fence_failed) && is_set(mon_ops, mon_op_fence_history)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, GINT_TO_POINTER(st_failed)); if (hp) { out->message(out, "failed-fencing-history", hp, unames, is_set(mon_ops, mon_op_fence_full_history), FALSE); } } /* Print stonith history */ if (is_set(mon_ops, mon_op_fence_history)) { if (is_set(show, mon_show_fence_worked)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, GINT_TO_POINTER(st_failed)); if (hp) { out->message(out, "fencing-history", hp, unames, is_set(mon_ops, mon_op_fence_full_history), FALSE); } } else if (is_set(show, mon_show_fence_pending)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); if (hp) { out->message(out, "pending-fencing-actions", hp, unames, is_set(mon_ops, mon_op_fence_full_history), FALSE); } } } /* Print tickets if requested */ if (is_set(show, mon_show_tickets)) { print_cluster_tickets(out, data_set, FALSE); } /* Print negative location constraints if requested */ if (is_set(show, mon_show_bans)) { print_neg_locations(out, data_set, mon_ops, prefix, resources, FALSE); } g_list_free_full(unames, free); g_list_free_full(resources, free); return 0; } diff --git a/tools/crm_resource.c b/tools/crm_resource.c index ffeca585a9..b31e9031bf 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -1,1773 +1,1773 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "crm_resource - perform tasks related to Pacemaker cluster resources" struct { const char *attr_set_type; int cib_options; gboolean clear_expired; char *extra_arg; char *extra_option; int find_flags; /* Flags to use when searching for resource */ gboolean force; char *host_uname; char *interval_spec; char *operation; GHashTable *override_params; char *prop_id; char *prop_name; char *prop_set; char *prop_value; gboolean recursive; gchar **remainder; gboolean require_crmd; /* whether command requires controller connection */ gboolean require_dataset; /* whether command requires populated dataset instance */ gboolean require_resource; /* whether command requires that resource be specified */ int resource_verbose; char rsc_cmd; char *rsc_id; char *rsc_long_cmd; char *rsc_type; gboolean promoted_role_only; int timeout_ms; char *v_agent; char *v_class; char *v_provider; gboolean validate_cmdline; /* whether we are just validating based on command line options */ GHashTable *validate_options; char *xml_file; } options = { .attr_set_type = XML_TAG_ATTR_SETS, .cib_options = cib_sync_call, .require_dataset = TRUE, .require_resource = TRUE, .rsc_cmd = 'L' }; gboolean agent_provider_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean attr_set_type_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean class_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean cleanup_refresh_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean delete_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean expired_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean extra_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean fail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean flag_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean get_param_prop_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean list_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean set_delete_param_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean set_prop_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean timeout_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean validate_restart_force_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean wait_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean why_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); bool BE_QUIET = FALSE; static crm_exit_t exit_code = CRM_EX_OK; // Things that should be cleaned up on exit static GError *error = NULL; static GMainLoop *mainloop = NULL; static cib_t *cib_conn = NULL; static pcmk_ipc_api_t *controld_api = NULL; static pe_working_set_t *data_set = NULL; #define MESSAGE_TIMEOUT_S 60 #define INDENT " " // Clean up and exit static crm_exit_t bye(crm_exit_t ec) { if (error != NULL) { fprintf(stderr, "%s\n", error->message); g_clear_error(&error); } if (cib_conn != NULL) { cib_t *save_cib_conn = cib_conn; cib_conn = NULL; // Ensure we can't free this twice save_cib_conn->cmds->signoff(save_cib_conn); cib_delete(save_cib_conn); } if (controld_api != NULL) { pcmk_ipc_api_t *save_controld_api = controld_api; controld_api = NULL; // Ensure we can't free this twice pcmk_free_ipc_api(save_controld_api); } if (mainloop != NULL) { g_main_loop_unref(mainloop); mainloop = NULL; } pe_free_working_set(data_set); data_set = NULL; crm_exit(ec); return ec; } static void quit_main_loop(crm_exit_t ec) { exit_code = ec; if (mainloop != NULL) { GMainLoop *mloop = mainloop; mainloop = NULL; // Don't re-enter this block pcmk_quit_main_loop(mloop, 10); g_main_loop_unref(mloop); } } static gboolean resource_ipc_timeout(gpointer data) { // Start with newline because "Waiting for ..." message doesn't have one if (error != NULL) { g_clear_error(&error); } g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_TIMEOUT, "\nAborting because no messages received in %d seconds", MESSAGE_TIMEOUT_S); quit_main_loop(CRM_EX_TIMEOUT); return FALSE; } static void controller_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { switch (event_type) { case pcmk_ipc_event_disconnect: if (exit_code == CRM_EX_DISCONNECT) { // Unexpected crm_info("Connection to controller was terminated"); } quit_main_loop(exit_code); break; case pcmk_ipc_event_reply: if (status != CRM_EX_OK) { fprintf(stderr, "\nError: bad reply from controller: %s\n", crm_exit_str(status)); pcmk_disconnect_ipc(api); quit_main_loop(status); } else { fprintf(stderr, "."); if ((pcmk_controld_api_replies_expected(api) == 0) && mainloop && g_main_loop_is_running(mainloop)) { fprintf(stderr, " OK\n"); crm_debug("Got all the replies we expected"); pcmk_disconnect_ipc(api); quit_main_loop(CRM_EX_OK); } } break; default: break; } } static void start_mainloop(pcmk_ipc_api_t *capi) { unsigned int count = pcmk_controld_api_replies_expected(capi); if (count > 0) { fprintf(stderr, "Waiting for %d %s from the controller", count, pcmk__plural_alt(count, "reply", "replies")); exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects mainloop = g_main_loop_new(NULL, FALSE); g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL); g_main_loop_run(mainloop); } } static int compare_id(gconstpointer a, gconstpointer b) { return strcmp((const char *)a, (const char *)b); } static GListPtr build_constraint_list(xmlNode *root) { GListPtr retval = NULL; xmlNode *cib_constraints = NULL; xmlXPathObjectPtr xpathObj = NULL; int ndx = 0; cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, root); xpathObj = xpath_search(cib_constraints, "//" XML_CONS_TAG_RSC_LOCATION); for (ndx = 0; ndx < numXpathResults(xpathObj); ndx++) { xmlNode *match = getXpathResult(xpathObj, ndx); retval = g_list_insert_sorted(retval, (gpointer) ID(match), compare_id); } freeXpathObject(xpathObj); return retval; } /* short option letters still available: eEJkKXyYZ */ static GOptionEntry query_entries[] = { { "list", 'L', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, list_cb, "List all cluster resources with status", NULL }, { "list-raw", 'l', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, list_cb, "List IDs of all instantiated resources (individual members\n" INDENT "rather than groups etc.)", NULL }, { "list-cts", 'c', G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, list_cb, NULL, NULL }, { "list-operations", 'O', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, list_cb, "List active resource operations, optionally filtered by\n" INDENT "--resource and/or --node", NULL }, { "list-all-operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, list_cb, "List all resource operations, optionally filtered by\n" INDENT "--resource and/or --node", NULL }, { "list-standards", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, extra_cb, "List supported standards", NULL }, { "list-ocf-providers", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, extra_cb, "List all available OCF providers", NULL }, { "list-agents", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, extra_cb, "List all agents available for the named standard and/or provider", "STD/PROV" }, { "list-ocf-alternatives", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, extra_cb, "List all available providers for the named OCF agent", "AGENT" }, { "show-metadata", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, extra_cb, "Show the metadata for the named class:provider:agent", "SPEC" }, { "query-xml", 'q', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, flag_cb, "Show XML configuration of resource (after any template expansion)", NULL }, { "query-xml-raw", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, flag_cb, "Show XML configuration of resource (before any template expansion)", NULL }, { "get-parameter", 'g', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, get_param_prop_cb, "Display named parameter for resource (use instance attribute\n" INDENT "unless --meta or --utilization is specified)", "PARAM" }, { "get-property", 'G', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, get_param_prop_cb, "Display named property of resource ('class', 'type', or 'provider') " "(requires --resource)", "PROPERTY" }, { "locate", 'W', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, flag_cb, "Show node(s) currently running resource", NULL }, { "stack", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, flag_cb, "Display the prerequisites and dependents of a resource", NULL }, { "constraints", 'a', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, flag_cb, "Display the (co)location constraints that apply to a resource", NULL }, { "why", 'Y', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, why_cb, "Show why resources are not running, optionally filtered by\n" INDENT "--resource and/or --node", NULL }, { NULL } }; static GOptionEntry command_entries[] = { { "validate", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, validate_restart_force_cb, "Validate resource configuration by calling agent's validate-all\n" INDENT "action. The configuration may be specified either by giving an\n" INDENT "existing resource name with -r, or by specifying --class,\n" INDENT "--agent, and --provider arguments, along with any number of\n" INDENT "--option arguments.", NULL }, { "cleanup", 'C', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, cleanup_refresh_cb, "If resource has any past failures, clear its history and fail\n" INDENT "count. Optionally filtered by --resource, --node, --operation\n" INDENT "and --interval (otherwise all). --operation and --interval\n" INDENT "apply to fail counts, but entire history is always clear, to\n" INDENT "allow current state to be rechecked. If the named resource is\n" INDENT "part of a group, or one numbered instance of a clone or bundled\n" INDENT "resource, the clean-up applies to the whole collective resource\n" INDENT "unless --force is given.", NULL }, { "refresh", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, cleanup_refresh_cb, "Delete resource's history (including failures) so its current state\n" INDENT "is rechecked. Optionally filtered by --resource and --node\n" INDENT "(otherwise all). If the named resource is part of a group, or one\n" INDENT "numbered instance of a clone or bundled resource, the refresh\n" INDENT "applies to the whole collective resource unless --force is given.", NULL }, { "set-parameter", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, set_delete_param_cb, "Set named parameter for resource (requires -v). Use instance\n" INDENT "attribute unless --meta or --utilization is specified.", "PARAM" }, { "delete-parameter", 'd', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, set_delete_param_cb, "Delete named parameter for resource. Use instance attribute\n" INDENT "unless --meta or --utilization is specified.", "PARAM" }, { "set-property", 'S', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, set_prop_cb, "Set named property of resource ('class', 'type', or 'provider') " "(requires -r, -t, -v)", "PROPERTY" }, { NULL } }; static GOptionEntry location_entries[] = { { "move", 'M', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, flag_cb, "Create a constraint to move resource. If --node is specified,\n" INDENT "the constraint will be to move to that node, otherwise it\n" INDENT "will be to ban the current node. Unless --force is specified\n" INDENT "this will return an error if the resource is already running\n" INDENT "on the specified node. If --force is specified, this will\n" INDENT "always ban the current node.\n" INDENT "Optional: --lifetime, --master. NOTE: This may prevent the\n" INDENT "resource from running on its previous location until the\n" INDENT "implicit constraint expires or is removed with --clear.", NULL }, { "ban", 'B', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, flag_cb, "Create a constraint to keep resource off a node.\n" INDENT "Optional: --node, --lifetime, --master.\n" INDENT "NOTE: This will prevent the resource from running on the\n" INDENT "affected node until the implicit constraint expires or is\n" INDENT "removed with --clear. If --node is not specified, it defaults\n" INDENT "to the node currently running the resource for primitives\n" INDENT "and groups, or the master for promotable clones with\n" INDENT "promoted-max=1 (all other situations result in an error as\n" INDENT "there is no sane default).", NULL }, { "clear", 'U', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, flag_cb, "Remove all constraints created by the --ban and/or --move\n" INDENT "commands. Requires: --resource. Optional: --node, --master,\n" INDENT "--expired. If --node is not specified, all constraints created\n" INDENT "by --ban and --move will be removed for the named resource. If\n" INDENT "--node and --force are specified, any constraint created by\n" INDENT "--move will be cleared, even if it is not for the specified\n" INDENT "node. If --expired is specified, only those constraints whose\n" INDENT "lifetimes have expired will be removed.", NULL }, { "expired", 'e', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, expired_cb, "Modifies the --clear argument to remove constraints with\n" INDENT "expired lifetimes.", NULL }, { "lifetime", 'u', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &move_lifetime, "Lifespan (as ISO 8601 duration) of created constraints (with\n" INDENT "-B, -M) see https://en.wikipedia.org/wiki/ISO_8601#Durations)", "TIMESPEC" }, { "master", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.promoted_role_only, "Limit scope of command to Master role (with -B, -M, -U). For\n" INDENT "-B and -M the previous master may remain active in the Slave role.", NULL }, { NULL } }; static GOptionEntry advanced_entries[] = { { "delete", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, delete_cb, "(Advanced) Delete a resource from the CIB. Required: -t", NULL }, { "fail", 'F', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, fail_cb, "(Advanced) Tell the cluster this resource has failed", NULL }, { "restart", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, validate_restart_force_cb, "(Advanced) Tell the cluster to restart this resource and\n" INDENT "anything that depends on it", NULL }, { "wait", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, wait_cb, "(Advanced) Wait until the cluster settles into a stable state", NULL }, { "force-demote", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, validate_restart_force_cb, "(Advanced) Bypass the cluster and demote a resource on the local\n" INDENT "node. Unless --force is specified, this will refuse to do so if\n" INDENT "the cluster believes the resource is a clone instance already\n" INDENT "running on the local node.", NULL }, { "force-stop", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, validate_restart_force_cb, "(Advanced) Bypass the cluster and stop a resource on the local node", NULL }, { "force-start", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, validate_restart_force_cb, "(Advanced) Bypass the cluster and start a resource on the local\n" INDENT "node. Unless --force is specified, this will refuse to do so if\n" INDENT "the cluster believes the resource is a clone instance already\n" INDENT "running on the local node.", NULL }, { "force-promote", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, validate_restart_force_cb, "(Advanced) Bypass the cluster and promote a resource on the local\n" INDENT "node. Unless --force is specified, this will refuse to do so if\n" INDENT "the cluster believes the resource is a clone instance already\n" INDENT "running on the local node.", NULL }, { "force-check", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, validate_restart_force_cb, "(Advanced) Bypass the cluster and check the state of a resource on\n" INDENT "the local node", NULL }, { NULL } }; static GOptionEntry validate_entries[] = { { "class", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, class_cb, "The standard the resource agent confirms to (for example, ocf).\n" INDENT "Use with --agent, --provider, --option, and --validate.", "CLASS" }, { "agent", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, agent_provider_cb, "The agent to use (for example, IPaddr). Use with --class,\n" INDENT "--provider, --option, and --validate.", "AGENT" }, { "provider", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, agent_provider_cb, "The vendor that supplies the resource agent (for example,\n" INDENT "heartbeat). Use with --class, --agent, --option, and --validate.", "PROVIDER" }, { "option", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, extra_cb, "Specify a device configuration parameter as NAME=VALUE (may be\n" INDENT "specified multiple times). Use with --validate and without the\n" INDENT "-r option.", "PARAM" }, { NULL } }; static GOptionEntry addl_entries[] = { { "node", 'N', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.host_uname, "Node name", "NAME" }, { "recursive", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.recursive, "Follow colocation chains when using --set-parameter", NULL }, { "resource-type", 't', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.rsc_type, "Resource XML element (primitive, group, etc.) (with -D)", "ELEMENT" }, { "parameter-value", 'v', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_value, "Value to use with -p", "PARAM" }, { "meta", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb, "Use resource meta-attribute instead of instance attribute\n" INDENT "(with -p, -g, -d)", NULL }, { "utilization", 'z', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb, "Use resource utilization attribute instead of instance attribute\n" INDENT "(with -p, -g, -d)", NULL }, { "operation", 'n', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.operation, "Operation to clear instead of all (with -C -r)", "OPERATION" }, { "interval", 'I', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.interval_spec, "Interval of operation to clear (default 0) (with -C -r -n)", "N" }, { "set-name", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_set, "(Advanced) XML ID of attributes element to use (with -p, -d)", "ID" }, { "nvpair", 'i', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_id, "(Advanced) XML ID of nvpair element to use (with -p, -d)", "ID" }, { "timeout", 'T', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, timeout_cb, "(Advanced) Abort if command does not finish in this time (with\n" INDENT "--restart, --wait, --force-*)", "N" }, { "force", 'f', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.force, "If making CIB changes, do so regardless of quorum. See help for\n" INDENT "individual commands for additional behavior.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_FILENAME, &options.xml_file, NULL, "FILE" }, { "host-uname", 'H', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_STRING, &options.host_uname, NULL, "HOST" }, { NULL } }; gboolean agent_provider_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.validate_cmdline = TRUE; options.require_resource = FALSE; if (pcmk__str_eq(option_name, "--provider", pcmk__str_casei)) { if (options.v_provider) { free(options.v_provider); } options.v_provider = strdup(optarg); } else { if (options.v_agent) { free(options.v_agent); } options.v_agent = strdup(optarg); } return TRUE; } gboolean attr_set_type_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { - if (pcmk__str_eq(option_name, "-m", pcmk__str_none) || pcmk__str_eq(option_name, "--meta", pcmk__str_none)) { + if (pcmk__str_any_of(option_name, "-m", "--meta", NULL)) { options.attr_set_type = XML_TAG_META_SETS; - } else if (pcmk__str_eq(option_name, "-z", pcmk__str_none) || pcmk__str_eq(option_name, "--utilization", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-z", "--utilization", NULL)) { options.attr_set_type = XML_TAG_UTILIZATION; } return TRUE; } gboolean class_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (!(pcmk_get_ra_caps(optarg) & pcmk_ra_cap_params)) { if (BE_QUIET == FALSE) { g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "Standard %s does not support parameters\n", optarg); } return FALSE; } else { if (options.v_class != NULL) { free(options.v_class); } options.v_class = strdup(optarg); } options.validate_cmdline = TRUE; options.require_resource = FALSE; return TRUE; } gboolean cleanup_refresh_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { - if (pcmk__str_eq(option_name, "-C", pcmk__str_none) || pcmk__str_eq(option_name, "--cleanup", pcmk__str_none)) { + if (pcmk__str_any_of(option_name, "-C", "--cleanup", NULL)) { options.rsc_cmd = 'C'; } else { options.rsc_cmd = 'R'; } options.require_resource = FALSE; if (getenv("CIB_file") == NULL) { options.require_crmd = TRUE; } options.find_flags = pe_find_renamed|pe_find_anon; return TRUE; } gboolean delete_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.require_dataset = FALSE; options.rsc_cmd = 'D'; options.find_flags = pe_find_renamed|pe_find_any; return TRUE; } gboolean expired_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.clear_expired = TRUE; options.require_resource = FALSE; return TRUE; } gboolean extra_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.extra_option) { free(options.extra_option); } if (options.extra_arg) { free(options.extra_arg); } options.extra_option = strdup(option_name); if (optarg) { options.extra_arg = strdup(optarg); } return TRUE; } gboolean fail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.require_crmd = TRUE; options.rsc_cmd = 'F'; return TRUE; } gboolean flag_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { - if (pcmk__str_eq(option_name, "-U", pcmk__str_none) || pcmk__str_eq(option_name, "--clear", pcmk__str_none)) { + if (pcmk__str_any_of(option_name, "-U", "--clear", NULL)) { options.find_flags = pe_find_renamed|pe_find_anon; options.rsc_cmd = 'U'; - } else if (pcmk__str_eq(option_name, "-B", pcmk__str_none) || pcmk__str_eq(option_name, "--ban", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-B", "--ban", NULL)) { options.find_flags = pe_find_renamed|pe_find_anon; options.rsc_cmd = 'B'; - } else if (pcmk__str_eq(option_name, "-M", pcmk__str_none) || pcmk__str_eq(option_name, "--move", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-M", "--move", NULL)) { options.find_flags = pe_find_renamed|pe_find_anon; options.rsc_cmd = 'M'; - } else if (pcmk__str_eq(option_name, "-q", pcmk__str_none) || pcmk__str_eq(option_name, "--query-xml", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-q", "--query-xml", NULL)) { options.find_flags = pe_find_renamed|pe_find_any; options.rsc_cmd = 'q'; - } else if (pcmk__str_eq(option_name, "-w", pcmk__str_none) || pcmk__str_eq(option_name, "--query-xml-raw", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-w", "--query-xml-raw", NULL)) { options.find_flags = pe_find_renamed|pe_find_any; options.rsc_cmd = 'w'; - } else if (pcmk__str_eq(option_name, "-W", pcmk__str_none) || pcmk__str_eq(option_name, "--locate", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-W", "--locate", NULL)) { options.find_flags = pe_find_renamed|pe_find_anon; options.rsc_cmd = 'W'; - } else if (pcmk__str_eq(option_name, "-A", pcmk__str_none) || pcmk__str_eq(option_name, "--stack", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-A", "--stack", NULL)) { options.find_flags = pe_find_renamed|pe_find_anon; options.rsc_cmd = 'A'; } else { options.find_flags = pe_find_renamed|pe_find_anon; options.rsc_cmd = 'a'; } return TRUE; } gboolean get_param_prop_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { - if (pcmk__str_eq(option_name, "-g", pcmk__str_none) || pcmk__str_eq(option_name, "--get-parameter", pcmk__str_none)) { + if (pcmk__str_any_of(option_name, "-g", "--get-parameter", NULL)) { options.rsc_cmd = 'g'; } else { options.rsc_cmd = 'G'; } if (options.prop_name) { free(options.prop_name); } options.prop_name = strdup(optarg); options.find_flags = pe_find_renamed|pe_find_any; return TRUE; } gboolean list_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { - if (pcmk__str_eq(option_name, "-c", pcmk__str_none) || pcmk__str_eq(option_name, "--list-cts", pcmk__str_none)) { + if (pcmk__str_any_of(option_name, "-c", "--list-cts", NULL)) { options.rsc_cmd = 'c'; - } else if (pcmk__str_eq(option_name, "-L", pcmk__str_none) || pcmk__str_eq(option_name, "--list", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-L", "--list", NULL)) { options.rsc_cmd = 'L'; - } else if (pcmk__str_eq(option_name, "-l", pcmk__str_none) || pcmk__str_eq(option_name, "--list-raw", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-l", "--list-raw", NULL)) { options.rsc_cmd = 'l'; - } else if (pcmk__str_eq(option_name, "-O", pcmk__str_none) || pcmk__str_eq(option_name, "--list-operations", pcmk__str_none)) { + } else if (pcmk__str_any_of(option_name, "-O", "--list-operations", NULL)) { options.rsc_cmd = 'O'; } else { options.rsc_cmd = 'o'; } options.require_resource = FALSE; return TRUE; } gboolean set_delete_param_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { - if (pcmk__str_eq(option_name, "-p", pcmk__str_none) || pcmk__str_eq(option_name, "--set-parameter", pcmk__str_none)) { + if (pcmk__str_any_of(option_name, "-p", "--set-parameter", NULL)) { options.rsc_cmd = 'p'; } else { options.rsc_cmd = 'd'; } if (options.prop_name) { free(options.prop_name); } options.prop_name = strdup(optarg); options.find_flags = pe_find_renamed|pe_find_any; return TRUE; } gboolean set_prop_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.require_dataset = FALSE; if (options.prop_name) { free(options.prop_name); } options.prop_name = strdup(optarg); options.rsc_cmd = 'S'; options.find_flags = pe_find_renamed|pe_find_any; return TRUE; } gboolean timeout_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.timeout_ms = crm_get_msec(optarg); return TRUE; } gboolean validate_restart_force_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.rsc_cmd = 0; if (options.rsc_long_cmd) { free(options.rsc_long_cmd); } options.rsc_long_cmd = strdup(option_name+2); options.find_flags = pe_find_renamed|pe_find_anon; return TRUE; } gboolean wait_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.rsc_cmd = 0; if (options.rsc_long_cmd) { free(options.rsc_long_cmd); } options.rsc_long_cmd = strdup("wait"); options.require_resource = FALSE; options.require_dataset = FALSE; return TRUE; } gboolean why_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.require_resource = FALSE; options.rsc_cmd = 'Y'; options.find_flags = pe_find_renamed|pe_find_anon; return TRUE; } static int ban_or_move(pe_resource_t *rsc, crm_exit_t *exit_code) { int rc = pcmk_rc_ok; pe_node_t *current = NULL; unsigned int nactive = 0; current = pe__find_active_requires(rsc, &nactive); if (nactive == 1) { rc = cli_resource_ban(options.rsc_id, current->details->uname, NULL, cib_conn, options.cib_options, options.promoted_role_only); } else if (is_set(rsc->flags, pe_rsc_promotable)) { int count = 0; GListPtr iter = NULL; current = NULL; for(iter = rsc->children; iter; iter = iter->next) { pe_resource_t *child = (pe_resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { count++; current = pe__current_node(child); } } if(count == 1 && current) { rc = cli_resource_ban(options.rsc_id, current->details->uname, NULL, cib_conn, options.cib_options, options.promoted_role_only); } else { rc = EINVAL; *exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, *exit_code, "Resource '%s' not moved: active in %d locations (promoted in %d).\n" "To prevent '%s' from running on a specific location, " "specify a node." "To prevent '%s' from being promoted at a specific " "location, specify a node and the master option.", options.rsc_id, nactive, count, options.rsc_id, options.rsc_id); } } else { rc = EINVAL; *exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, *exit_code, "Resource '%s' not moved: active in %d locations.\n" "To prevent '%s' from running on a specific location, " "specify a node.", options.rsc_id, nactive, options.rsc_id); } return rc; } static void cleanup(pe_resource_t *rsc) { int rc = pcmk_rc_ok; if (options.force == FALSE) { rsc = uber_parent(rsc); } crm_debug("Erasing failures of %s (%s requested) on %s", rsc->id, options.rsc_id, (options.host_uname? options.host_uname: "all nodes")); rc = cli_resource_delete(controld_api, options.host_uname, rsc, options.operation, options.interval_spec, TRUE, data_set, options.force); if ((rc == pcmk_rc_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped cli_resource_check(cib_conn, rsc); } if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } } static int clear_constraints(xmlNodePtr *cib_xml_copy) { GListPtr before = NULL; GListPtr after = NULL; GListPtr remaining = NULL; GListPtr ele = NULL; pe_node_t *dest = NULL; int rc = pcmk_rc_ok; if (BE_QUIET == FALSE) { before = build_constraint_list(data_set->input); } if (options.clear_expired) { rc = cli_resource_clear_all_expired(data_set->input, cib_conn, options.cib_options, options.rsc_id, options.host_uname, options.promoted_role_only); } else if (options.host_uname) { dest = pe_find_node(data_set->nodes, options.host_uname); if (dest == NULL) { rc = pcmk_rc_node_unknown; if (BE_QUIET == FALSE) { g_list_free(before); } return rc; } rc = cli_resource_clear(options.rsc_id, dest->details->uname, NULL, cib_conn, options.cib_options, TRUE, options.force); } else { rc = cli_resource_clear(options.rsc_id, NULL, data_set->nodes, cib_conn, options.cib_options, TRUE, options.force); } if (BE_QUIET == FALSE) { rc = cib_conn->cmds->query(cib_conn, NULL, cib_xml_copy, cib_scope_local | cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, "Could not get modified CIB: %s\n", pcmk_strerror(rc)); g_list_free(before); return rc; } data_set->input = *cib_xml_copy; cluster_status(data_set); after = build_constraint_list(data_set->input); remaining = subtract_lists(before, after, (GCompareFunc) strcmp); for (ele = remaining; ele != NULL; ele = ele->next) { printf("Removing constraint: %s\n", (char *) ele->data); } g_list_free(before); g_list_free(after); g_list_free(remaining); } return rc; } static int delete() { int rc = pcmk_rc_ok; xmlNode *msg_data = NULL; if (options.rsc_type == NULL) { rc = ENXIO; g_set_error(&error, PCMK__RC_ERROR, rc, "You need to specify a resource type with -t"); return rc; } msg_data = create_xml_node(NULL, options.rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, options.rsc_id); rc = cib_conn->cmds->remove(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, options.cib_options); rc = pcmk_legacy2rc(rc); free_xml(msg_data); return rc; } static int list_agents(const char *spec, crm_exit_t *exit_code) { int rc = pcmk_rc_ok; lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; char *provider = strchr (spec, ':'); lrmd_t *lrmd_conn = lrmd_api_new(); if (provider) { *provider++ = 0; } rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, spec, provider); if (rc > 0) { for (iter = list; iter != NULL; iter = iter->next) { printf("%s\n", iter->val); } lrmd_list_freeall(list); rc = pcmk_rc_ok; } else { *exit_code = CRM_EX_NOSUCH; rc = pcmk_rc_error; g_set_error(&error, PCMK__EXITC_ERROR, *exit_code, "No agents found for standard=%s, provider=%s", spec, (provider? provider : "*")); } lrmd_api_delete(lrmd_conn); return rc; } static int list_providers(const char *command, const char *spec, crm_exit_t *exit_code) { int rc = pcmk_rc_ok; const char *text = NULL; lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; lrmd_t *lrmd_conn = lrmd_api_new(); if (pcmk__strcase_any_of(command, "--list-ocf-providers", "--list-ocf-alternatives", NULL)) { rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, spec, &list); text = "OCF providers"; } else if (pcmk__str_eq("--list-standards", command, pcmk__str_casei)) { rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); text = "standards"; } if (rc > 0) { for (iter = list; iter != NULL; iter = iter->next) { printf("%s\n", iter->val); } lrmd_list_freeall(list); rc = pcmk_rc_ok; } else if (spec) { *exit_code = CRM_EX_NOSUCH; rc = pcmk_rc_error; g_set_error(&error, PCMK__EXITC_ERROR, *exit_code, "No %s found for %s", text, spec); } else { *exit_code = CRM_EX_NOSUCH; rc = pcmk_rc_error; g_set_error(&error, PCMK__EXITC_ERROR, *exit_code, "No %s found", text); } lrmd_api_delete(lrmd_conn); return rc; } static int list_raw() { int rc = pcmk_rc_ok; int found = 0; GListPtr lpc = NULL; for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; found++; cli_resource_print_raw(rsc); } if (found == 0) { printf("NO resources configured\n"); rc = ENXIO; } return rc; } static void list_stacks_and_constraints(pe_resource_t *rsc) { GListPtr lpc = NULL; xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); unpack_constraints(cib_constraints, data_set); // Constraints apply to group/clone, not member/instance rsc = uber_parent(rsc); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *r = (pe_resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } cli_resource_print_colocation(rsc, TRUE, options.rsc_cmd == 'A', 1); fprintf(stdout, "* %s\n", rsc->id); cli_resource_print_location(rsc, NULL); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *r = (pe_resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } cli_resource_print_colocation(rsc, FALSE, options.rsc_cmd == 'A', 1); } static int populate_working_set(xmlNodePtr *cib_xml_copy) { int rc = pcmk_rc_ok; if (options.xml_file != NULL) { *cib_xml_copy = filename2xml(options.xml_file); } else { rc = cib_conn->cmds->query(cib_conn, NULL, cib_xml_copy, cib_scope_local | cib_sync_call); rc = pcmk_legacy2rc(rc); } if(rc != pcmk_rc_ok) { return rc; } /* Populate the working set instance */ data_set = pe_new_working_set(); if (data_set == NULL) { rc = ENOMEM; return rc; } set_bit(data_set->flags, pe_flag_no_counts); set_bit(data_set->flags, pe_flag_no_compat); rc = update_working_set_xml(data_set, cib_xml_copy); if (rc != pcmk_rc_ok) { return rc; } cluster_status(data_set); return rc; } static int refresh() { int rc = pcmk_rc_ok; const char *router_node = options.host_uname; int attr_options = pcmk__node_attr_none; if (options.host_uname) { pe_node_t *node = pe_find_node(data_set->nodes, options.host_uname); if (pe__is_guest_or_remote_node(node)) { node = pe__current_node(node->details->remote_rsc); if (node == NULL) { rc = ENXIO; g_set_error(&error, PCMK__RC_ERROR, rc, "No cluster connection to Pacemaker Remote node %s detected", options.host_uname); return rc; } router_node = node->details->uname; attr_options |= pcmk__node_attr_remote; } } if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s due to CIB_file\n", options.host_uname? options.host_uname : "all nodes"); rc = pcmk_rc_ok; return rc; } crm_debug("Re-checking the state of all resources on %s", options.host_uname?options.host_uname:"all nodes"); rc = pcmk__node_attr_request_clear(NULL, options.host_uname, NULL, NULL, NULL, NULL, attr_options); if (pcmk_controld_api_reprobe(controld_api, options.host_uname, router_node) == pcmk_rc_ok) { start_mainloop(controld_api); } return rc; } static void refresh_resource(pe_resource_t *rsc) { int rc = pcmk_rc_ok; if (options.force == FALSE) { rsc = uber_parent(rsc); } crm_debug("Re-checking the state of %s (%s requested) on %s", rsc->id, options.rsc_id, (options.host_uname? options.host_uname: "all nodes")); rc = cli_resource_delete(controld_api, options.host_uname, rsc, NULL, 0, FALSE, data_set, options.force); if ((rc == pcmk_rc_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped cli_resource_check(cib_conn, rsc); } if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } } static int set_option(const char *arg) { int rc = pcmk_rc_ok; char *name = NULL; char *value = NULL; crm_info("Scanning: --option %s", arg); rc = pcmk_scan_nvpair(arg, &name, &value); if (rc != 2) { g_set_error(&error, PCMK__RC_ERROR, rc, "Invalid option: --option %s: %s", arg, pcmk_strerror(rc)); } else { crm_info("Got: '%s'='%s'", name, value); g_hash_table_replace(options.validate_options, name, value); } return rc; } static int set_property() { int rc = pcmk_rc_ok; xmlNode *msg_data = NULL; if (pcmk__str_empty(options.rsc_type)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "Must specify -t with resource type"); rc = ENXIO; return rc; } else if (pcmk__str_empty(options.prop_value)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "Must supply -v with new value"); rc = EINVAL; return rc; } CRM_LOG_ASSERT(options.prop_name != NULL); msg_data = create_xml_node(NULL, options.rsc_type); crm_xml_add(msg_data, XML_ATTR_ID, options.rsc_id); crm_xml_add(msg_data, options.prop_name, options.prop_value); rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_RESOURCES, msg_data, options.cib_options); rc = pcmk_legacy2rc(rc); free_xml(msg_data); return rc; } static int show_metadata(const char *spec, crm_exit_t *exit_code) { int rc = pcmk_rc_ok; char *standard = NULL; char *provider = NULL; char *type = NULL; char *metadata = NULL; lrmd_t *lrmd_conn = lrmd_api_new(); rc = crm_parse_agent_spec(spec, &standard, &provider, &type); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard, provider, type, &metadata, 0); rc = pcmk_legacy2rc(rc); if (metadata) { printf("%s\n", metadata); } else { *exit_code = crm_errno2exit(rc); g_set_error(&error, PCMK__EXITC_ERROR, *exit_code, "Metadata query for %s failed: %s", spec, pcmk_rc_str(rc)); } } else { rc = ENXIO; g_set_error(&error, PCMK__RC_ERROR, rc, "'%s' is not a valid agent specification", spec); } lrmd_api_delete(lrmd_conn); return rc; } static void validate_cmdline(crm_exit_t *exit_code) { // -r cannot be used with any of --class, --agent, or --provider if (options.rsc_id != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--resource cannot be used with --class, --agent, and --provider"); // If --class, --agent, or --provider are given, --validate must also be given. } else if (!pcmk__str_eq(options.rsc_long_cmd, "validate", pcmk__str_casei)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--class, --agent, and --provider require --validate"); // Not all of --class, --agent, and --provider need to be given. Not all // classes support the concept of a provider. Check that what we were given // is valid. } else if (pcmk__str_eq(options.v_class, "stonith", pcmk__str_none)) { if (options.v_provider != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "stonith does not support providers"); } else if (stonith_agent_exists(options.v_agent, 0) == FALSE) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "%s is not a known stonith agent", options.v_agent ? options.v_agent : ""); } } else if (resources_agent_exists(options.v_class, options.v_provider, options.v_agent) == FALSE) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "%s:%s:%s is not a known resource", options.v_class ? options.v_class : "", options.v_provider ? options.v_provider : "", options.v_agent ? options.v_agent : ""); } if (error == NULL) { *exit_code = cli_resource_execute_from_params("test", options.v_class, options.v_provider, options.v_agent, "validate-all", options.validate_options, options.override_params, options.timeout_ms, options.resource_verbose, options.force); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { "resource", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.rsc_id, "Resource ID", "ID" }, { G_OPTION_REMAINING, 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING_ARRAY, &options.remainder, NULL, NULL }, { NULL } }; const char *description = "Examples:\n\n" "List the available OCF agents:\n\n" "\t# crm_resource --list-agents ocf\n\n" "List the available OCF agents from the linux-ha project:\n\n" "\t# crm_resource --list-agents ocf:heartbeat\n\n" "Move 'myResource' to a specific node:\n\n" "\t# crm_resource --resource myResource --move --node altNode\n\n" "Allow (but not force) 'myResource' to move back to its original " "location:\n\n" "\t# crm_resource --resource myResource --clear\n\n" "Stop 'myResource' (and anything that depends on it):\n\n" "\t# crm_resource --resource myResource --set-parameter target-role " "--meta --parameter-value Stopped\n\n" "Tell the cluster not to manage 'myResource' (the cluster will not " "attempt to start or stop the\n" "resource under any circumstances; useful when performing maintenance " "tasks on a resource):\n\n" "\t# crm_resource --resource myResource --set-parameter is-managed " "--meta --parameter-value false\n\n" "Erase the operation history of 'myResource' on 'aNode' (the cluster " "will 'forget' the existing\n" "resource state, including any errors, and attempt to recover the" "resource; useful when a resource\n" "had failed permanently and has been repaired by an administrator):\n\n" "\t# crm_resource --resource myResource --cleanup --node aNode\n\n"; context = pcmk__build_arg_context(args, NULL, NULL, NULL); g_option_context_set_description(context, description); /* Add the -Q option, which cannot be part of the globally supported options * because some tools use that flag for something else. */ pcmk__add_main_args(context, extra_prog_entries); pcmk__add_arg_group(context, "queries", "Queries:", "Show query help", query_entries); pcmk__add_arg_group(context, "commands", "Commands:", "Show command help", command_entries); pcmk__add_arg_group(context, "locations", "Locations:", "Show location help", location_entries); pcmk__add_arg_group(context, "validate", "Validate:", "Show validate help", validate_entries); pcmk__add_arg_group(context, "advanced", "Advanced:", "Show advanced option help", advanced_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); return context; } int main(int argc, char **argv) { xmlNode *cib_xml_copy = NULL; pe_resource_t *rsc = NULL; int rc = pcmk_rc_ok; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); GOptionContext *context = NULL; gchar **processed_args = NULL; context = build_arg_context(args); crm_log_cli_init("crm_resource"); processed_args = pcmk__cmdline_preproc(argv, "GINSTdginpstuv"); if (!g_option_context_parse_strv(context, &processed_args, &error)) { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); exit_code = CRM_EX_USAGE; goto done; } if (pcmk__strcase_any_of(options.extra_option, "--list-ocf-providers", "--list-ocf-alternatives", "--list-standards", NULL)) { rc = list_providers(options.extra_option, options.extra_arg, &exit_code); goto done; } else if (pcmk__str_eq(options.extra_option, "--show-metadata", pcmk__str_casei)) { rc = show_metadata(options.extra_arg, &exit_code); goto done; } else if (pcmk__str_eq(options.extra_option, "--list-agents", pcmk__str_casei)) { rc = list_agents(options.extra_arg, &exit_code); goto done; } else if (pcmk__str_eq(options.extra_option, "--option", pcmk__str_casei)) { rc = set_option(options.extra_arg); if (rc != pcmk_rc_ok) { goto done; } } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } options.resource_verbose = args->verbosity; BE_QUIET = args->quiet; options.validate_options = crm_str_table_new(); crm_log_args(argc, argv); if (options.host_uname) { crm_trace("Option host => %s", options.host_uname); } // Catch the case where the user didn't specify a command if (options.rsc_cmd == 'L') { options.require_resource = FALSE; } // --expired without --clear/-U doesn't make sense if (options.clear_expired && options.rsc_cmd != 'U') { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--expired requires --clear or -U"); goto done; } if (options.remainder && options.rsc_cmd == 0 && options.rsc_long_cmd) { options.override_params = crm_str_table_new(); for (gchar **s = options.remainder; *s; s++) { char *name = calloc(1, strlen(*s)); char *value = calloc(1, strlen(*s)); int rc = sscanf(*s, "%[^=]=%s", name, value); if (rc == 2) { g_hash_table_replace(options.override_params, name, value); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "Error parsing '%s' as a name=value pair for --%s", argv[optind], options.rsc_long_cmd); free(value); free(name); goto done; } } } else if (options.remainder && options.rsc_cmd == 0) { gchar **strv = NULL; gchar *msg = NULL; int i = 1; int len = 0; for (gchar **s = options.remainder; *s; s++) { len++; } strv = calloc(len, sizeof(char *)); strv[0] = strdup("non-option ARGV-elements:"); for (gchar **s = options.remainder; *s; s++) { strv[i] = crm_strdup_printf("[%d of %d] %s\n", i, len, *s); i++; } msg = g_strjoinv("", strv); g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "%s", msg); for(i = 0; i < len; i++) { free(strv[i]); } g_free(msg); g_free(strv); goto done; } if (args->version) { /* FIXME: When crm_resource is converted to use formatted output, this can go. */ pcmk__cli_help('v', CRM_EX_USAGE); } if (optind > argc) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "Invalid option(s) supplied, use --help for valid usage"); exit_code = CRM_EX_USAGE; goto done; } // Sanity check validating from command line parameters. If everything checks out, // go ahead and run the validation. This way we don't need a CIB connection. if (options.validate_cmdline) { validate_cmdline(&exit_code); goto done; } if (error != NULL) { exit_code = CRM_EX_USAGE; goto done; } if (options.force) { crm_debug("Forcing..."); options.cib_options |= cib_quorum_override; } if (options.require_resource && !options.rsc_id) { rc = ENXIO; g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "Must supply a resource id with -r"); goto done; } if (options.find_flags && options.rsc_id) { options.require_dataset = TRUE; } // Establish a connection to the CIB cib_conn = cib_new(); if ((cib_conn == NULL) || (cib_conn->cmds == NULL)) { rc = pcmk_rc_error; g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_DISCONNECT, "Could not create CIB connection"); goto done; } rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, "Could not connect to the CIB: %s", pcmk_rc_str(rc)); goto done; } /* Populate working set from XML file if specified or CIB query otherwise */ if (options.require_dataset) { rc = populate_working_set(&cib_xml_copy); if (rc != pcmk_rc_ok) { goto done; } } // If command requires that resource exist if specified, find it if (options.find_flags && options.rsc_id) { rsc = pe_find_resource_with_flags(data_set->resources, options.rsc_id, options.find_flags); if (rsc == NULL) { rc = ENXIO; g_set_error(&error, PCMK__RC_ERROR, rc, "Resource '%s' not found", options.rsc_id); goto done; } } // Establish a connection to the controller if needed if (options.require_crmd) { rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); if (rc != pcmk_rc_ok) { CMD_ERR("Error connecting to the controller: %s", pcmk_rc_str(rc)); goto done; } pcmk_register_ipc_callback(controld_api, controller_event_callback, NULL); rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_main); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, "Error connecting to the controller: %s", pcmk_rc_str(rc)); goto done; } } /* Handle rsc_cmd appropriately */ if (options.rsc_cmd == 'L') { rc = pcmk_rc_ok; cli_resource_print_list(data_set, FALSE); } else if (options.rsc_cmd == 'l') { rc = list_raw(); } else if (options.rsc_cmd == 0 && options.rsc_long_cmd && pcmk__str_eq(options.rsc_long_cmd, "restart", pcmk__str_casei)) { /* We don't pass data_set because rsc needs to stay valid for the entire * lifetime of cli_resource_restart(), but it will reset and update the * working set multiple times, so it needs to use its own copy. */ rc = cli_resource_restart(rsc, options.host_uname, options.timeout_ms, cib_conn, options.cib_options, options.promoted_role_only, options.force); } else if (options.rsc_cmd == 0 && options.rsc_long_cmd && pcmk__str_eq(options.rsc_long_cmd, "wait", pcmk__str_casei)) { rc = wait_till_stable(options.timeout_ms, cib_conn); } else if (options.rsc_cmd == 0 && options.rsc_long_cmd) { // validate, force-(stop|start|demote|promote|check) exit_code = cli_resource_execute(rsc, options.rsc_id, options.rsc_long_cmd, options.override_params, options.timeout_ms, cib_conn, data_set, options.resource_verbose, options.force); } else if (options.rsc_cmd == 'A' || options.rsc_cmd == 'a') { list_stacks_and_constraints(rsc); } else if (options.rsc_cmd == 'c') { GListPtr lpc = NULL; rc = pcmk_rc_ok; for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { rsc = (pe_resource_t *) lpc->data; cli_resource_print_cts(rsc); } cli_resource_print_cts_constraints(data_set); } else if (options.rsc_cmd == 'F') { rc = cli_resource_fail(controld_api, options.host_uname, options.rsc_id, data_set); if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } } else if (options.rsc_cmd == 'O') { rc = cli_resource_print_operations(options.rsc_id, options.host_uname, TRUE, data_set); } else if (options.rsc_cmd == 'o') { rc = cli_resource_print_operations(options.rsc_id, options.host_uname, FALSE, data_set); } else if (options.rsc_cmd == 'W') { rc = cli_resource_search(rsc, options.rsc_id, data_set); if (rc >= 0) { rc = pcmk_rc_ok; } } else if (options.rsc_cmd == 'q') { rc = cli_resource_print(rsc, data_set, TRUE); } else if (options.rsc_cmd == 'w') { rc = cli_resource_print(rsc, data_set, FALSE); } else if (options.rsc_cmd == 'Y') { pe_node_t *dest = NULL; if (options.host_uname) { dest = pe_find_node(data_set->nodes, options.host_uname); if (dest == NULL) { rc = pcmk_rc_node_unknown; goto done; } } cli_resource_why(cib_conn, data_set->resources, rsc, dest); rc = pcmk_rc_ok; } else if (options.rsc_cmd == 'U') { rc = clear_constraints(&cib_xml_copy); } else if (options.rsc_cmd == 'M' && options.host_uname) { rc = cli_resource_move(rsc, options.rsc_id, options.host_uname, cib_conn, options.cib_options, data_set, options.promoted_role_only, options.force); } else if (options.rsc_cmd == 'B' && options.host_uname) { pe_node_t *dest = pe_find_node(data_set->nodes, options.host_uname); if (dest == NULL) { rc = pcmk_rc_node_unknown; goto done; } rc = cli_resource_ban(options.rsc_id, dest->details->uname, NULL, cib_conn, options.cib_options, options.promoted_role_only); } else if (options.rsc_cmd == 'B' || options.rsc_cmd == 'M') { rc = ban_or_move(rsc, &exit_code); } else if (options.rsc_cmd == 'G') { rc = cli_resource_print_property(rsc, options.prop_name, data_set); } else if (options.rsc_cmd == 'S') { rc = set_property(); } else if (options.rsc_cmd == 'g') { rc = cli_resource_print_attribute(rsc, options.prop_name, options.attr_set_type, data_set); } else if (options.rsc_cmd == 'p') { if (pcmk__str_empty(options.prop_value)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "You need to supply a value with the -v option"); rc = EINVAL; goto done; } /* coverity[var_deref_model] False positive */ rc = cli_resource_update_attribute(rsc, options.rsc_id, options.prop_set, options.attr_set_type, options.prop_id, options.prop_name, options.prop_value, options.recursive, cib_conn, options.cib_options, data_set, options.force); } else if (options.rsc_cmd == 'd') { /* coverity[var_deref_model] False positive */ rc = cli_resource_delete_attribute(rsc, options.rsc_id, options.prop_set, options.attr_set_type, options.prop_id, options.prop_name, cib_conn, options.cib_options, data_set, options.force); } else if ((options.rsc_cmd == 'C') && rsc) { cleanup(rsc); } else if (options.rsc_cmd == 'C') { rc = cli_cleanup_all(controld_api, options.host_uname, options.operation, options.interval_spec, data_set); if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } } else if ((options.rsc_cmd == 'R') && rsc) { refresh_resource(rsc); } else if (options.rsc_cmd == 'R') { rc = refresh(); } else if (options.rsc_cmd == 'D') { rc = delete(); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "Unknown command: %c", options.rsc_cmd); } done: if (rc != pcmk_rc_ok) { if (rc == pcmk_rc_no_quorum) { g_prefix_error(&error, "To ignore quorum, use the force option.\n"); } if (error != NULL) { char *msg = crm_strdup_printf("%s\nError performing operation: %s", error->message, pcmk_rc_str(rc)); g_clear_error(&error); g_set_error(&error, PCMK__RC_ERROR, rc, "%s", msg); free(msg); } else { g_set_error(&error, PCMK__RC_ERROR, rc, "Error performing operation: %s", pcmk_rc_str(rc)); } if (exit_code == CRM_EX_OK) { exit_code = pcmk_rc2exitc(rc); } } free(options.extra_arg); free(options.extra_option); free(options.host_uname); free(options.interval_spec); free(options.operation); free(options.prop_id); free(options.prop_name); free(options.prop_set); free(options.prop_value); free(options.rsc_id); free(options.rsc_long_cmd); free(options.rsc_type); free(options.v_agent); free(options.v_class); free(options.v_provider); free(options.xml_file); if (options.remainder) { for (gchar **s = options.remainder; *s; s++) { g_free(*s); } } if (options.override_params != NULL) { g_hash_table_destroy(options.override_params); } /* options.validate_options does not need to be destroyed here. See the * comments in cli_resource_execute_from_params. */ g_strfreev(processed_args); g_option_context_free(context); return bye(exit_code); } diff --git a/tools/crm_ticket.c b/tools/crm_ticket.c index e67e0d98af..dcb0d62dc0 100644 --- a/tools/crm_ticket.c +++ b/tools/crm_ticket.c @@ -1,1074 +1,1073 @@ /* * Copyright 2012-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean do_force = FALSE; gboolean BE_QUIET = FALSE; const char *ticket_id = NULL; const char *get_attr_name = NULL; const char *attr_name = NULL; const char *attr_value = NULL; const char *attr_id = NULL; const char *set_name = NULL; const char *attr_default = NULL; const char *xml_file = NULL; char ticket_cmd = 'S'; int cib_options = cib_sync_call; static pe_ticket_t * find_ticket(const char *ticket_id, pe_working_set_t * data_set) { pe_ticket_t *ticket = NULL; ticket = g_hash_table_lookup(data_set->tickets, ticket_id); return ticket; } static void print_date(time_t time) { int lpc = 0; char date_str[26]; asctime_r(localtime(&time), date_str); for (; lpc < 26; lpc++) { if (date_str[lpc] == '\n') { date_str[lpc] = 0; } } fprintf(stdout, "'%s'", date_str); } static int print_ticket(pe_ticket_t * ticket, gboolean raw, gboolean details) { if (raw) { fprintf(stdout, "%s\n", ticket->id); return pcmk_ok; } fprintf(stdout, "%s\t%s %s", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? "[standby]" : " "); if (details && g_hash_table_size(ticket->state) > 0) { GHashTableIter iter; const char *name = NULL; const char *value = NULL; int lpc = 0; fprintf(stdout, " ("); g_hash_table_iter_init(&iter, ticket->state); while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) { if (lpc > 0) { fprintf(stdout, ", "); } fprintf(stdout, "%s=", name); - if (pcmk__str_eq(name, "last-granted", pcmk__str_none) - || pcmk__str_eq(name, "expires", pcmk__str_none)) { + if (pcmk__str_any_of(name, "last-granted", "expires", NULL)) { print_date(crm_parse_int(value, 0)); } else { fprintf(stdout, "%s", value); } lpc++; } fprintf(stdout, ")\n"); } else { if (ticket->last_granted > -1) { fprintf(stdout, " last-granted="); print_date(ticket->last_granted); } fprintf(stdout, "\n"); } return pcmk_ok; } static int print_ticket_list(pe_working_set_t * data_set, gboolean raw, gboolean details) { GHashTableIter iter; pe_ticket_t *ticket = NULL; g_hash_table_iter_init(&iter, data_set->tickets); while (g_hash_table_iter_next(&iter, NULL, (void **)&ticket)) { print_ticket(ticket, raw, details); } return pcmk_ok; } #define XPATH_MAX 1024 static int find_ticket_state(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_state_xml) { int offset = 0; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; CRM_ASSERT(ticket_state_xml != NULL); *ticket_state_xml = NULL; xpath_string = calloc(1, XPATH_MAX); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", "/cib/status/tickets"); if (ticket_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s[@id=\"%s\"]", XML_CIB_TAG_TICKET_STATE, ticket_id); } CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { if (ticket_id) { fprintf(stdout, "Multiple ticket_states match ticket_id=%s\n", ticket_id); } *ticket_state_xml = xml_search; } else { *ticket_state_xml = xml_search; } bail: free(xpath_string); return rc; } static int find_ticket_constraints(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_cons_xml) { int offset = 0; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; CRM_ASSERT(ticket_cons_xml != NULL); *ticket_cons_xml = NULL; xpath_string = calloc(1, XPATH_MAX); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s/%s", get_object_path(XML_CIB_TAG_CONSTRAINTS), XML_CONS_TAG_RSC_TICKET); if (ticket_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "[@ticket=\"%s\"]", ticket_id); } CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); *ticket_cons_xml = xml_search; bail: free(xpath_string); return rc; } static int dump_ticket_xml(cib_t * the_cib, const char *ticket_id) { int rc = pcmk_ok; xmlNode *state_xml = NULL; rc = find_ticket_state(the_cib, ticket_id, &state_xml); if (state_xml == NULL) { return rc; } fprintf(stdout, "State XML:\n"); if (state_xml) { char *state_xml_str = NULL; state_xml_str = dump_xml_formatted(state_xml); fprintf(stdout, "\n%s\n", crm_str(state_xml_str)); free_xml(state_xml); free(state_xml_str); } return pcmk_ok; } static int dump_constraints(cib_t * the_cib, const char *ticket_id) { int rc = pcmk_ok; xmlNode *cons_xml = NULL; char *cons_xml_str = NULL; rc = find_ticket_constraints(the_cib, ticket_id, &cons_xml); if (cons_xml == NULL) { return rc; } cons_xml_str = dump_xml_formatted(cons_xml); fprintf(stdout, "Constraints XML:\n\n%s\n", crm_str(cons_xml_str)); free_xml(cons_xml); free(cons_xml_str); return pcmk_ok; } static int get_ticket_state_attr(const char *ticket_id, const char *attr_name, const char **attr_value, pe_working_set_t * data_set) { pe_ticket_t *ticket = NULL; CRM_ASSERT(attr_value != NULL); *attr_value = NULL; ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { return -ENXIO; } *attr_value = g_hash_table_lookup(ticket->state, attr_name); if (*attr_value == NULL) { return -ENXIO; } return pcmk_ok; } static gboolean ticket_warning(const char *ticket_id, const char *action) { gboolean rc = FALSE; int offset = 0; static int text_max = 1024; char *warning = NULL; const char *word = NULL; warning = calloc(1, text_max); if (pcmk__str_eq(action, "grant", pcmk__str_casei)) { offset += snprintf(warning + offset, text_max - offset, "This command cannot help you verify whether '%s' has been already granted elsewhere.\n", ticket_id); word = "to"; } else { offset += snprintf(warning + offset, text_max - offset, "Revoking '%s' can trigger the specified 'loss-policy'(s) relating to '%s'.\n\n", ticket_id, ticket_id); offset += snprintf(warning + offset, text_max - offset, "You can check that with:\ncrm_ticket --ticket %s --constraints\n\n", ticket_id); offset += snprintf(warning + offset, text_max - offset, "Otherwise before revoking '%s', you may want to make '%s' standby with:\ncrm_ticket --ticket %s --standby\n\n", ticket_id, ticket_id, ticket_id); word = "from"; } offset += snprintf(warning + offset, text_max - offset, "If you really want to %s '%s' %s this site now, and you know what you are doing,\n", action, ticket_id, word); offset += snprintf(warning + offset, text_max - offset, "please specify --force."); CRM_LOG_ASSERT(offset > 0); fprintf(stdout, "%s\n", warning); free(warning); return rc; } static gboolean allow_modification(const char *ticket_id, GListPtr attr_delete, GHashTable *attr_set) { const char *value = NULL; GListPtr list_iter = NULL; if (do_force) { return TRUE; } if (g_hash_table_lookup_extended(attr_set, "granted", NULL, (gpointer *) & value)) { if (crm_is_true(value)) { ticket_warning(ticket_id, "grant"); return FALSE; } else { ticket_warning(ticket_id, "revoke"); return FALSE; } } for(list_iter = attr_delete; list_iter; list_iter = list_iter->next) { const char *key = (const char *)list_iter->data; if (pcmk__str_eq(key, "granted", pcmk__str_casei)) { ticket_warning(ticket_id, "revoke"); return FALSE; } } return TRUE; } static int modify_ticket_state(const char * ticket_id, GListPtr attr_delete, GHashTable * attr_set, cib_t * cib, pe_working_set_t * data_set) { int rc = pcmk_ok; xmlNode *xml_top = NULL; xmlNode *ticket_state_xml = NULL; gboolean found = FALSE; GListPtr list_iter = NULL; GHashTableIter hash_iter; char *key = NULL; char *value = NULL; pe_ticket_t *ticket = NULL; rc = find_ticket_state(cib, ticket_id, &ticket_state_xml); if (rc == pcmk_ok) { crm_debug("Found a match state for ticket: id=%s", ticket_id); xml_top = ticket_state_xml; found = TRUE; } else if (rc != -ENXIO) { return rc; } else if (g_hash_table_size(attr_set) == 0){ return pcmk_ok; } else { xmlNode *xml_obj = NULL; xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE); crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id); } for(list_iter = attr_delete; list_iter; list_iter = list_iter->next) { const char *key = (const char *)list_iter->data; xml_remove_prop(ticket_state_xml, key); } ticket = find_ticket(ticket_id, data_set); g_hash_table_iter_init(&hash_iter, attr_set); while (g_hash_table_iter_next(&hash_iter, (gpointer *) & key, (gpointer *) & value)) { crm_xml_add(ticket_state_xml, key, value); if (pcmk__str_eq(key, "granted", pcmk__str_casei) && (ticket == NULL || ticket->granted == FALSE) && crm_is_true(value)) { char *now = crm_ttoa(time(NULL)); crm_xml_add(ticket_state_xml, "last-granted", now); free(now); } } if (found && (attr_delete != NULL)) { crm_log_xml_debug(xml_top, "Replace"); rc = cib->cmds->replace(cib, XML_CIB_TAG_STATUS, ticket_state_xml, cib_options); } else { crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options); } free_xml(xml_top); return rc; } static int delete_ticket_state(const char *ticket_id, cib_t * cib) { xmlNode *ticket_state_xml = NULL; int rc = pcmk_ok; rc = find_ticket_state(cib, ticket_id, &ticket_state_xml); if (rc == -ENXIO) { return pcmk_ok; } else if (rc != pcmk_ok) { return rc; } crm_log_xml_debug(ticket_state_xml, "Delete"); rc = cib->cmds->remove(cib, XML_CIB_TAG_STATUS, ticket_state_xml, cib_options); if (rc == pcmk_ok) { fprintf(stdout, "Cleaned up %s\n", ticket_id); } free_xml(ticket_state_xml); return rc; } static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "help", no_argument, NULL, '?', "\t\tThis text", pcmk__option_default }, { "version", no_argument, NULL, '$', "\t\tVersion information", pcmk__option_default }, { "verbose", no_argument, NULL, 'V', "\t\tIncrease debug output", pcmk__option_default }, { "quiet", no_argument, NULL, 'Q', "\t\tPrint only the value on stdout\n", pcmk__option_default }, { "ticket", required_argument, NULL, 't', "\tTicket ID", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nQueries:", pcmk__option_default }, { "info", no_argument, NULL, 'l', "\t\tDisplay the information of ticket(s)", pcmk__option_default }, { "details", no_argument, NULL, 'L', "\t\tDisplay the details of ticket(s)", pcmk__option_default }, { "raw", no_argument, NULL, 'w', "\t\tDisplay the IDs of ticket(s)", pcmk__option_default }, { "query-xml", no_argument, NULL, 'q', "\tQuery the XML of ticket(s)", pcmk__option_default }, { "constraints", no_argument, NULL, 'c', "\tDisplay the rsc_ticket constraints that apply to ticket(s)", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nCommands:", pcmk__option_default }, { "grant", no_argument, NULL, 'g', "\t\tGrant a ticket to this cluster site", pcmk__option_default }, { "revoke", no_argument, NULL, 'r', "\t\tRevoke a ticket from this cluster site", pcmk__option_default }, { "standby", no_argument, NULL, 's', "\t\tTell this cluster site this ticket is standby", pcmk__option_default }, { "activate", no_argument, NULL, 'a', "\tTell this cluster site this ticket is active", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nAdvanced Commands:", pcmk__option_default }, { "get-attr", required_argument, NULL, 'G', "\tDisplay the named attribute for a ticket", pcmk__option_default }, { "set-attr", required_argument, NULL, 'S', "\tSet the named attribute for a ticket", pcmk__option_default }, { "delete-attr", required_argument, NULL, 'D', "\tDelete the named attribute for a ticket", pcmk__option_default }, { "cleanup", no_argument, NULL, 'C', "\t\tDelete all state of a ticket at this cluster site", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nAdditional Options:", pcmk__option_default }, { "attr-value", required_argument, NULL, 'v', "\tAttribute value to use with -S", pcmk__option_default }, { "default", required_argument, NULL, 'd', "\t(Advanced) Default attribute value to display if none is found " "(for use with -G)", pcmk__option_default }, { "force", no_argument, NULL, 'f', "\t\t(Advanced) Force the action to be performed", pcmk__option_default }, { "xml-file", required_argument, NULL, 'x', NULL, pcmk__option_hidden }, /* legacy options */ { "set-name", required_argument, NULL, 'n', "\t(Advanced) ID of the instance_attributes object to change", pcmk__option_default }, { "nvpair", required_argument, NULL, 'i', "\t(Advanced) ID of the nvpair object to change/delete", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\nExamples:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', "Display the info of tickets:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --info", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Display the detailed info of tickets:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --details", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Display the XML of 'ticketA':", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --query-xml", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Display the rsc_ticket constraints that apply to 'ticketA':", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --constraints", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Grant 'ticketA' to this cluster site:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --grant", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Revoke 'ticketA' from this cluster site:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --revoke", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Make 'ticketA' standby (the cluster site will treat a granted " "'ticketA' as 'standby', and the dependent resources will be " "stopped or demoted gracefully without triggering loss-policies):", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --standby", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Activate 'ticketA' from being standby:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --activate", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Get the value of the 'granted' attribute for 'ticketA':", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --get-attr granted", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Set the value of the 'standby' attribute for 'ticketA':", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --set-attr standby --attr-value true", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Delete the 'granted' attribute for 'ticketA':", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --delete-attr granted", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Erase the operation history of 'ticketA' at this cluster site:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', "The cluster site will 'forget' the existing ticket state.", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " crm_ticket --ticket ticketA --cleanup", pcmk__option_example }, { 0, 0, 0, 0 } }; int main(int argc, char **argv) { pe_working_set_t *data_set = NULL; xmlNode *cib_xml_copy = NULL; xmlNode *cib_constraints = NULL; cib_t *cib_conn = NULL; crm_exit_t exit_code = CRM_EX_OK; int rc = pcmk_ok; int option_index = 0; int argerr = 0; int flag; guint modified = 0; GListPtr attr_delete = NULL; GHashTable *attr_set = crm_str_table_new(); crm_log_init(NULL, LOG_CRIT, FALSE, FALSE, argc, argv, FALSE); pcmk__set_cli_options(NULL, "| [options]", long_options, "perform tasks related to cluster tickets\n\n" "Allows ticket attributes to be queried, modified " "and deleted.\n"); if (argc < 2) { pcmk__cli_help('?', CRM_EX_USAGE); } while (1) { flag = pcmk__next_cli_option(argc, argv, &option_index, NULL); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case '$': case '?': pcmk__cli_help(flag, CRM_EX_OK); break; case 'Q': BE_QUIET = TRUE; break; case 't': ticket_id = optarg; break; case 'l': case 'L': case 'w': case 'q': case 'c': ticket_cmd = flag; break; case 'g': g_hash_table_insert(attr_set, strdup("granted"), strdup("true")); modified++; break; case 'r': g_hash_table_insert(attr_set, strdup("granted"), strdup("false")); modified++; break; case 's': g_hash_table_insert(attr_set, strdup("standby"), strdup("true")); modified++; break; case 'a': g_hash_table_insert(attr_set, strdup("standby"), strdup("false")); modified++; break; case 'G': get_attr_name = optarg; ticket_cmd = flag; break; case 'S': attr_name = optarg; if (attr_name && attr_value) { g_hash_table_insert(attr_set, strdup(attr_name), strdup(attr_value)); attr_name = NULL; attr_value = NULL; modified++; } break; case 'D': attr_delete = g_list_append(attr_delete, optarg); modified++; break; case 'C': ticket_cmd = flag; break; case 'v': attr_value = optarg; if (attr_name && attr_value) { g_hash_table_insert(attr_set, strdup(attr_name), strdup(attr_value)); attr_name = NULL; attr_value = NULL; modified++; } break; case 'd': attr_default = optarg; break; case 'f': do_force = TRUE; break; case 'x': xml_file = optarg; break; case 'n': set_name = optarg; break; case 'i': attr_id = optarg; break; default: CMD_ERR("Argument code 0%o (%c) is not (?yet?) supported", flag, flag); ++argerr; break; } } if (optind < argc && argv[optind] != NULL) { CMD_ERR("non-option ARGV-elements:"); while (optind < argc && argv[optind] != NULL) { CMD_ERR("%s", argv[optind++]); ++argerr; } } if (optind > argc) { ++argerr; } if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } data_set = pe_new_working_set(); if (data_set == NULL) { crm_perror(LOG_CRIT, "Could not allocate working set"); exit_code = CRM_EX_OSERR; goto bail; } set_bit(data_set->flags, pe_flag_no_counts); set_bit(data_set->flags, pe_flag_no_compat); cib_conn = cib_new(); if (cib_conn == NULL) { CMD_ERR("Could not connect to the CIB manager"); exit_code = CRM_EX_DISCONNECT; goto bail; } rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); if (rc != pcmk_ok) { CMD_ERR("Could not connect to CIB: %s", pcmk_strerror(rc)); exit_code = crm_errno2exit(rc); goto bail; } if (xml_file != NULL) { cib_xml_copy = filename2xml(xml_file); } else { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { CMD_ERR("Could not get local CIB: %s", pcmk_strerror(rc)); exit_code = crm_errno2exit(rc); goto bail; } } if (cli_config_update(&cib_xml_copy, NULL, FALSE) == FALSE) { CMD_ERR("Could not update local CIB to latest schema version"); exit_code = CRM_EX_CONFIG; goto bail; } data_set->input = cib_xml_copy; data_set->now = crm_time_new(NULL); cluster_status(data_set); /* For recording the tickets that are referenced in rsc_ticket constraints * but have never been granted yet. */ cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); unpack_constraints(cib_constraints, data_set); if (ticket_cmd == 'l' || ticket_cmd == 'L' || ticket_cmd == 'w') { gboolean raw = FALSE; gboolean details = FALSE; if (ticket_cmd == 'L') { details = TRUE; } else if (ticket_cmd == 'w') { raw = TRUE; } if (ticket_id) { pe_ticket_t *ticket = find_ticket(ticket_id, data_set); if (ticket == NULL) { CMD_ERR("No such ticket '%s'", ticket_id); exit_code = CRM_EX_NOSUCH; goto bail; } rc = print_ticket(ticket, raw, details); } else { rc = print_ticket_list(data_set, raw, details); } if (rc != pcmk_ok) { CMD_ERR("Could not print ticket: %s", pcmk_strerror(rc)); } exit_code = crm_errno2exit(rc); } else if (ticket_cmd == 'q') { rc = dump_ticket_xml(cib_conn, ticket_id); if (rc != pcmk_ok) { CMD_ERR("Could not query ticket XML: %s", pcmk_strerror(rc)); } exit_code = crm_errno2exit(rc); } else if (ticket_cmd == 'c') { rc = dump_constraints(cib_conn, ticket_id); if (rc != pcmk_ok) { CMD_ERR("Could not show ticket constraints: %s", pcmk_strerror(rc)); } exit_code = crm_errno2exit(rc); } else if (ticket_cmd == 'G') { const char *value = NULL; if (ticket_id == NULL) { CMD_ERR("Must supply ticket ID with -t"); exit_code = CRM_EX_NOSUCH; goto bail; } rc = get_ticket_state_attr(ticket_id, get_attr_name, &value, data_set); if (rc == pcmk_ok) { fprintf(stdout, "%s\n", value); } else if (rc == -ENXIO && attr_default) { fprintf(stdout, "%s\n", attr_default); rc = pcmk_ok; } exit_code = crm_errno2exit(rc); } else if (ticket_cmd == 'C') { if (ticket_id == NULL) { CMD_ERR("Must supply ticket ID with -t"); exit_code = CRM_EX_USAGE; goto bail; } if (do_force == FALSE) { pe_ticket_t *ticket = NULL; ticket = find_ticket(ticket_id, data_set); if (ticket == NULL) { CMD_ERR("No such ticket '%s'", ticket_id); exit_code = CRM_EX_NOSUCH; goto bail; } if (ticket->granted) { ticket_warning(ticket_id, "revoke"); exit_code = CRM_EX_INSUFFICIENT_PRIV; goto bail; } } rc = delete_ticket_state(ticket_id, cib_conn); if (rc != pcmk_ok) { CMD_ERR("Could not clean up ticket: %s", pcmk_strerror(rc)); } exit_code = crm_errno2exit(rc); } else if (modified) { if (ticket_id == NULL) { CMD_ERR("Must supply ticket ID with -t"); exit_code = CRM_EX_USAGE; goto bail; } if (attr_value && (pcmk__str_empty(attr_name))) { CMD_ERR("Must supply attribute name with -S for -v %s", attr_value); exit_code = CRM_EX_USAGE; goto bail; } if (attr_name && (pcmk__str_empty(attr_value))) { CMD_ERR("Must supply attribute value with -v for -S %s", attr_name); exit_code = CRM_EX_USAGE; goto bail; } if (allow_modification(ticket_id, attr_delete, attr_set) == FALSE) { CMD_ERR("Ticket modification not allowed"); exit_code = CRM_EX_INSUFFICIENT_PRIV; goto bail; } rc = modify_ticket_state(ticket_id, attr_delete, attr_set, cib_conn, data_set); if (rc != pcmk_ok) { CMD_ERR("Could not modify ticket: %s", pcmk_strerror(rc)); } exit_code = crm_errno2exit(rc); } else if (ticket_cmd == 'S') { /* Correct usage was handled in the "if (modified)" block above, so * this is just for reporting usage errors */ if (pcmk__str_empty(attr_name)) { // We only get here if ticket_cmd was left as default CMD_ERR("Must supply a command"); exit_code = CRM_EX_USAGE; goto bail; } if (ticket_id == NULL) { CMD_ERR("Must supply ticket ID with -t"); exit_code = CRM_EX_USAGE; goto bail; } if (pcmk__str_empty(attr_value)) { CMD_ERR("Must supply value with -v for -S %s", attr_name); exit_code = CRM_EX_USAGE; goto bail; } } else { CMD_ERR("Unknown command: %c", ticket_cmd); exit_code = CRM_EX_USAGE; } bail: if (attr_set) { g_hash_table_destroy(attr_set); } attr_set = NULL; if (attr_delete) { g_list_free(attr_delete); } attr_delete = NULL; pe_free_working_set(data_set); data_set = NULL; if (cib_conn != NULL) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } if (rc == -pcmk_err_no_quorum) { CMD_ERR("Use --force to ignore quorum"); } crm_exit(exit_code); }