diff --git a/daemons/execd/remoted_pidone.c b/daemons/execd/remoted_pidone.c index 187de51d04..a0d8571006 100644 --- a/daemons/execd/remoted_pidone.c +++ b/daemons/execd/remoted_pidone.c @@ -1,303 +1,304 @@ /* - * Copyright 2017-2024 the Pacemaker project contributors + * Copyright 2017-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" static pid_t main_pid = 0; static void sigdone(void) { crm_exit(CRM_EX_OK); } static void sigreap(void) { pid_t pid = 0; int status; do { /* * Opinions seem to differ as to what to put here: * -1, any child process * 0, any child process whose process group ID is equal to that of the calling process */ pid = waitpid(-1, &status, WNOHANG); if (pid == main_pid) { /* Exit when pacemaker-remote exits and use the same return code */ if (WIFEXITED(status)) { crm_exit(WEXITSTATUS(status)); } crm_exit(CRM_EX_ERROR); } } while (pid > 0); } static struct { int sig; void (*handler)(void); } sigmap[] = { { SIGCHLD, sigreap }, { SIGINT, sigdone }, }; /*! * \internal * \brief Check a line of text for a valid environment variable name * * \param[in] line Text to check * \param[out] first First character of valid name if found, NULL otherwise * \param[out] last Last character of valid name if found, NULL otherwise * * \return TRUE if valid name found, FALSE otherwise * \note It's reasonable to impose limitations on environment variable names * beyond what C or setenv() does: We only allow names that contain only * [a-zA-Z0-9_] characters and do not start with a digit. */ static bool find_env_var_name(char *line, char **first, char **last) { // Skip leading whitespace *first = line; while (isspace(**first)) { ++*first; } if (isalpha(**first) || (**first == '_')) { // Valid first character *last = *first; while (isalnum(*(*last + 1)) || (*(*last + 1) == '_')) { ++*last; } return TRUE; } *first = *last = NULL; return FALSE; } static void load_env_vars(const char *filename) { /* We haven't forked or initialized logging yet, so don't leave any file * descriptors open, and don't log -- silently ignore errors. */ FILE *fp = fopen(filename, "r"); if (fp != NULL) { char line[LINE_MAX] = { '\0', }; while (fgets(line, LINE_MAX, fp) != NULL) { char *name = NULL; char *end = NULL; char *value = NULL; char *quote = NULL; // Look for valid name immediately followed by equals sign if (find_env_var_name(line, &name, &end) && (*++end == '=')) { // Null-terminate name, and advance beyond equals sign *end++ = '\0'; // Check whether value is quoted if ((*end == '\'') || (*end == '"')) { quote = end++; } value = end; if (quote) { /* Value is remaining characters up to next non-backslashed * matching quote character. */ while (((*end != *quote) || (*(end - 1) == '\\')) && (*end != '\0')) { end++; } if (*end == *quote) { // Null-terminate value, and advance beyond close quote *end++ = '\0'; } else { // Matching closing quote wasn't found value = NULL; } } else { /* Value is remaining characters up to next non-backslashed * whitespace. */ while ((!isspace(*end) || (*(end - 1) == '\\')) && (*end != '\0')) { ++end; } if (end == (line + LINE_MAX - 1)) { // Line was too long value = NULL; } // Do NOT null-terminate value (yet) } /* We have a valid name and value, and end is now the character * after the closing quote or the first whitespace after the * unquoted value. Make sure the rest of the line is just * whitespace or a comment. */ if (value) { char *value_end = end; while (isspace(*end) && (*end != '\n')) { ++end; } if ((*end == '\n') || (*end == '#')) { if (quote == NULL) { // Now we can null-terminate an unquoted value *value_end = '\0'; } // Don't overwrite (bundle options take precedence) + // coverity[tainted_string] This can't easily be changed right now setenv(name, value, 0); } else { value = NULL; } } } if ((value == NULL) && (strchr(line, '\n') == NULL)) { // Eat remainder of line beyond LINE_MAX if (fscanf(fp, "%*[^\n]\n") == EOF) { value = NULL; // Don't care, make compiler happy } } } fclose(fp); } } void remoted_spawn_pidone(int argc, char **argv, char **envp) { sigset_t set; /* This environment variable exists for two purposes: * - For testing, setting it to "full" enables full PID 1 behavior even * when PID is not 1 * - Setting to "vars" enables just the loading of environment variables * from /etc/pacemaker/pcmk-init.env, which could be useful for testing or * containers with a custom PID 1 script that launches the remote * executor. */ const char *pid1 = PCMK_VALUE_DEFAULT; if (getpid() != 1) { pid1 = pcmk__env_option(PCMK__ENV_REMOTE_PID1); if (!pcmk__str_any_of(pid1, "full", "vars", NULL)) { // Default, unset, or invalid return; } } /* When a container is launched, it may be given specific environment * variables, which for Pacemaker bundles are given in the bundle * configuration. However, that does not allow for host-specific values. * To allow for that, look for a special file containing a shell-like syntax * of name/value pairs, and export those into the environment. */ load_env_vars("/etc/pacemaker/pcmk-init.env"); if (strcmp(pid1, "vars") == 0) { return; } /* Containers can be expected to have /var/log, but they may not have * /var/log/pacemaker, so use a different default if no value has been * explicitly configured in the container's environment. */ if (pcmk__env_option(PCMK__ENV_LOGFILE) == NULL) { pcmk__set_env_option(PCMK__ENV_LOGFILE, "/var/log/pcmk-init.log", true); } sigfillset(&set); sigprocmask(SIG_BLOCK, &set, 0); main_pid = fork(); switch (main_pid) { case 0: sigprocmask(SIG_UNBLOCK, &set, NULL); setsid(); setpgid(0, 0); // Child remains as pacemaker-remoted return; case -1: crm_err("fork failed: %s", pcmk_rc_str(errno)); } /* Parent becomes the reaper of zombie processes */ /* Safe to initialize logging now if needed */ # ifdef HAVE_PROGNAME /* Differentiate ourselves in the 'ps' output */ { char *p; int i, maxlen; char *LastArgv = NULL; const char *name = "pcmk-init"; for (i = 0; i < argc; i++) { if (!i || (LastArgv + 1 == argv[i])) LastArgv = argv[i] + strlen(argv[i]); } for (i = 0; envp[i] != NULL; i++) { if ((LastArgv + 1) == envp[i]) { LastArgv = envp[i] + strlen(envp[i]); } } maxlen = (LastArgv - argv[0]) - 2; i = strlen(name); /* We can overwrite individual argv[] arguments */ snprintf(argv[0], maxlen, "%s", name); /* Now zero out everything else */ p = &argv[0][i]; while (p < LastArgv) { *p++ = '\0'; } argv[1] = NULL; } # endif // HAVE_PROGNAME while (1) { int sig; size_t i; sigwait(&set, &sig); for (i = 0; i < PCMK__NELEM(sigmap); i++) { if (sigmap[i].sig == sig) { sigmap[i].handler(); break; } } } } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index f70f2c56e7..ebbdea8660 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3651 +1,3651 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // bool #include #include #include #include #include #include #include #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include #include #include #include static GHashTable *device_table = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ // @TODO This name is misleading now, it's the value of stonith-timeout int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_self; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(fenced_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct { int id; uint32_t options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; char *action; char *device; //! Head of device list (used only for freeing list with command object) GList *device_list; //! Next item to process in \c device_list GList *next_device_iter; void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); fenced_device_t *active_on; fenced_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); /*! * \internal * \brief Set a bad fencer API request error in a result object * * \param[out] result Result to set */ static inline void set_bad_request_result(pcmk__action_result_t *result) { pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Fencer API request missing required information (bug?)"); } /*! * \internal * \brief Check whether the fencer's device table contains a watchdog device * * \retval \c true If the device table contains a watchdog device * \retval \c false Otherwise */ bool fenced_has_watchdog_device(void) { return (device_table != NULL) && (g_hash_table_lookup(device_table, STONITH_WATCHDOG_ID) != NULL); } /*! * \internal * \brief Call a function for each known fence device * * \param[in] fn Function to call for each device * \param[in,out] user_data User data */ void fenced_foreach_device(GHFunc fn, gpointer user_data) { if (device_table != NULL) { g_hash_table_foreach(device_table, fn, user_data); } } /*! * \internal * \brief Remove each known fence device matching a given predicate * * \param[in] fn Function that returns \c TRUE to remove a fence device or * \c FALSE to keep it */ void fenced_foreach_device_remove(GHRFunc fn) { if (device_table != NULL) { g_hash_table_foreach_remove(device_table, fn, NULL); } } static gboolean is_action_required(const char *action, const fenced_device_t *device) { return (device != NULL) && pcmk_is_set(device->flags, fenced_df_auto_unfence) && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const fenced_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const fenced_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = pcmk__str_copy(hash_value); char *valptr = value; if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; } free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(const fenced_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, fenced_df_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { long long timeout_ms = crm_get_msec(value); return (int) QB_MIN(pcmk__timeout_ms2s(timeout_ms), INT_MAX); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static fenced_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_table == NULL)) { return NULL; } return g_hash_table_lookup(device_table, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_table != NULL) && (device_id != NULL)) { fenced_device_t *device = g_hash_table_lookup(device_table, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_table != NULL) && (device_id != NULL)) { fenced_device_t *device = g_hash_table_lookup(device_table, device_id); if (device != NULL) { return pcmk_is_set(device->flags, fenced_df_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. */ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; int rc = pcmk_rc_ok; if (msg == NULL) { return NULL; } op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_ERR); if (op == NULL) { return NULL; } cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); // All messages must include these cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP); cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id)); crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; rc = pcmk__xe_get_flags(msg, PCMK__XA_ST_CALLOPT, &(cmd->options), st_opt_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc)); } cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC); cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP); cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME); cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(fenced_device_t *device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(fenced_device_t *device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; fenced_device_t *device = cmd->activating_on; if (device == NULL) { /* In case of a retry, we've done the move from activating_on to * active_on already */ device = cmd->active_on; } pcmk__assert(device != NULL); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { fenced_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); device->default_host_arg = stonith__default_host_arg(device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! * \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(fenced_device_t *device) { int exec_rc = 0; const char *action_str = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(fenced_get_local_node())) { pcmk__panic("Watchdog self-fencing required"); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if PCMK__ENABLE_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, fenced_df_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->timeout, device->params, device->aliases, device->default_host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; fenced_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t *cmd, fenced_device_t *device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } cmd->device = pcmk__str_copy(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { - // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += + // coverity[dont_call] It doesn't matter here if rand() is predictable ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " QB_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = pcmk__create_timer(cmd->start_delay * 1000, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; fenced_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); pcmk__xml_free(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } /*! * \internal * \brief Initialize the table of known fence devices */ void fenced_init_device_table(void) { if (device_table == NULL) { device_table = pcmk__strkey_table(NULL, free_device); } } /*! * \internal * \brief Free the table of known fence devices */ void fenced_free_device_table(void) { if (device_table != NULL) { g_hash_table_destroy(device_table); device_table = NULL; } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, pcmk__str_copy(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith__api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith__api_free(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer); } *metadata = pcmk__xml_parse(buffer); return pcmk_rc_ok; } static void read_action_metadata(fenced_device_t *device) { xmlXPathObject *xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = pcmk__xpath_search(device->agent_metadata->doc, "//" PCMK_XE_ACTION); max = pcmk__xpath_num_results(xpath); if (max == 0) { xmlXPathFreeObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = pcmk__xpath_result(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, PCMK_XA_NAME); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { fenced_device_set_flags(device, fenced_df_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { fenced_device_set_flags(device, fenced_df_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { fenced_device_set_flags(device, fenced_df_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it * joins. * * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for * PCMK_XA_AUTOMATIC. */ if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC) || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) { fenced_device_set_flags(device, fenced_df_auto_unfence); } fenced_device_set_flags(device, fenced_df_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) { pcmk__add_word(&(device->on_target_actions), 64, action); } } xmlXPathFreeObject(xpath); } static const char * target_list_type(fenced_device_t *dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (pcmk_is_set(dev->flags, fenced_df_supports_list)) { check_type = PCMK_VALUE_DYNAMIC_LIST; } else if (pcmk_is_set(dev->flags, fenced_df_supports_status)) { check_type = PCMK_VALUE_STATUS; } else { check_type = PCMK_VALUE_NONE; } } return check_type; } static fenced_device_t * build_device_from_xml(const xmlNode *dev) { const char *value; fenced_device_t *device = NULL; char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT); CRM_CHECK(agent != NULL, return device); device = pcmk__assert_alloc(1, sizeof(fenced_device_t)); device->id = crm_element_value_copy(dev, PCMK_XA_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE); device->params = xml2list(dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei) && (device->targets != NULL)) { // device->targets is necessary only with PCMK_VALUE_STATIC_LIST g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); device->default_host_arg = stonith__default_host_arg(device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES); if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { fenced_device_set_flags(device, fenced_df_auto_unfence); } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); return device; } static void schedule_internal_command(const char *origin, fenced_device_t *device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = pcmk__str_copy(action); cmd->target = pcmk__str_copy(target); cmd->device = pcmk__str_copy(device->id); cmd->origin = pcmk__str_copy(origin); cmd->client = pcmk__str_copy(crm_system_name); cmd->client_name = pcmk__str_copy(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; fenced_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; fenced_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't * explicitly specify PCMK_VALUE_DYNAMIC_LIST */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, PCMK_VALUE_STATUS); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the table */ static fenced_device_t * device_has_duplicate(const fenced_device_t *device) { fenced_device_t *dup = g_hash_table_lookup(device_table, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } // Use pcmk__digest_operation() here? if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int fenced_device_register(const xmlNode *dev, bool from_cib) { const char *local_node_name = fenced_get_local_node(); fenced_device_t *dup = NULL; fenced_device_t *device = build_device_from_xml(dev); int rc = pcmk_rc_ok; CRM_CHECK(device != NULL, return ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set"); rc = ENODEV; goto done; } if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown agent '%s' " "rather than '" STONITH_WATCHDOG_AGENT "'", pcmk__s(device->agent, "")); rc = ENODEV; goto done; } if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device named '%s' rather than " "'" STONITH_WATCHDOG_ID "'", pcmk__s(device->id, "")); rc = ENODEV; goto done; } if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* This has either an empty list or the targets configured for * watchdog fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (!node_does_watchdog_fencing(local_node_name)) { crm_debug("Skip registration of watchdog fence device on node not " "in host list"); device->targets = NULL; stonith_device_remove(device->id, from_cib); goto done; } // Proceed as with any other fencing device g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(local_node_name); pcmk__insert_dup(device->params, PCMK_STONITH_HOST_LIST, local_node_name); } dup = device_has_duplicate(device); if (dup != NULL) { guint ndevices = g_hash_table_size(device_table); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; fenced_device_clear_flags(device, fenced_df_dirty); } else { guint ndevices = 0; fenced_device_t *old = g_hash_table_lookup(device_table, device->id); if (from_cib && (old != NULL) && pcmk_is_set(old->flags, fenced_df_api_registered)) { /* If the CIB is writing over an entry that is shared with a stonith * client, copy any pending ops that currently exist on the old * entry to the new one. Otherwise the pending ops will be reported * as failures. */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; fenced_device_set_flags(device, fenced_df_api_registered); old->pending_ops = NULL; if (device->pending_ops != NULL) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_table, device->id, device); ndevices = g_hash_table_size(device_table); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { fenced_device_set_flags(device, fenced_df_cib_registered); } else { fenced_device_set_flags(device, fenced_df_api_registered); } done: if (rc != pcmk_rc_ok) { free_device(device); } return rc; } void stonith_device_remove(const char *id, bool from_cib) { fenced_device_t *device = g_hash_table_lookup(device_table, id); guint ndevices = 0; if (device == NULL) { ndevices = g_hash_table_size(device_table); crm_info("Device '%s' not found (%u active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { fenced_device_clear_flags(device, fenced_df_cib_registered); } else { fenced_device_clear_flags(device, fenced_df_api_registered|fenced_df_verified); } if (!pcmk_any_flags_set(device->flags, fenced_df_api_registered |fenced_df_cib_registered)) { g_hash_table_remove(device_table, id); ndevices = g_hash_table_size(device_table); crm_info("Removed '%s' from device list (%u active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { // Exactly one is true at this point const bool cib_registered = pcmk_is_set(device->flags, fenced_df_cib_registered); crm_trace("Not removing '%s' from device list (%u active) because " "still registered via %s", id, g_hash_table_size(device_table), (cib_registered? "CIB" : "API")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, PCMK_XA_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE), crm_element_value(level, PCMK_XA_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", pcmk__xe_id(level)); } } /*! * \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, PCMK_XA_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if ((crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) { xml = pcmk__xpath_find_one(xml->doc, "//" PCMK_XE_FENCING_LEVEL, LOG_WARNING); } if (xml != NULL) { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, PCMK_XA_INDEX, &local_id); } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] result Where to set result of registration (can be \c NULL) */ void fenced_register_level(xmlNode *msg, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; const char *value = NULL; CRM_CHECK(msg != NULL, return); level = unpack_level_request(msg, &mode, &target, &id); if (level == NULL) { set_bad_request_result(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(pcmk__xe_id(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", pcmk__xe_id(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", pcmk__xe_id(level)); return; } // Ensure level ID is in allowed range if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), pcmk__xe_id(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, PCMK_XA_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } value = crm_element_value(level, PCMK_XA_DEVICES); if (value != NULL) { /* Empty string and whitespace are not possible with schema validation * enabled. Don't bother handling them specially here. */ gchar **devices = g_strsplit(value, ",", 0); for (char **dev = devices; (dev != NULL) && (*dev != NULL); dev++) { crm_trace("Adding device '%s' for %s[%d]", *dev, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(*dev)); } g_strfreev(devices); } { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. * * \param[in] msg XML request for STONITH level registration * \param[out] result Where to set result of unregistration (can be \c NULL) */ void fenced_unregister_level(xmlNode *msg, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; level = unpack_level_request(msg, NULL, &target, &id); if (level == NULL) { set_bad_request_result(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST__LEVEL_COUNT)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(pcmk__xe_id(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; char *pos = NULL; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = pcmk__assert_alloc(alloc_size, sizeof(char)); pos = rv; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); xmlNode *op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_ERR); const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION); async_command_t *cmd = NULL; fenced_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); set_bad_request_result(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_table, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!pcmk_is_set(device->flags, fenced_df_api_registered) && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); set_bad_request_result(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != fenced_df_none) { fenced_device_t *dev = g_hash_table_lookup(device_table, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, pcmk__str_copy(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const fenced_device_t *device, const char *action, const char *target, gboolean allow_self) { gboolean localhost_is_target = pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_self) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const fenced_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } /*! * \internal * \brief Check whether we can use a device's cached target list * * \param[in] dev Fencing device to check * * \return \c true if \p dev cached its targets less than a minute ago, * otherwise \c false */ static inline bool can_use_target_cache(const fenced_device_t *dev) { return (dev->targets != NULL) && (time(NULL) < (dev->targets_age + 60)); } static void can_fence_host_with_device(fenced_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, fenced_df_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_self)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST, pcmk__str_casei)) { if (!can_use_target_cache(dev)) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { fenced_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool allow_self, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_table); if (ndevices == 0) { callback(NULL, user_data); return; } search = pcmk__assert_alloc(1, sizeof(struct device_search_s)); search->host = pcmk__str_copy(host); search->action = pcmk__str_copy(action); search->per_device_timeout = timeout; search->allow_self = allow_self; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); fenced_foreach_device(search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const fenced_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); // PCMK__XA_ST_REQUIRED is currently used only for unfencing if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1); } // pcmk__timeout if configured action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %ds using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_self Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const fenced_device_t *device, const char *target, gboolean allow_self) { if (!localhost_is_eligible(device, action, target, allow_self)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_self Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const fenced_device_t *device, const char *target, gboolean allow_self) { xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION); crm_xml_add(child, PCMK_XA_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_self); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { const pcmk__node_status_t *node = pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster_member); pcmk__cluster_send_message(node, pcmk_ipc_fenced, reply); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *wrapper = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } // Pack the results into XML wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA); list = pcmk__xe_create(wrapper, __func__); crm_xml_add(list, PCMK__XA_ST_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { fenced_device_t *device = g_hash_table_lookup(device_table, lpc->data); const char *action = query->action; xmlNode *dev = NULL; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID); crm_xml_add(dev, PCMK_XA_ID, device->id); crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace); crm_xml_add(dev, PCMK_XA_AGENT, device->agent); // Has had successful monitor, list, or status on this node crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, pcmk_is_set(device->flags, fenced_df_verified)); crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, fenced_df_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_self_fencing)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_self_fencing)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } crm_log_xml_trace(list, "query-result"); stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: pcmk__xml_free(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " QB_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true); } if (pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). */ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, reply); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); pcmk__xml_free(reply); } static void cancel_stonith_command(async_command_t * cmd) { fenced_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? "" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static fenced_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { fenced_device_t *next_device = g_hash_table_lookup(device_table, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; fenced_device_t *device = NULL; fenced_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!pcmk_is_set(device->flags, fenced_df_verified) && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { fenced_device_set_flags(device, fenced_df_verified); } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_table, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; fenced_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { device = g_hash_table_lookup(device_table, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { /* Device found. Schedule a fencing command for it. * * Assign devices to device_list so that it will be freed with cmd. */ cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; fenced_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); set_bad_request_result(result); return; } device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (device_id != NULL) { device = g_hash_table_lookup(device_table, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { host = node->name; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! * \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { PCMK__XA_ST_OP, PCMK__XA_ST_CALLID, PCMK__XA_ST_CLIENTID, PCMK__XA_ST_CLIENTNAME, PCMK__XA_ST_REMOTE_OP, PCMK__XA_ST_CALLOPT, }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client); crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name); crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin); crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id); crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(pcmk__node_status_t *peer) { return (peer != NULL) && (peer->name != NULL) && pcmk_is_set(peer->processes, crm_get_cluster_proc()); } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! * \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { GHashTableIter gIter; pcmk__node_status_t *entry = NULL; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->name, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->name); return entry->name; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = pcmk__xpath_find_one(request->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, PCMK__XA_ST_TARGET); } relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY); op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); /* Delete RELAY operation. */ if ((relay_op_id != NULL) && (target != NULL) && pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. * * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = pcmk__xe_create(NULL, "reply"); pcmk__assert(request->ipc_client != NULL); crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID); const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); int op_timeout = 0; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. */ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_NEVER); if (dev != NULL) { const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = crm_element_value(dev, PCMK__XA_ST_TARGET); action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = pcmk__assert_alloc(1, sizeof(struct st_query_data)); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); query->remote_peer = pcmk__str_copy(request->peer); query->client_id = pcmk__str_copy(client_id); query->target = pcmk__str_copy(target); query->action = pcmk__str_copy(action); query->call_options = request->call_options; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_self_fencing), query, stonith_query_capable_device_cb, fenced_df_none); return NULL; } // STONITH_OP_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; pcmk__assert(request->ipc_client != NULL); flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, fenced_parse_notify_flag(flag_name)); } flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, fenced_parse_notify_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION), crm_element_value(dev, PCMK__XA_ST_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { set_bad_request_result(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { if (request->peer != NULL) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: set_bad_request_result(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_TRACE); const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET); const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; pcmk__node_status_t *node = pcmk__get_node(0, alternate_host, NULL, pcmk__node_search_cluster_member); if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY); crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); // @TODO On failure, fail request immediately, or maybe panic pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { set_bad_request_result(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } pcmk__xml_free(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = fenced_device_register(dev, false); rc = pcmk_rc2legacy(rc); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_config_notification(op, &request->result, (dev == NULL)? NULL : pcmk__xe_id(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); const char *device_id = crm_element_value(dev, PCMK_XA_ID); const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_config_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, PCMK_XA_ID, &node_id); name = crm_element_value(request->xml, PCMK_XA_UNAME); pcmk__cluster_forget_cluster_node(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { STONITH_OP_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } pcmk__xml_free(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { uint32_t call_options = st_opt_none; int rc = pcmk_rc_ok; bool is_reply = false; CRM_CHECK(message != NULL, return); if (pcmk__xpath_find_one(message->doc, "//" PCMK__XE_ST_REPLY, LOG_NEVER) != NULL) { is_reply = true; } rc = pcmk__xe_get_flags(message, PCMK__XA_ST_CALLOPT, &call_options, st_opt_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from message: %s", pcmk_rc_str(rc)); } crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, PCMK__XA_ST_OP), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { pcmk__assert((client == NULL) || (client->request_id == id)); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/lib/cib/cib_remote.c b/lib/cib/cib_remote.c index 1fb38dbb0b..d0b5342346 100644 --- a/lib/cib/cib_remote.c +++ b/lib/cib/cib_remote.c @@ -1,659 +1,660 @@ /* * Copyright 2008-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // GnuTLS handshake timeout in seconds #define TLS_HANDSHAKE_TIMEOUT 5 static pcmk__tls_t *tls = NULL; #include typedef struct cib_remote_opaque_s { int port; char *server; char *user; char *passwd; gboolean encrypted; pcmk__remote_t command; pcmk__remote_t callback; pcmk__output_t *out; time_t start_time; int timeout_sec; } cib_remote_opaque_t; static int cib_remote_perform_op(cib_t *cib, const char *op, const char *host, const char *section, xmlNode *data, xmlNode **output_data, int call_options, const char *user_name) { int rc; int remaining_time = 0; time_t start_time; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; cib_remote_opaque_t *private = cib->variant_opaque; if (cib->state == cib_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } rc = cib__create_op(cib, op, host, section, data, call_options, user_name, NULL, &op_msg); if (rc != pcmk_ok) { return rc; } if (pcmk_is_set(call_options, cib_transaction)) { rc = cib__extend_transaction(cib, op_msg); pcmk__xml_free(op_msg); return rc; } crm_trace("Sending %s message to the CIB manager", op); if (!(call_options & cib_sync_call)) { pcmk__remote_send_xml(&private->callback, op_msg); } else { pcmk__remote_send_xml(&private->command, op_msg); } pcmk__xml_free(op_msg); if ((call_options & cib_discard_reply)) { crm_trace("Discarding reply"); return pcmk_ok; } else if (!(call_options & cib_sync_call)) { return cib->call_id; } crm_trace("Waiting for a synchronous reply"); start_time = time(NULL); remaining_time = cib->call_timeout ? cib->call_timeout : 60; rc = pcmk_rc_ok; while (remaining_time > 0 && (rc != ENOTCONN)) { int reply_id = -1; int msg_id = cib->call_id; rc = pcmk__read_remote_message(&private->command, remaining_time * 1000); op_reply = pcmk__remote_message_xml(&private->command); if (!op_reply) { break; } crm_element_value_int(op_reply, PCMK__XA_CIB_CALLID, &reply_id); if (reply_id == msg_id) { break; } else if (reply_id < msg_id) { crm_debug("Received old reply: %d (wanted %d)", reply_id, msg_id); crm_log_xml_trace(op_reply, "Old reply"); } else if ((reply_id - 10000) > msg_id) { /* wrap-around case */ crm_debug("Received old reply: %d (wanted %d)", reply_id, msg_id); crm_log_xml_trace(op_reply, "Old reply"); } else { crm_err("Received a __future__ reply:" " %d (wanted %d)", reply_id, msg_id); } pcmk__xml_free(op_reply); op_reply = NULL; /* wasn't the right reply, try and read some more */ remaining_time = time(NULL) - start_time; } if (rc == ENOTCONN) { crm_err("Disconnected while waiting for reply."); return -ENOTCONN; } else if (op_reply == NULL) { crm_err("No reply message - empty"); return -ENOMSG; } crm_trace("Synchronous reply received"); /* Start processing the reply... */ if (crm_element_value_int(op_reply, PCMK__XA_CIB_RC, &rc) != 0) { rc = -EPROTO; } if (rc == -pcmk_err_diff_resync) { /* This is an internal value that clients do not and should not care about */ rc = pcmk_ok; } if (rc == pcmk_ok || rc == -EPERM) { crm_log_xml_debug(op_reply, "passed"); } else { crm_err("Call failed: %s", pcmk_strerror(rc)); crm_log_xml_warn(op_reply, "failed"); } if (output_data == NULL) { /* do nothing more */ } else if (!(call_options & cib_discard_reply)) { xmlNode *wrapper = pcmk__xe_first_child(op_reply, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *tmp = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (tmp == NULL) { crm_trace("No output in reply to \"%s\" command %d", op, cib->call_id - 1); } else { *output_data = pcmk__xml_copy(NULL, tmp); } } pcmk__xml_free(op_reply); return rc; } static int cib_remote_callback_dispatch(gpointer user_data) { int rc; cib_t *cib = user_data; cib_remote_opaque_t *private = cib->variant_opaque; xmlNode *msg = NULL; const char *type = NULL; /* If start time is 0, we've previously handled a complete message and this * connection is being reused for a new message. Reset the start_time, * giving this new message timeout_sec from now to complete. */ if (private->start_time == 0) { private->start_time = time(NULL); } rc = pcmk__read_available_remote_data(&private->callback); switch (rc) { case pcmk_rc_ok: /* We have the whole message so process it */ break; case EAGAIN: /* Have we timed out? */ if (time(NULL) >= private->start_time + private->timeout_sec) { crm_info("Error reading from CIB manager connection: %s", pcmk_rc_str(ETIME)); return -1; } /* We haven't read the whole message yet */ return 0; default: /* Error */ crm_info("Error reading from CIB manager connection: %s", pcmk_rc_str(rc)); return -1; } + // coverity[tainted_data] This can't easily be changed right now msg = pcmk__remote_message_xml(&private->callback); if (msg == NULL) { private->start_time = 0; return 0; } type = crm_element_value(msg, PCMK__XA_T); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, PCMK__VALUE_CIB, pcmk__str_none)) { cib_native_callback(cib, msg, 0, 0); } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_NOTIFY, pcmk__str_none)) { g_list_foreach(cib->notify_list, cib_native_notify, msg); } else { crm_err("Unknown message type: %s", type); } pcmk__xml_free(msg); private->start_time = 0; return 0; } static int cib_remote_command_dispatch(gpointer user_data) { int rc; cib_t *cib = user_data; cib_remote_opaque_t *private = cib->variant_opaque; /* See cib_remote_callback_dispatch */ if (private->start_time == 0) { private->start_time = time(NULL); } rc = pcmk__read_available_remote_data(&private->command); if (rc == EAGAIN) { /* Have we timed out? */ if (time(NULL) >= private->start_time + private->timeout_sec) { crm_info("Error reading from CIB manager connection: %s", pcmk_rc_str(ETIME)); return -1; } /* We haven't read the whole message yet */ return 0; } free(private->command.buffer); private->command.buffer = NULL; crm_err("received late reply for remote cib connection, discarding"); if (rc != pcmk_rc_ok) { crm_info("Error reading from CIB manager connection: %s", pcmk_rc_str(rc)); return -1; } private->start_time = 0; return 0; } static int cib_tls_close(cib_t *cib) { cib_remote_opaque_t *private = cib->variant_opaque; if (private->encrypted) { if (private->command.tls_session) { gnutls_bye(private->command.tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(private->command.tls_session); } if (private->callback.tls_session) { gnutls_bye(private->callback.tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(private->callback.tls_session); } private->command.tls_session = NULL; private->callback.tls_session = NULL; pcmk__free_tls(tls); tls = NULL; } if (private->command.tcp_socket >= 0) { shutdown(private->command.tcp_socket, SHUT_RDWR); /* no more receptions */ close(private->command.tcp_socket); } if (private->callback.tcp_socket >= 0) { shutdown(private->callback.tcp_socket, SHUT_RDWR); /* no more receptions */ close(private->callback.tcp_socket); } private->command.tcp_socket = -1; private->callback.tcp_socket = -1; free(private->command.buffer); free(private->callback.buffer); private->command.buffer = NULL; private->callback.buffer = NULL; return 0; } static void cib_remote_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); cib_tls_close(user_data); } static int cib_tls_signon(cib_t *cib, pcmk__remote_t *connection, gboolean event_channel) { cib_remote_opaque_t *private = cib->variant_opaque; int rc; xmlNode *answer = NULL; xmlNode *login = NULL; static struct mainloop_fd_callbacks cib_fd_callbacks = { 0, }; cib_fd_callbacks.dispatch = event_channel ? cib_remote_callback_dispatch : cib_remote_command_dispatch; cib_fd_callbacks.destroy = cib_remote_connection_destroy; connection->tcp_socket = -1; connection->tls_session = NULL; rc = pcmk__connect_remote(private->server, private->port, 0, NULL, &(connection->tcp_socket), NULL, NULL); if (rc != pcmk_rc_ok) { crm_info("Remote connection to %s:%d failed: %s " QB_XS " rc=%d", private->server, private->port, pcmk_rc_str(rc), rc); return -ENOTCONN; } if (private->encrypted) { bool use_cert = pcmk__x509_enabled(); int tls_rc = GNUTLS_E_SUCCESS; rc = pcmk__init_tls(&tls, false, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_ANON); if (rc != pcmk_rc_ok) { return -1; } /* bind the socket to GnuTls lib */ connection->tls_session = pcmk__new_tls_session(tls, connection->tcp_socket); if (connection->tls_session == NULL) { cib_tls_close(cib); return -1; } rc = pcmk__tls_client_handshake(connection, TLS_HANDSHAKE_TIMEOUT, &tls_rc); if (rc != pcmk_rc_ok) { crm_err("Remote CIB session creation for %s:%d failed: %s", private->server, private->port, (rc == EPROTO)? gnutls_strerror(tls_rc) : pcmk_rc_str(rc)); gnutls_deinit(connection->tls_session); connection->tls_session = NULL; cib_tls_close(cib); return -1; } } /* Now that the handshake is done, see if any client TLS certificate is * close to its expiration date and log if so. If a TLS certificate is not * in use, this function will just return so we don't need to check for the * session type here. */ pcmk__tls_check_cert_expiration(connection->tls_session); /* login to server */ login = pcmk__xe_create(NULL, PCMK__XE_CIB_COMMAND); crm_xml_add(login, PCMK_XA_OP, "authenticate"); crm_xml_add(login, PCMK_XA_USER, private->user); crm_xml_add(login, PCMK__XA_PASSWORD, private->passwd); crm_xml_add(login, PCMK__XA_HIDDEN, PCMK__VALUE_PASSWORD); pcmk__remote_send_xml(connection, login); pcmk__xml_free(login); rc = pcmk_ok; if (pcmk__read_remote_message(connection, -1) == ENOTCONN) { rc = -ENOTCONN; } answer = pcmk__remote_message_xml(connection); crm_log_xml_trace(answer, "Reply"); if (answer == NULL) { rc = -EPROTO; } else { /* grab the token */ const char *msg_type = crm_element_value(answer, PCMK__XA_CIB_OP); const char *tmp_ticket = crm_element_value(answer, PCMK__XA_CIB_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_err("Invalid registration message: %s", msg_type); rc = -EPROTO; } else if (tmp_ticket == NULL) { rc = -EPROTO; } else { connection->token = strdup(tmp_ticket); } } pcmk__xml_free(answer); answer = NULL; if (rc != 0) { cib_tls_close(cib); return rc; } crm_trace("remote client connection established"); private->timeout_sec = 60; connection->source = mainloop_add_fd("cib-remote", G_PRIORITY_HIGH, connection->tcp_socket, cib, &cib_fd_callbacks); return rc; } static int cib_remote_signon(cib_t *cib, const char *name, enum cib_conn_type type) { int rc = pcmk_ok; cib_remote_opaque_t *private = cib->variant_opaque; if (name == NULL) { name = pcmk__s(crm_system_name, "client"); } if (private->passwd == NULL) { if (private->out == NULL) { /* If no pcmk__output_t is set, just assume that a text prompt * is good enough. */ pcmk__text_prompt("Password", false, &(private->passwd)); } else { private->out->prompt("Password", false, &(private->passwd)); } } if (private->server == NULL || private->user == NULL) { rc = -EINVAL; goto done; } rc = cib_tls_signon(cib, &(private->command), FALSE); if (rc != pcmk_ok) { goto done; } rc = cib_tls_signon(cib, &(private->callback), TRUE); done: if (rc == pcmk_ok) { crm_info("Opened connection to %s:%d for %s", private->server, private->port, name); cib->state = cib_connected_command; cib->type = cib_command; } else { crm_info("Connection to %s:%d for %s failed: %s\n", private->server, private->port, name, pcmk_strerror(rc)); } return rc; } static int cib_remote_signoff(cib_t *cib) { int rc = pcmk_ok; crm_debug("Disconnecting from the CIB manager"); cib_tls_close(cib); cib->cmds->end_transaction(cib, false, cib_none); cib->state = cib_disconnected; cib->type = cib_no_connection; return rc; } static int cib_remote_free(cib_t *cib) { int rc = pcmk_ok; crm_warn("Freeing CIB"); if (cib->state != cib_disconnected) { rc = cib_remote_signoff(cib); if (rc == pcmk_ok) { cib_remote_opaque_t *private = cib->variant_opaque; free(private->server); free(private->user); free(private->passwd); free(cib->cmds); free(cib->user); free(private); free(cib); } } return rc; } static int cib_remote_register_notification(cib_t * cib, const char *callback, int enabled) { xmlNode *notify_msg = pcmk__xe_create(NULL, PCMK__XE_CIB_COMMAND); cib_remote_opaque_t *private = cib->variant_opaque; crm_xml_add(notify_msg, PCMK__XA_CIB_OP, PCMK__VALUE_CIB_NOTIFY); crm_xml_add(notify_msg, PCMK__XA_CIB_NOTIFY_TYPE, callback); crm_xml_add_int(notify_msg, PCMK__XA_CIB_NOTIFY_ACTIVATE, enabled); pcmk__remote_send_xml(&private->callback, notify_msg); pcmk__xml_free(notify_msg); return pcmk_ok; } static int cib_remote_set_connection_dnotify(cib_t * cib, void (*dnotify) (gpointer user_data)) { return -EPROTONOSUPPORT; } /*! * \internal * \brief Get the given CIB connection's unique client identifiers * * These can be used to check whether this client requested the action that * triggered a CIB notification. * * \param[in] cib CIB connection * \param[out] async_id If not \p NULL, where to store asynchronous client ID * \param[out] sync_id If not \p NULL, where to store synchronous client ID * * \return Legacy Pacemaker return code (specifically, \p pcmk_ok) * * \note This is the \p cib_remote variant implementation of * \p cib_api_operations_t:client_id(). * \note The client IDs are assigned during CIB sign-on. */ static int cib_remote_client_id(const cib_t *cib, const char **async_id, const char **sync_id) { cib_remote_opaque_t *private = cib->variant_opaque; if (async_id != NULL) { // private->callback is the channel for async requests *async_id = private->callback.token; } if (sync_id != NULL) { // private->command is the channel for sync requests *sync_id = private->command.token; } return pcmk_ok; } cib_t * cib_remote_new(const char *server, const char *user, const char *passwd, int port, gboolean encrypted) { cib_remote_opaque_t *private = NULL; cib_t *cib = cib_new_variant(); if (cib == NULL) { return NULL; } private = calloc(1, sizeof(cib_remote_opaque_t)); if (private == NULL) { free(cib); return NULL; } cib->variant = cib_remote; cib->variant_opaque = private; private->server = pcmk__str_copy(server); private->user = pcmk__str_copy(user); private->passwd = pcmk__str_copy(passwd); private->port = port; private->encrypted = encrypted; /* assign variant specific ops */ cib->delegate_fn = cib_remote_perform_op; cib->cmds->signon = cib_remote_signon; cib->cmds->signoff = cib_remote_signoff; cib->cmds->free = cib_remote_free; cib->cmds->register_notification = cib_remote_register_notification; cib->cmds->set_connection_dnotify = cib_remote_set_connection_dnotify; cib->cmds->client_id = cib_remote_client_id; return cib; } void cib__set_output(cib_t *cib, pcmk__output_t *out) { cib_remote_opaque_t *private; if (cib->variant != cib_remote) { return; } private = cib->variant_opaque; private->out = out; } diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index c71576cbb0..c40ca744d0 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -1,1789 +1,1790 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #if defined(HAVE_UCRED) || defined(HAVE_SOCKPEERCRED) #include #elif defined(HAVE_GETPEERUCRED) #include #endif #include #include #include #include #include /* indirectly: pcmk_err_generic */ #include #include #include #include "crmcommon_private.h" static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); /*! * \brief Create a new object for using Pacemaker daemon IPC * * \param[out] api Where to store new IPC object * \param[in] server Which Pacemaker daemon the object is for * * \return Standard Pacemaker result code * * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). * \note This is intended to supersede crm_ipc_new() but currently only * supports the controller, pacemakerd, and schedulerd IPC API. */ int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) { if (api == NULL) { return EINVAL; } *api = calloc(1, sizeof(pcmk_ipc_api_t)); if (*api == NULL) { return errno; } (*api)->server = server; if (pcmk_ipc_name(*api, false) == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return EOPNOTSUPP; } // Set server methods switch (server) { case pcmk_ipc_attrd: (*api)->cmds = pcmk__attrd_api_methods(); break; case pcmk_ipc_based: break; case pcmk_ipc_controld: (*api)->cmds = pcmk__controld_api_methods(); break; case pcmk_ipc_execd: break; case pcmk_ipc_fenced: break; case pcmk_ipc_pacemakerd: (*api)->cmds = pcmk__pacemakerd_api_methods(); break; case pcmk_ipc_schedulerd: (*api)->cmds = pcmk__schedulerd_api_methods(); break; default: // pcmk_ipc_unknown pcmk_free_ipc_api(*api); *api = NULL; return EINVAL; } if ((*api)->cmds == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), 0); if ((*api)->ipc == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } // If daemon API has its own data to track, allocate it if ((*api)->cmds->new_data != NULL) { if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } } crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true)); return pcmk_rc_ok; } static void free_daemon_specific_data(pcmk_ipc_api_t *api) { if ((api != NULL) && (api->cmds != NULL)) { if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) { api->cmds->free_data(api->api_data); api->api_data = NULL; } free(api->cmds); api->cmds = NULL; } } /*! * \internal * \brief Call an IPC API event callback, if one is registed * * \param[in,out] api IPC API connection * \param[in] event_type The type of event that occurred * \param[in] status Event status * \param[in,out] event_data Event-specific data */ void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data) { if ((api != NULL) && (api->cb != NULL)) { api->cb(api, event_type, status, event_data, api->user_data); } } /*! * \internal * \brief Clean up after an IPC disconnect * * \param[in,out] user_data IPC API connection that disconnected * * \note This function can be used as a main loop IPC destroy callback. */ static void ipc_post_disconnect(gpointer user_data) { pcmk_ipc_api_t *api = user_data; crm_info("Disconnected from %s", pcmk_ipc_name(api, true)); // Perform any daemon-specific handling needed if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) { api->cmds->post_disconnect(api); } // Call client's registered event callback pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT, NULL); /* If this is being called from a running main loop, mainloop_gio_destroy() * will free ipc and mainloop_io immediately after calling this function. * If this is called from a stopped main loop, these will leak, so the best * practice is to close the connection before stopping the main loop. */ api->ipc = NULL; api->mainloop_io = NULL; if (api->free_on_disconnect) { /* pcmk_free_ipc_api() has already been called, but did not free api * or api->cmds because this function needed them. Do that now. */ free_daemon_specific_data(api); crm_trace("Freeing IPC API object after disconnect"); free(api); } } /*! * \brief Free the contents of an IPC API object * * \param[in,out] api IPC API object to free */ void pcmk_free_ipc_api(pcmk_ipc_api_t *api) { bool free_on_disconnect = false; if (api == NULL) { return; } crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true)); if (api->ipc != NULL) { if (api->mainloop_io != NULL) { /* We need to keep the api pointer itself around, because it is the * user data for the IPC client destroy callback. That will be * triggered by the pcmk_disconnect_ipc() call below, but it might * happen later in the main loop (if still running). * * This flag tells the destroy callback to free the object. It can't * do that unconditionally, because the application might call this * function after a disconnect that happened by other means. */ free_on_disconnect = api->free_on_disconnect = true; } pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true } if (!free_on_disconnect) { free_daemon_specific_data(api); crm_trace("Freeing IPC API object"); free(api); } } /*! * \brief Get the IPC name used with an IPC API connection * * \param[in] api IPC API connection * \param[in] for_log If true, return human-friendly name instead of IPC name * * \return IPC API's human-friendly or connection name, or if none is available, * "Pacemaker" if for_log is true and NULL if for_log is false */ const char * pcmk_ipc_name(const pcmk_ipc_api_t *api, bool for_log) { if (api == NULL) { return for_log? "Pacemaker" : NULL; } if (for_log) { const char *name = pcmk__server_log_name(api->server); return pcmk__s(name, "Pacemaker"); } switch (api->server) { // These servers do not have pcmk_ipc_api_t implementations yet case pcmk_ipc_based: case pcmk_ipc_execd: case pcmk_ipc_fenced: return NULL; default: return pcmk__server_ipc_name(api->server); } } /*! * \brief Check whether an IPC API connection is active * * \param[in,out] api IPC API connection * * \return true if IPC is connected, false otherwise */ bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api) { return (api != NULL) && crm_ipc_connected(api->ipc); } /*! * \internal * \brief Call the daemon-specific API's dispatch function * * Perform daemon-specific handling of IPC reply dispatch. It is the daemon * method's responsibility to call the client's registered event callback, as * well as allocate and free any event data. * * \param[in,out] api IPC API connection * \param[in,out] message IPC reply XML to dispatch */ static bool call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message) { crm_log_xml_trace(message, "ipc-received"); if ((api->cmds != NULL) && (api->cmds->dispatch != NULL)) { return api->cmds->dispatch(api, message); } return false; } /*! * \internal * \brief Dispatch previously read IPC data * * \param[in] buffer Data read from IPC * \param[in,out] api IPC object * * \return Standard Pacemaker return code. In particular: * * pcmk_rc_ok: There are no more messages expected from the server. Quit * reading. * EINPROGRESS: There are more messages expected from the server. Keep reading. * * All other values indicate an error. */ static int dispatch_ipc_data(const char *buffer, pcmk_ipc_api_t *api) { bool more = false; xmlNode *msg; if (buffer == NULL) { crm_warn("Empty message received from %s IPC", pcmk_ipc_name(api, true)); return ENOMSG; } msg = pcmk__xml_parse(buffer); if (msg == NULL) { crm_warn("Malformed message received from %s IPC", pcmk_ipc_name(api, true)); return EPROTO; } more = call_api_dispatch(api, msg); pcmk__xml_free(msg); if (more) { return EINPROGRESS; } else { return pcmk_rc_ok; } } /*! * \internal * \brief Dispatch data read from IPC source * * \param[in] buffer Data read from IPC * \param[in] length Number of bytes of data in buffer (ignored) * \param[in,out] user_data IPC object * * \return Always 0 (meaning connection is still required) * * \note This function can be used as a main loop IPC dispatch callback. */ static int dispatch_ipc_source_data(const char *buffer, ssize_t length, gpointer user_data) { pcmk_ipc_api_t *api = user_data; CRM_CHECK(api != NULL, return 0); dispatch_ipc_data(buffer, api); return 0; } /*! * \brief Check whether an IPC connection has data available (without main loop) * * \param[in] api IPC API connection * \param[in] timeout_ms If less than 0, poll indefinitely; if 0, poll once * and return immediately; otherwise, poll for up to * this many milliseconds * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function to check whether IPC data is available. Return values of * interest include pcmk_rc_ok meaning data is available, and EAGAIN * meaning no data is available; all other values indicate errors. * \todo This does not allow the caller to poll multiple file descriptors at * once. If there is demand for that, we could add a wrapper for * pcmk__ipc_fd(api->ipc), so the caller can call poll() themselves. */ int pcmk_poll_ipc(const pcmk_ipc_api_t *api, int timeout_ms) { int rc; struct pollfd pollfd = { 0, }; if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) { return EINVAL; } rc = pcmk__ipc_fd(api->ipc, &(pollfd.fd)); if (rc != pcmk_rc_ok) { crm_debug("Could not obtain file descriptor for %s IPC: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); return rc; } pollfd.events = POLLIN; rc = poll(&pollfd, 1, timeout_ms); if (rc < 0) { /* Some UNIX systems return negative and set EAGAIN for failure to * allocate memory; standardize the return code in that case */ return (errno == EAGAIN)? ENOMEM : errno; } else if (rc == 0) { return EAGAIN; } return pcmk_rc_ok; } /*! * \brief Dispatch available messages on an IPC connection (without main loop) * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function when IPC data is available. */ void pcmk_dispatch_ipc(pcmk_ipc_api_t *api) { if (api == NULL) { return; } while (crm_ipc_ready(api->ipc) > 0) { if (crm_ipc_read(api->ipc) > 0) { dispatch_ipc_data(crm_ipc_buffer(api->ipc), api); pcmk__ipc_free_client_buffer(api->ipc); } } } // \return Standard Pacemaker return code static int connect_with_main_loop(pcmk_ipc_api_t *api) { int rc; struct ipc_client_callbacks callbacks = { .dispatch = dispatch_ipc_source_data, .destroy = ipc_post_disconnect, }; rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api, &callbacks, &(api->mainloop_io)); if (rc != pcmk_rc_ok) { return rc; } crm_debug("Connected to %s IPC (attached to main loop)", pcmk_ipc_name(api, true)); /* After this point, api->mainloop_io owns api->ipc, so api->ipc * should not be explicitly freed. */ return pcmk_rc_ok; } // \return Standard Pacemaker return code static int connect_without_main_loop(pcmk_ipc_api_t *api) { int rc = pcmk__connect_generic_ipc(api->ipc); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } else { crm_debug("Connected to %s IPC (without main loop)", pcmk_ipc_name(api, true)); } return rc; } /*! * \internal * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors * and ECONNREFUSED) * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * \param[in] attempts How many times to try (in case of soft error) * * \return Standard Pacemaker return code */ int pcmk__connect_ipc_retry_conrefused(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type, int attempts) { int remaining = attempts; int rc = pcmk_rc_ok; do { if (rc == ECONNREFUSED) { pcmk__sleep_ms((attempts - remaining) * 500); } rc = pcmk__connect_ipc(api, dispatch_type, remaining); remaining--; } while (rc == ECONNREFUSED && remaining >= 0); return rc; } /*! * \internal * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors) * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * \param[in] attempts How many times to try (in case of soft error) * * \return Standard Pacemaker return code */ int pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type, int attempts) { int rc = pcmk_rc_ok; if ((api == NULL) || (attempts < 1)) { return EINVAL; } if (api->ipc == NULL) { api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), 0); if (api->ipc == NULL) { return ENOMEM; } } if (crm_ipc_connected(api->ipc)) { crm_trace("Already connected to %s", pcmk_ipc_name(api, true)); return pcmk_rc_ok; } api->dispatch_type = dispatch_type; crm_debug("Attempting connection to %s (up to %d time%s)", pcmk_ipc_name(api, true), attempts, pcmk__plural_s(attempts)); for (int remaining = attempts - 1; remaining >= 0; --remaining) { switch (dispatch_type) { case pcmk_ipc_dispatch_main: rc = connect_with_main_loop(api); break; case pcmk_ipc_dispatch_sync: case pcmk_ipc_dispatch_poll: rc = connect_without_main_loop(api); break; } if ((remaining == 0) || ((rc != EAGAIN) && (rc != EALREADY))) { break; // Result is final } // Retry after soft error (interrupted by signal, etc.) pcmk__sleep_ms((attempts - remaining) * 500); crm_debug("Re-attempting connection to %s (%d attempt%s remaining)", pcmk_ipc_name(api, true), remaining, pcmk__plural_s(remaining)); } if (rc != pcmk_rc_ok) { return rc; } if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) { rc = api->cmds->post_connect(api); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } } return rc; } /*! * \brief Connect to a Pacemaker daemon via IPC * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * * \return Standard Pacemaker return code */ int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) { int rc = pcmk__connect_ipc(api, dispatch_type, 2); if (rc != pcmk_rc_ok) { crm_err("Connection to %s failed: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); } return rc; } /*! * \brief Disconnect an IPC API instance * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code * * \note If the connection is attached to a main loop, this function should be * called before quitting the main loop, to ensure that all memory is * freed. */ void pcmk_disconnect_ipc(pcmk_ipc_api_t *api) { if ((api == NULL) || (api->ipc == NULL)) { return; } switch (api->dispatch_type) { case pcmk_ipc_dispatch_main: { mainloop_io_t *mainloop_io = api->mainloop_io; // Make sure no code with access to api can use these again api->mainloop_io = NULL; api->ipc = NULL; mainloop_del_ipc_client(mainloop_io); // After this point api might have already been freed } break; case pcmk_ipc_dispatch_poll: case pcmk_ipc_dispatch_sync: { crm_ipc_t *ipc = api->ipc; // Make sure no code with access to api can use ipc again api->ipc = NULL; // This should always be the case already, but to be safe api->free_on_disconnect = false; crm_ipc_close(ipc); crm_ipc_destroy(ipc); ipc_post_disconnect(api); } break; } } /*! * \brief Register a callback for IPC API events * * \param[in,out] api IPC API connection * \param[in] callback Callback to register * \param[in] userdata Caller data to pass to callback * * \note This function may be called multiple times to update the callback * and/or user data. The caller remains responsible for freeing * userdata in any case (after the IPC is disconnected, if the * user data is still registered with the IPC). */ void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, void *user_data) { if (api == NULL) { return; } api->cb = cb; api->user_data = user_data; } /*! * \internal * \brief Send an XML request across an IPC API connection * * \param[in,out] api IPC API connection * \param[in] request XML request to send * * \return Standard Pacemaker return code * * \note Daemon-specific IPC API functions should call this function to send * requests, because it handles different dispatch types appropriately. */ int pcmk__send_ipc_request(pcmk_ipc_api_t *api, const xmlNode *request) { int rc; xmlNode *reply = NULL; enum crm_ipc_flags flags = crm_ipc_flags_none; if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) { return EINVAL; } crm_log_xml_trace(request, "ipc-sent"); // Synchronous dispatch requires waiting for a reply if ((api->dispatch_type == pcmk_ipc_dispatch_sync) && (api->cmds != NULL) && (api->cmds->reply_expected != NULL) && (api->cmds->reply_expected(api, request))) { flags = crm_ipc_client_response; } /* The 0 here means a default timeout of 5 seconds * * @TODO Maybe add a timeout_ms member to pcmk_ipc_api_t and a * pcmk_set_ipc_timeout() setter for it, then use it here. */ rc = crm_ipc_send(api->ipc, request, flags, 0, &reply); if (rc < 0) { return pcmk_legacy2rc(rc); } else if (rc == 0) { return ENODATA; } // With synchronous dispatch, we dispatch any reply now if (reply != NULL) { bool more = call_api_dispatch(api, reply); pcmk__xml_free(reply); while (more) { rc = crm_ipc_read(api->ipc); if (rc == -EAGAIN) { continue; } else if (rc == -ENOMSG || rc == pcmk_ok) { return pcmk_rc_ok; } else if (rc < 0) { return -rc; } rc = dispatch_ipc_data(crm_ipc_buffer(api->ipc), api); pcmk__ipc_free_client_buffer(api->ipc); if (rc == pcmk_rc_ok) { more = false; } else if (rc == EINPROGRESS) { more = true; } else { continue; } } } return pcmk_rc_ok; } /*! * \internal * \brief Create the XML for an IPC request to purge a node from the peer cache * * \param[in] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Newly allocated IPC request XML * * \note The controller, fencer, and pacemakerd use the same request syntax, but * the attribute manager uses a different one. The CIB manager doesn't * have any syntax for it. The executor and scheduler don't connect to the * cluster layer and thus don't have or need any syntax for it. * * \todo Modify the attribute manager to accept the common syntax (as well * as its current one, for compatibility with older clients). Modify * the CIB manager to accept and honor the common syntax. Modify the * executor and scheduler to accept the syntax (immediately returning * success), just for consistency. Modify this function to use the * common syntax with all daemons if their version supports it. */ static xmlNode * create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { xmlNode *request = NULL; const char *client = crm_system_name? crm_system_name : "client"; switch (api->server) { case pcmk_ipc_attrd: request = pcmk__xe_create(NULL, __func__); crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(request, PCMK__XA_SRC, crm_system_name); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, true); crm_xml_add(request, PCMK__XA_ATTR_HOST, node_name); if (nodeid > 0) { crm_xml_add_int(request, PCMK__XA_ATTR_HOST_ID, nodeid); } break; case pcmk_ipc_controld: case pcmk_ipc_fenced: case pcmk_ipc_pacemakerd: request = pcmk__new_request(api->server, client, NULL, pcmk_ipc_name(api, false), CRM_OP_RM_NODE_CACHE, NULL); if (nodeid > 0) { crm_xml_add_ll(request, PCMK_XA_ID, (long long) nodeid); } crm_xml_add(request, PCMK_XA_UNAME, node_name); break; case pcmk_ipc_based: case pcmk_ipc_execd: case pcmk_ipc_schedulerd: break; default: // pcmk_ipc_unknown (shouldn't be possible) return NULL; } return request; } /*! * \brief Ask a Pacemaker daemon to purge a node from its peer cache * * \param[in,out] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Standard Pacemaker return code * * \note At least one of node_name or nodeid must be specified. */ int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { int rc = 0; xmlNode *request = NULL; if (api == NULL) { return EINVAL; } if ((node_name == NULL) && (nodeid == 0)) { return EINVAL; } request = create_purge_node_request(api, node_name, nodeid); if (request == NULL) { return EOPNOTSUPP; } rc = pcmk__send_ipc_request(api, request); pcmk__xml_free(request); crm_debug("%s peer cache purge of node %s[%lu]: rc=%d", pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc); return rc; } /* * Generic IPC API (to eventually be deprecated as public API and made internal) */ struct crm_ipc_s { struct pollfd pfd; int need_reply; GByteArray *buffer; char *server_name; // server IPC name being connected to qb_ipcc_connection_t *ipc; }; /*! * \brief Create a new (legacy) object for using Pacemaker daemon IPC * * \param[in] name IPC system name to connect to * \param[in] max_size Use a maximum IPC buffer size of at least this size * * \return Newly allocated IPC object on success, NULL otherwise * * \note The caller is responsible for freeing the result using * crm_ipc_destroy(). * \note This should be considered deprecated for use with daemons supported by * pcmk_new_ipc_api(). * \note @COMPAT Since 3.0.1, \p max_size is ignored and the default given by * \c crm_ipc_default_buffer_size() will be used instead. */ crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); if (client == NULL) { crm_err("Could not create IPC connection: %s", strerror(errno)); return NULL; } client->server_name = strdup(name); if (client->server_name == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client); return NULL; } client->buffer = NULL; client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \internal * \brief Connect a generic (not daemon-specific) IPC object * * \param[in,out] ipc Generic IPC object to connect * * \return Standard Pacemaker return code */ int pcmk__connect_generic_ipc(crm_ipc_t *ipc) { uid_t cl_uid = 0; gid_t cl_gid = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; int rc = pcmk_rc_ok; if (ipc == NULL) { return EINVAL; } ipc->need_reply = FALSE; ipc->ipc = qb_ipcc_connect(ipc->server_name, crm_ipc_default_buffer_size()); if (ipc->ipc == NULL) { return errno; } rc = qb_ipcc_fd_get(ipc->ipc, &ipc->pfd.fd); if (rc < 0) { // -errno crm_ipc_close(ipc); return -rc; } rc = pcmk_daemon_user(&cl_uid, &cl_gid); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { crm_ipc_close(ipc); return rc; } rc = is_ipc_provider_expected(ipc->ipc, ipc->pfd.fd, cl_uid, cl_gid, &found_pid, &found_uid, &found_gid); if (rc != pcmk_rc_ok) { if (rc == pcmk_rc_ipc_unauthorized) { crm_info("%s IPC provider authentication failed: process %lld has " "uid %lld (expected %lld) and gid %lld (expected %lld)", ipc->server_name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) cl_uid, (long long) found_gid, (long long) cl_gid); } crm_ipc_close(ipc); return rc; } return pcmk_rc_ok; } void crm_ipc_close(crm_ipc_t * client) { if (client) { if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying active %s IPC connection", client->server_name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } else { crm_trace("Destroying inactive %s IPC connection", client->server_name); } if (client->buffer != NULL) { pcmk__ipc_free_client_buffer(client); } free(client->server_name); free(client); } } /*! * \internal * \brief Get the file descriptor for a generic IPC object * * \param[in,out] ipc Generic IPC object to get file descriptor for * \param[out] fd Where to store file descriptor * * \return Standard Pacemaker return code */ int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd) { if ((ipc == NULL) || (fd == NULL)) { return EINVAL; } if ((ipc->ipc == NULL) || (ipc->pfd.fd < 0)) { return ENOTCONN; } *fd = ipc->pfd.fd; return pcmk_rc_ok; } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = -1; if (pcmk__ipc_fd(client, &fd) != pcmk_rc_ok) { crm_err("Could not obtain file descriptor for %s IPC", ((client == NULL)? "unspecified" : client->server_name)); errno = EINVAL; return -EINVAL; } return fd; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } /*! * \brief Check whether an IPC connection is ready to be read * * \param[in,out] client Connection to check * * \return Positive value if ready to be read, 0 if not ready, -errno on error */ int crm_ipc_ready(crm_ipc_t *client) { int rc; pcmk__assert(client != NULL); if (!crm_ipc_connected(client)) { return -ENOTCONN; } client->pfd.revents = 0; rc = poll(&(client->pfd), 1, 0); return (rc < 0)? -errno : rc; } long crm_ipc_read(crm_ipc_t *client) { guint8 *buffer = NULL; long rc = -ENOMSG; pcmk__assert((client != NULL) && (client->ipc != NULL)); buffer = g_malloc0(crm_ipc_default_buffer_size()); do { pcmk__ipc_header_t *header = NULL; ssize_t bytes = qb_ipcc_event_recv(client->ipc, buffer, crm_ipc_default_buffer_size(), 0); header = (pcmk__ipc_header_t *)(void *) buffer; if (bytes <= 0) { crm_trace("No message received from %s IPC: %s", client->server_name, strerror(-bytes)); if (!crm_ipc_connected(client) || bytes == -ENOTCONN) { crm_err("Connection to %s IPC failed", client->server_name); rc = -ENOTCONN; pcmk__ipc_free_client_buffer(client); } else if (bytes == -EAGAIN) { rc = -EAGAIN; } goto done; } else if (bytes != header->size + sizeof(pcmk__ipc_header_t)) { crm_trace("Message size does not match header"); rc = -EBADMSG; pcmk__ipc_free_client_buffer(client); goto done; } crm_trace("Received %s IPC event %" PRId32 " size=%" PRIu32 " rc=%zu", client->server_name, header->qb.id, header->qb.size, bytes); rc = pcmk__ipc_msg_append(&client->buffer, buffer); if (rc == pcmk_rc_ok) { break; } else if (rc == pcmk_rc_ipc_more) { continue; } else { pcmk__ipc_free_client_buffer(client); rc = pcmk_rc2legacy(rc); goto done; } } while (true); if (client->buffer->len > 0) { /* Data length excluding the header */ rc = client->buffer->len - sizeof(pcmk__ipc_header_t); } done: g_free(buffer); return rc; } void pcmk__ipc_free_client_buffer(crm_ipc_t *client) { pcmk__assert(client != NULL); if (client->buffer != NULL) { g_byte_array_free(client->buffer, TRUE); client->buffer = NULL; } } const char * crm_ipc_buffer(crm_ipc_t * client) { pcmk__assert(client != NULL); CRM_CHECK(client->buffer != NULL, return NULL); return (const char *) (client->buffer->data + sizeof(pcmk__ipc_header_t)); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; pcmk__assert(client != NULL); if (client->buffer == NULL) { return 0; } header = (pcmk__ipc_header_t *)(void*) client->buffer->data; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { pcmk__assert(client != NULL); return client->server_name; } // \return Standard Pacemaker return code static int internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ssize_t *bytes, xmlNode **reply) { guint8 *buffer = NULL; pcmk__ipc_header_t *hdr = NULL; time_t timeout = 0; int32_t qb_timeout = -1; int rc = pcmk_rc_ok; int reply_id = 0; if (ms_timeout > 0) { timeout = time(NULL) + 1 + pcmk__timeout_ms2s(ms_timeout); qb_timeout = 1000; } /* get the reply */ crm_trace("Expecting reply to %s IPC message %d", client->server_name, request_id); buffer = g_malloc0(crm_ipc_default_buffer_size()); do { guint8 *data = NULL; xmlNode *xml = NULL; *bytes = qb_ipcc_recv(client->ipc, buffer, crm_ipc_default_buffer_size(), qb_timeout); hdr = (pcmk__ipc_header_t *) (void *) buffer; if (*bytes <= 0) { if (!crm_ipc_connected(client)) { crm_err("%s IPC provider disconnected while waiting for message %d", client->server_name, request_id); break; } continue; } else if (*bytes != hdr->size + sizeof(pcmk__ipc_header_t)) { crm_trace("Message size does not match header"); rc = -EBADMSG; break; } reply_id = hdr->qb.id; if (reply_id == request_id) { /* Got the reply we were expecting. */ rc = pcmk__ipc_msg_append(&client->buffer, buffer); if (rc == pcmk_rc_ok) { break; } else if (rc == pcmk_rc_ipc_more) { continue; } else { goto done; } } data = buffer + sizeof(pcmk__ipc_header_t); xml = pcmk__xml_parse((const char *) data); if (reply_id < request_id) { crm_err("Discarding old reply %d (need %d)", reply_id, request_id); crm_log_xml_notice(xml, "OldIpcReply"); } else if (reply_id > request_id) { crm_err("Discarding newer reply %d (need %d)", reply_id, request_id); crm_log_xml_notice(xml, "ImpossibleReply"); pcmk__assert(hdr->qb.id <= request_id); } } while (time(NULL) < timeout || (timeout == 0 && *bytes == -EAGAIN)); if (client->buffer->len > 0) { crm_trace("Received %u-byte reply %d to %s IPC %d: %.100s", client->buffer->len, reply_id, client->server_name, request_id, crm_ipc_buffer(client)); if (reply != NULL) { *reply = pcmk__xml_parse(crm_ipc_buffer(client)); } } else if (*bytes < 0) { rc = (int) -*bytes; // System errno crm_trace("No reply to %s IPC %d: %s " QB_XS " rc=%d", client->server_name, request_id, pcmk_rc_str(rc), rc); } /* If bytes == 0, we'll return that to crm_ipc_send which will interpret * that as pcmk_rc_ok, log that the IPC request failed (since we did not * give it a valid reply), and return that 0 to its callers. It's up to * the callers to take appropriate action after that. */ /* Once we've parsed the client buffer as XML and saved it to reply, * there's no need to keep the client buffer around anymore. Free it here * to avoid having to do this anywhere crm_ipc_send is called. */ done: pcmk__ipc_free_client_buffer(client); g_free(buffer); return rc; } static int discard_old_replies(crm_ipc_t *client, int32_t ms_timeout) { pcmk__ipc_header_t *header = NULL; int rc = pcmk_rc_ok; ssize_t qb_rc = 0; char *buffer = pcmk__assert_alloc(crm_ipc_default_buffer_size(), sizeof(char)); qb_rc = qb_ipcc_recv(client->ipc, buffer, crm_ipc_default_buffer_size(), ms_timeout); if (qb_rc < 0) { crm_warn("Sending %s IPC disabled until pending reply received", client->server_name); rc = EALREADY; goto done; } header = (pcmk__ipc_header_t *)(void *) buffer; if (!pcmk__valid_ipc_header(header)) { rc = EBADMSG; } else if (!pcmk_is_set(header->flags, crm_ipc_multipart) || pcmk_is_set(header->flags, crm_ipc_multipart_end)) { crm_notice("Sending %s IPC re-enabled after pending reply received", client->server_name); client->need_reply = FALSE; } else { crm_warn("Sending %s IPC disabled until multipart IPC message " "reply received", client->server_name); rc = EALREADY; } done: free(buffer); return rc; } /*! * \brief Send an IPC XML message * * \param[in,out] client Connection to IPC server * \param[in] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags * \param[in] ms_timeout Give up if not sent within this much time * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * * \return Negative errno on error, otherwise size of reply received in bytes * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t *client, const xmlNode *message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode **reply) { int rc = 0; ssize_t bytes = 0; ssize_t sent_bytes = 0; struct iovec *iov = NULL; static uint32_t id = 0; pcmk__ipc_header_t *header; GString *iov_buffer = NULL; uint16_t index = 0; if (client == NULL) { crm_notice("Can't send IPC request without connection (bug?): %.100s", message); return -ENOTCONN; } else if (!crm_ipc_connected(client)) { /* Don't even bother */ crm_notice("Can't send %s IPC requests: Connection closed", client->server_name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } /* This block exists only to clear out any old replies that we haven't * yet read. We don't care about their contents since it's too late to * do anything with them, so we just read and throw them away. */ if (client->need_reply) { int discard_rc = discard_old_replies(client, ms_timeout); if (discard_rc != pcmk_rc_ok) { return pcmk_rc2legacy(discard_rc); } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ iov_buffer = g_string_sized_new(1024); pcmk__xml_string(message, 0, iov_buffer, 0); do { ssize_t qb_rc = 0; time_t timeout = 0; rc = pcmk__ipc_prepare_iov(id, iov_buffer, index, &iov, &bytes); if ((rc != pcmk_rc_ok) && (rc != pcmk_rc_ipc_more)) { crm_warn("Couldn't prepare %s IPC request: %s " QB_XS " rc=%d", client->server_name, pcmk_rc_str(rc), rc); g_string_free(iov_buffer, TRUE); return pcmk_rc2legacy(rc); } header = iov[0].iov_base; pcmk__set_ipc_flags(header->flags, client->server_name, flags); if (pcmk_is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ pcmk__clear_ipc_flags(flags, "client", crm_ipc_client_response); } if (pcmk_is_set(header->flags, crm_ipc_multipart)) { bool is_end = pcmk_is_set(header->flags, crm_ipc_multipart_end); crm_trace("Sending %s IPC request %" PRId32 " (%spart %" PRIu16 ") of " "%" PRId32 " bytes using %dms timeout", client->server_name, header->qb.id, is_end ? "final " : "", index, header->qb.size, ms_timeout); crm_trace("Text = %s", (char *) iov[1].iov_base); } else { crm_trace("Sending %s IPC request %" PRId32 " of %" PRId32 " bytes " "using %dms timeout", client->server_name, header->qb.id, header->qb.size, ms_timeout); crm_trace("Text = %s", (char *) iov[1].iov_base); } /* Send the IPC request, respecting any timeout we were passed */ if (ms_timeout > 0) { timeout = time(NULL) + 1 + pcmk__timeout_ms2s(ms_timeout); } do { qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); } while ((qb_rc == -EAGAIN) && ((timeout == 0) || (time(NULL) < timeout))); /* An error occurred when sending. */ if (qb_rc <= 0) { rc = (int) qb_rc; // Negative of system errno goto send_cleanup; } /* Sending succeeded. The next action depends on whether this was a * multipart IPC message or not. */ if (rc == pcmk_rc_ok) { /* This was either a standalone IPC message or the last part of * a multipart message. Set the return value and break out of * this processing loop. */ sent_bytes += qb_rc; rc = (int) sent_bytes; break; } else { /* There's no way to get here for any value other than rc == pcmk_rc_more * given the check right after pcmk__ipc_prepare_iov. * * This was a multipart message, loop to process the next chunk. */ sent_bytes += qb_rc; index++; } pcmk_free_ipc_event(iov); iov = NULL; } while (true); /* If we should not wait for a response, bail now */ if (!pcmk_is_set(flags, crm_ipc_client_response)) { crm_trace("Not waiting for reply to %s IPC request %d", client->server_name, header->qb.id); goto send_cleanup; } pcmk__ipc_free_client_buffer(client); rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes, reply); if (rc == pcmk_rc_ok) { rc = (int) bytes; // Size of reply received } else { /* rc is either a positive system errno or a negative standard Pacemaker * return code. If it's an errno, we need to convert it back to a * negative number for comparison and return at the end of this function. */ rc = pcmk_rc2legacy(rc); if (ms_timeout > 0) { /* We didn't get the reply in time, so disable future sends for now. * The only alternative would be to close the connection since we * don't know how to detect and discard out-of-sequence replies. * * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } } send_cleanup: if (!crm_ipc_connected(client)) { crm_notice("Couldn't send %s IPC request %d: Connection closed " QB_XS " rc=%d", client->server_name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { crm_warn("%s IPC request %d failed: %s after %dms " QB_XS " rc=%d", client->server_name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("%s IPC request %d failed: %s " QB_XS " rc=%d", client->server_name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } g_string_free(iov_buffer, TRUE); pcmk_free_ipc_event(iov); + // coverity[return_overflow] return rc; } /*! * \brief Ensure an IPC provider has expected user or group * * \param[in] qb_ipc libqb client connection if available * \param[in] sock Connected Unix socket for IPC * \param[in] refuid Expected user ID * \param[in] refgid Expected group ID * \param[out] gotpid If not NULL, where to store provider's actual process ID * (or 1 on platforms where ID is not available) * \param[out] gotuid If not NULL, where to store provider's actual user ID * \param[out] gotgid If not NULL, where to store provider's actual group ID * * \return Standard Pacemaker return code * \note An actual user ID of 0 (root) will always be considered authorized, * regardless of the expected values provided. The caller can use the * output arguments to be stricter than this function. */ static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int rc = EOPNOTSUPP; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; #ifdef HAVE_QB_IPCC_AUTH_GET if (qb_ipc != NULL) { rc = qb_ipcc_auth_get(qb_ipc, &found_pid, &found_uid, &found_gid); rc = -rc; // libqb returns 0 or -errno if (rc == pcmk_rc_ok) { goto found; } } #endif #ifdef HAVE_UCRED { struct ucred ucred; socklen_t ucred_len = sizeof(ucred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) < 0) { rc = errno; } else if (ucred_len != sizeof(ucred)) { rc = EOPNOTSUPP; } else { found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; goto found; } } #endif #ifdef HAVE_SOCKPEERCRED { struct sockpeercred sockpeercred; socklen_t sockpeercred_len = sizeof(sockpeercred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &sockpeercred, &sockpeercred_len) < 0) { rc = errno; } else if (sockpeercred_len != sizeof(sockpeercred)) { rc = EOPNOTSUPP; } else { found_pid = sockpeercred.pid; found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; goto found; } } #endif #ifdef HAVE_GETPEEREID // For example, FreeBSD if (getpeereid(sock, &found_uid, &found_gid) < 0) { rc = errno; } else { found_pid = PCMK__SPECIAL_PID; goto found; } #endif #ifdef HAVE_GETPEERUCRED { ucred_t *ucred = NULL; if (getpeerucred(sock, &ucred) < 0) { rc = errno; } else { found_pid = ucred_getpid(ucred); found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ucred_free(ucred); goto found; } } #endif return rc; // If we get here, nothing succeeded found: if (gotpid != NULL) { *gotpid = found_pid; } if (gotuid != NULL) { *gotuid = found_uid; } if (gotgid != NULL) { *gotgid = found_gid; } if ((found_uid != 0) && (found_uid != refuid) && (found_gid != refgid)) { return pcmk_rc_ipc_unauthorized; } return pcmk_rc_ok; } int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = is_ipc_provider_expected(NULL, sock, refuid, refgid, gotpid, gotuid, gotgid); /* The old function had some very odd return codes*/ if (ret == 0) { return 1; } else if (ret == pcmk_rc_ipc_unauthorized) { return 0; } else { return pcmk_rc2legacy(ret); } } int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid) { static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ int fd; int rc = pcmk_rc_ipc_unresponsive; int auth_rc = 0; int32_t qb_rc; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; qb_ipcc_connection_t *c; #ifdef HAVE_QB_IPCC_CONNECT_ASYNC struct pollfd pollfd = { 0, }; int poll_rc; c = qb_ipcc_connect_async(name, 0, &(pollfd.fd)); #else c = qb_ipcc_connect(name, 0); #endif if (c == NULL) { crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); rc = pcmk_rc_ipc_unresponsive; goto bail; } #ifdef HAVE_QB_IPCC_CONNECT_ASYNC pollfd.events = POLLIN; do { poll_rc = poll(&pollfd, 1, 2000); } while ((poll_rc == -1) && (errno == EINTR)); /* If poll() failed, given that disconnect function is not registered yet, * qb_ipcc_disconnect() won't clean up the socket. In any case, call * qb_ipcc_connect_continue() here so that it may fail and do the cleanup * for us. */ if (qb_ipcc_connect_continue(c) != 0) { crm_info("Could not connect to %s IPC: %s", name, (poll_rc == 0)?"timeout":strerror(errno)); rc = pcmk_rc_ipc_unresponsive; c = NULL; // qb_ipcc_connect_continue cleaned up for us goto bail; } #endif qb_rc = qb_ipcc_fd_get(c, &fd); if (qb_rc != 0) { rc = (int) -qb_rc; // System errno crm_err("Could not get fd from %s IPC: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } auth_rc = is_ipc_provider_expected(c, fd, refuid, refgid, &found_pid, &found_uid, &found_gid); if (auth_rc == pcmk_rc_ipc_unauthorized) { crm_err("Daemon (IPC %s) effectively blocked with unauthorized" " process %lld (uid: %lld, gid: %lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = pcmk_rc_ipc_unauthorized; goto bail; } if (auth_rc != pcmk_rc_ok) { rc = auth_rc; crm_err("Could not get peer credentials from %s IPC: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } if (gotpid != NULL) { *gotpid = found_pid; } rc = pcmk_rc_ok; if ((found_uid != refuid || found_gid != refgid) && strncmp(last_asked_name, name, sizeof(last_asked_name))) { if ((found_uid == 0) && (refuid != 0)) { crm_warn("Daemon (IPC %s) runs as root, whereas the expected" " credentials are %lld:%lld, hazard of violating" " the least privilege principle", name, (long long) refuid, (long long) refgid); } else { crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" " expected credentials are %lld:%lld, which may" " mean a different set of privileges than expected", name, (long long) found_uid, (long long) found_gid, (long long) refuid, (long long) refgid); } memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); } bail: if (c != NULL) { qb_ipcc_disconnect(c); } return rc; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include bool crm_ipc_connect(crm_ipc_t *client) { int rc = pcmk__connect_generic_ipc(client); if (rc == pcmk_rc_ok) { return true; } if ((client != NULL) && (client->ipc == NULL)) { errno = (rc > 0)? rc : ENOTCONN; crm_debug("Could not establish %s IPC connection: %s (%d)", client->server_name, pcmk_rc_str(errno), errno); } else if (rc == pcmk_rc_ipc_unauthorized) { crm_err("%s IPC provider authentication failed", (client == NULL)? "Pacemaker" : client->server_name); errno = ECONNABORTED; } else { crm_err("Could not verify authenticity of %s IPC provider", (client == NULL)? "Pacemaker" : client->server_name); errno = ENOTCONN; } return false; } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/common/schemas.c b/lib/common/schemas.c index 9e3f1c0462..72cdfc0926 100644 --- a/lib/common/schemas.c +++ b/lib/common/schemas.c @@ -1,1603 +1,1604 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include // UCHAR_MAX #include #include #include #include // xmlNode #include // xmlChar #include #include #include #include #include #include #include /* PCMK__XML_LOG_BASE */ #include "crmcommon_private.h" #define SCHEMA_ZERO { .v = { 0, 0 } } #define schema_strdup_printf(prefix, version, suffix) \ crm_strdup_printf(prefix "%u.%u" suffix, (version).v[0], (version).v[1]) typedef struct { xmlRelaxNGPtr rng; xmlRelaxNGValidCtxtPtr valid; xmlRelaxNGParserCtxtPtr parser; } relaxng_ctx_cache_t; static GList *known_schemas = NULL; static bool initialized = false; static bool silent_logging = FALSE; static void G_GNUC_PRINTF(2, 3) xml_log(int priority, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (silent_logging == FALSE) { /* XXX should not this enable dechunking as well? */ PCMK__XML_LOG_BASE(priority, FALSE, 0, NULL, fmt, ap); } va_end(ap); } static int xml_latest_schema_index(void) { /* This function assumes that pcmk__schema_init() has been called * beforehand, so we have at least two schemas (one real schema and the * "none" schema). * * @COMPAT: The "none" schema is deprecated since 2.1.8. * Update this when we drop that schema. */ return g_list_length(known_schemas) - 2; } /*! * \internal * \brief Return the schema entry of the highest-versioned schema * * \return Schema entry of highest-versioned schema */ static GList * get_highest_schema(void) { /* The highest numerically versioned schema is the one before none * * @COMPAT none is deprecated since 2.1.8 */ GList *entry = pcmk__get_schema("none"); pcmk__assert((entry != NULL) && (entry->prev != NULL)); return entry->prev; } /*! * \internal * \brief Return the name of the highest-versioned schema * * \return Name of highest-versioned schema (or NULL on error) */ const char * pcmk__highest_schema_name(void) { GList *entry = get_highest_schema(); return ((pcmk__schema_t *)(entry->data))->name; } /*! * \internal * \brief Find first entry of highest major schema version series * * \return Schema entry of first schema with highest major version */ GList * pcmk__find_x_0_schema(void) { #if defined(PCMK__UNIT_TESTING) /* If we're unit testing, this can't be static because it'll stick * around from one test run to the next. It needs to be cleared out * every time. */ GList *x_0_entry = NULL; #else static GList *x_0_entry = NULL; #endif pcmk__schema_t *highest_schema = NULL; if (x_0_entry != NULL) { return x_0_entry; } x_0_entry = get_highest_schema(); highest_schema = x_0_entry->data; for (GList *iter = x_0_entry->prev; iter != NULL; iter = iter->prev) { pcmk__schema_t *schema = iter->data; /* We've found a schema in an older major version series. Return * the index of the first one in the same major version series as * the highest schema. */ if (schema->version.v[0] < highest_schema->version.v[0]) { x_0_entry = iter->next; break; } /* We're out of list to examine. This probably means there was only * one major version series, so return the first schema entry. */ if (iter->prev == NULL) { x_0_entry = known_schemas->data; break; } } return x_0_entry; } static inline bool version_from_filename(const char *filename, pcmk__schema_version_t *version) { if (pcmk__ends_with(filename, ".rng")) { return sscanf(filename, "pacemaker-%hhu.%hhu.rng", &(version->v[0]), &(version->v[1])) == 2; } else { return sscanf(filename, "pacemaker-%hhu.%hhu", &(version->v[0]), &(version->v[1])) == 2; } } static int schema_filter(const struct dirent *a) { int rc = 0; pcmk__schema_version_t version = SCHEMA_ZERO; if (strstr(a->d_name, "pacemaker-") != a->d_name) { /* crm_trace("%s - wrong prefix", a->d_name); */ } else if (!pcmk__ends_with_ext(a->d_name, ".rng")) { /* crm_trace("%s - wrong suffix", a->d_name); */ } else if (!version_from_filename(a->d_name, &version)) { /* crm_trace("%s - wrong format", a->d_name); */ } else { /* crm_debug("%s - candidate", a->d_name); */ rc = 1; } return rc; } static int schema_cmp(pcmk__schema_version_t a_version, pcmk__schema_version_t b_version) { for (int i = 0; i < 2; ++i) { if (a_version.v[i] < b_version.v[i]) { return -1; } else if (a_version.v[i] > b_version.v[i]) { return 1; } } return 0; } static int schema_cmp_directory(const struct dirent **a, const struct dirent **b) { pcmk__schema_version_t a_version = SCHEMA_ZERO; pcmk__schema_version_t b_version = SCHEMA_ZERO; if (!version_from_filename(a[0]->d_name, &a_version) || !version_from_filename(b[0]->d_name, &b_version)) { // Shouldn't be possible, but makes static analysis happy return 0; } return schema_cmp(a_version, b_version); } /*! * \internal * \brief Add given schema + auxiliary data to internal bookkeeping. */ static void add_schema(enum pcmk__schema_validator validator, const pcmk__schema_version_t *version, const char *name, GList *transforms) { pcmk__schema_t *schema = NULL; schema = pcmk__assert_alloc(1, sizeof(pcmk__schema_t)); schema->validator = validator; schema->version.v[0] = version->v[0]; schema->version.v[1] = version->v[1]; schema->transforms = transforms; // schema->schema_index is set after all schemas are loaded and sorted if (version->v[0] || version->v[1]) { schema->name = schema_strdup_printf("pacemaker-", *version, ""); } else { schema->name = pcmk__str_copy(name); } known_schemas = g_list_prepend(known_schemas, schema); } static void wrap_libxslt(bool finalize) { static xsltSecurityPrefsPtr secprefs; int ret = 0; /* security framework preferences */ if (!finalize) { pcmk__assert(secprefs == NULL); secprefs = xsltNewSecurityPrefs(); ret = xsltSetSecurityPrefs(secprefs, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid) | xsltSetSecurityPrefs(secprefs, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid) | xsltSetSecurityPrefs(secprefs, XSLT_SECPREF_READ_NETWORK, xsltSecurityForbid) | xsltSetSecurityPrefs(secprefs, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid); if (ret != 0) { return; } } else { xsltFreeSecurityPrefs(secprefs); secprefs = NULL; } /* cleanup only */ if (finalize) { xsltCleanupGlobals(); } } /*! * \internal * \brief Check whether a directory entry matches the upgrade XSLT pattern * * \param[in] entry Directory entry whose filename to check * * \return 1 if the entry's filename is of the form * upgrade-X.Y-ORDER.xsl with each number in the range 0 to * 255, or 0 otherwise */ static int transform_filter(const struct dirent *entry) { const char *re = NULL; unsigned int major = 0; unsigned int minor = 0; unsigned int order = 0; /* Each number is an unsigned char, which is 1 to 3 digits long. (Pacemaker * requires an 8-bit char via a configure test.) */ re = "upgrade-[[:digit:]]{1,3}\\.[[:digit:]]{1,3}-[[:digit:]]{1,3}\\.xsl"; if (!pcmk__str_eq(entry->d_name, re, pcmk__str_regex)) { return 0; } /* Performance isn't critical here and this is simpler than range-checking * within the regex */ if (sscanf(entry->d_name, "upgrade-%u.%u-%u.xsl", &major, &minor, &order) != 3) { return 0; } if ((major > UCHAR_MAX) || (minor > UCHAR_MAX) || (order > UCHAR_MAX)) { return 0; } return 1; } /*! * \internal * \brief Compare transform files based on the version strings in their names * * This is a crude version comparison that relies on the specific structure of * these filenames. * * \retval -1 if \p entry1 sorts before \p entry2 * \retval 0 if \p entry1 sorts equal to \p entry2 * \retval 1 if \p entry1 sorts after \p entry2 * * \note The GNU \c versionsort() function would be perfect here, but it's not * portable. */ static int compare_transforms(const struct dirent **entry1, const struct dirent **entry2) { unsigned char major1 = 0; unsigned char major2 = 0; unsigned char minor1 = 0; unsigned char minor2 = 0; unsigned char order1 = 0; unsigned char order2 = 0; // If these made it through the filter, they should be of the right format CRM_LOG_ASSERT(sscanf((*entry1)->d_name, "upgrade-%hhu.%hhu-%hhu.xsl", &major1, &minor1, &order1) == 3); CRM_LOG_ASSERT(sscanf((*entry2)->d_name, "upgrade-%hhu.%hhu-%hhu.xsl", &major2, &minor2, &order2) == 3); if (major1 < major2) { return -1; } else if (major1 > major2) { return 1; } if (minor1 < minor2) { return -1; } else if (minor1 > minor2) { return 1; } if (order1 < order2) { return -1; } else if (order1 > order2) { return 1; } return 0; } /*! * \internal * \brief Free a list of XSLT transform struct dirent objects * * \param[in,out] data List to free */ static void free_transform_list(void *data) { g_list_free_full((GList *) data, free); } /*! * \internal * \brief Load names of upgrade XSLT stylesheets from a directory into a table * * Stylesheets must have names of the form "upgrade-X.Y-order.xsl", where: * * X is the schema major version * * Y is the schema minor version * * ORDER is the order in which the stylesheet occurs in the transform pipeline * * \param[in] dir Directory containing XSLT stylesheets * * \return Table with schema version as key and \c GList of associated transform * files (as struct dirent) as value */ static GHashTable * load_transforms_from_dir(const char *dir) { struct dirent **namelist = NULL; GHashTable *transforms = NULL; int num_matches = scandir(dir, &namelist, transform_filter, compare_transforms); if (num_matches < 0) { int rc = errno; crm_warn("Could not load transforms from %s: %s", dir, pcmk_rc_str(rc)); goto done; } transforms = pcmk__strkey_table(free, free_transform_list); for (int i = 0; i < num_matches; i++) { pcmk__schema_version_t version = SCHEMA_ZERO; unsigned char order = 0; // Placeholder only if (sscanf(namelist[i]->d_name, "upgrade-%hhu.%hhu-%hhu.xsl", &(version.v[0]), &(version.v[1]), &order) == 3) { char *version_s = crm_strdup_printf("%hhu.%hhu", version.v[0], version.v[1]); GList *list = g_hash_table_lookup(transforms, version_s); if (list == NULL) { /* Prepend is more efficient. However, there won't be many of * these, and we want them to remain sorted by version. It's not * worth reversing all the lists at the end. * * Avoid calling g_hash_table_insert() if the list already * exists. Otherwise free_transform_list() gets called on it. */ list = g_list_append(list, namelist[i]); g_hash_table_insert(transforms, version_s, list); } else { list = g_list_append(list, namelist[i]); free(version_s); } } else { // Sanity only, should never happen thanks to transform_filter() free(namelist[i]); } } done: free(namelist); return transforms; } void pcmk__load_schemas_from_dir(const char *dir) { int lpc, max; struct dirent **namelist = NULL; GHashTable *transforms = NULL; max = scandir(dir, &namelist, schema_filter, schema_cmp_directory); if (max < 0) { int rc = errno; crm_warn("Could not load schemas from %s: %s", dir, pcmk_rc_str(rc)); goto done; } // Look for any upgrade transforms in the same directory transforms = load_transforms_from_dir(dir); for (lpc = 0; lpc < max; lpc++) { pcmk__schema_version_t version = SCHEMA_ZERO; if (version_from_filename(namelist[lpc]->d_name, &version)) { char *version_s = crm_strdup_printf("%hhu.%hhu", version.v[0], version.v[1]); char *orig_key = NULL; GList *transform_list = NULL; if (transforms != NULL) { // The schema becomes the owner of transform_list g_hash_table_lookup_extended(transforms, version_s, (gpointer *) &orig_key, (gpointer *) &transform_list); g_hash_table_steal(transforms, version_s); } add_schema(pcmk__schema_validator_rng, &version, NULL, transform_list); free(version_s); free(orig_key); } else { // Shouldn't be possible, but makes static analysis happy crm_warn("Skipping schema '%s': could not parse version", namelist[lpc]->d_name); } } for (lpc = 0; lpc < max; lpc++) { free(namelist[lpc]); } done: free(namelist); if (transforms != NULL) { g_hash_table_destroy(transforms); } } static gint schema_sort_GCompareFunc(gconstpointer a, gconstpointer b) { const pcmk__schema_t *schema_a = a; const pcmk__schema_t *schema_b = b; // @COMPAT The "none" schema is deprecated since 2.1.8 if (pcmk__str_eq(schema_a->name, PCMK_VALUE_NONE, pcmk__str_none)) { return 1; } else if (pcmk__str_eq(schema_b->name, PCMK_VALUE_NONE, pcmk__str_none)) { return -1; } else { return schema_cmp(schema_a->version, schema_b->version); } } /*! * \internal * \brief Sort the list of known schemas such that all pacemaker-X.Y are in * version order, then "none" * * This function should be called whenever additional schemas are loaded using * \c pcmk__load_schemas_from_dir(), after the initial sets in * \c pcmk__schema_init(). */ void pcmk__sort_schemas(void) { known_schemas = g_list_sort(known_schemas, schema_sort_GCompareFunc); } /*! * \internal * \brief Load pacemaker schemas into cache * * \note This currently also serves as an entry point for the * generic initialization of the libxslt library. */ void pcmk__schema_init(void) { if (!initialized) { const char *remote_schema_dir = pcmk__remote_schema_dir(); char *base = pcmk__xml_artefact_root(pcmk__xml_artefact_ns_legacy_rng); const pcmk__schema_version_t zero = SCHEMA_ZERO; int schema_index = 0; initialized = true; wrap_libxslt(false); pcmk__load_schemas_from_dir(base); pcmk__load_schemas_from_dir(remote_schema_dir); free(base); // @COMPAT Deprecated since 2.1.8 add_schema(pcmk__schema_validator_none, &zero, PCMK_VALUE_NONE, NULL); /* add_schema() prepends items to the list, so in the simple case, this * just reverses the list. However if there were any remote schemas, * sorting is necessary. */ pcmk__sort_schemas(); // Now set the schema indexes and log the final result for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { pcmk__schema_t *schema = iter->data; crm_debug("Loaded schema %d: %s", schema_index, schema->name); schema->schema_index = schema_index++; } } } static bool validate_with_relaxng(xmlDocPtr doc, xmlRelaxNGValidityErrorFunc error_handler, void *error_handler_context, const char *relaxng_file, relaxng_ctx_cache_t **cached_ctx) { int rc = 0; bool valid = true; relaxng_ctx_cache_t *ctx = NULL; CRM_CHECK(doc != NULL, return false); CRM_CHECK(relaxng_file != NULL, return false); if (cached_ctx && *cached_ctx) { ctx = *cached_ctx; } else { crm_debug("Creating RNG parser context"); ctx = pcmk__assert_alloc(1, sizeof(relaxng_ctx_cache_t)); ctx->parser = xmlRelaxNGNewParserCtxt(relaxng_file); CRM_CHECK(ctx->parser != NULL, goto cleanup); if (error_handler) { xmlRelaxNGSetParserErrors(ctx->parser, (xmlRelaxNGValidityErrorFunc) error_handler, (xmlRelaxNGValidityWarningFunc) error_handler, error_handler_context); } else { xmlRelaxNGSetParserErrors(ctx->parser, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } ctx->rng = xmlRelaxNGParse(ctx->parser); CRM_CHECK(ctx->rng != NULL, crm_err("Could not find/parse %s", relaxng_file); goto cleanup); ctx->valid = xmlRelaxNGNewValidCtxt(ctx->rng); CRM_CHECK(ctx->valid != NULL, goto cleanup); if (error_handler) { xmlRelaxNGSetValidErrors(ctx->valid, (xmlRelaxNGValidityErrorFunc) error_handler, (xmlRelaxNGValidityWarningFunc) error_handler, error_handler_context); } else { xmlRelaxNGSetValidErrors(ctx->valid, (xmlRelaxNGValidityErrorFunc) fprintf, (xmlRelaxNGValidityWarningFunc) fprintf, stderr); } } rc = xmlRelaxNGValidateDoc(ctx->valid, doc); if (rc > 0) { valid = false; } else if (rc < 0) { crm_err("Internal libxml error during validation"); } cleanup: if (cached_ctx) { *cached_ctx = ctx; } else { if (ctx->parser != NULL) { xmlRelaxNGFreeParserCtxt(ctx->parser); } if (ctx->valid != NULL) { xmlRelaxNGFreeValidCtxt(ctx->valid); } if (ctx->rng != NULL) { xmlRelaxNGFree(ctx->rng); } free(ctx); } return valid; } static void free_schema(gpointer data) { pcmk__schema_t *schema = data; relaxng_ctx_cache_t *ctx = NULL; switch (schema->validator) { case pcmk__schema_validator_none: // not cached break; case pcmk__schema_validator_rng: // cached ctx = (relaxng_ctx_cache_t *) schema->cache; if (ctx == NULL) { break; } if (ctx->parser != NULL) { xmlRelaxNGFreeParserCtxt(ctx->parser); } if (ctx->valid != NULL) { xmlRelaxNGFreeValidCtxt(ctx->valid); } if (ctx->rng != NULL) { xmlRelaxNGFree(ctx->rng); } free(ctx); schema->cache = NULL; break; } free(schema->name); g_list_free_full(schema->transforms, free); free(schema); } /*! * \internal * \brief Clean up global memory associated with XML schemas */ void pcmk__schema_cleanup(void) { if (known_schemas != NULL) { g_list_free_full(known_schemas, free_schema); known_schemas = NULL; } initialized = false; wrap_libxslt(true); } /*! * \internal * \brief Get schema list entry corresponding to a schema name * * \param[in] name Name of schema to get * * \return Schema list entry corresponding to \p name, or NULL if unknown */ GList * pcmk__get_schema(const char *name) { if (name == NULL) { return NULL; } for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { pcmk__schema_t *schema = iter->data; if (pcmk__str_eq(name, schema->name, pcmk__str_none)) { return iter; } } return NULL; } /*! * \internal * \brief Compare two schema version numbers given the schema names * * \param[in] schema1 Name of first schema to compare * \param[in] schema2 Name of second schema to compare * * \return Standard comparison result (negative integer if \p schema1 has the * lower version number, positive integer if \p schema1 has the higher * version number, of 0 if the version numbers are equal) */ int pcmk__cmp_schemas_by_name(const char *schema1_name, const char *schema2_name) { GList *entry1 = pcmk__get_schema(schema1_name); GList *entry2 = pcmk__get_schema(schema2_name); if (entry1 == NULL) { return (entry2 == NULL)? 0 : -1; } else if (entry2 == NULL) { return 1; } else { pcmk__schema_t *schema1 = entry1->data; pcmk__schema_t *schema2 = entry2->data; return schema1->schema_index - schema2->schema_index; } } static bool validate_with(xmlNode *xml, pcmk__schema_t *schema, xmlRelaxNGValidityErrorFunc error_handler, void *error_handler_context) { bool valid = false; char *file = NULL; relaxng_ctx_cache_t **cache = NULL; if (schema == NULL) { return false; } if (schema->validator == pcmk__schema_validator_none) { return true; } file = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_legacy_rng, schema->name); crm_trace("Validating with %s (type=%d)", pcmk__s(file, "missing schema"), schema->validator); switch (schema->validator) { case pcmk__schema_validator_rng: cache = (relaxng_ctx_cache_t **) &(schema->cache); valid = validate_with_relaxng(xml->doc, error_handler, error_handler_context, file, cache); break; default: crm_err("Unknown validator type: %d", schema->validator); break; } free(file); return valid; } static bool validate_with_silent(xmlNode *xml, pcmk__schema_t *schema) { bool rc, sl_backup = silent_logging; silent_logging = TRUE; rc = validate_with(xml, schema, (xmlRelaxNGValidityErrorFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); silent_logging = sl_backup; return rc; } bool pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidityErrorFunc error_handler, void *error_handler_context) { GList *entry = NULL; pcmk__schema_t *schema = NULL; CRM_CHECK((xml_blob != NULL) && (xml_blob->doc != NULL), return false); if (validation == NULL) { validation = crm_element_value(xml_blob, PCMK_XA_VALIDATE_WITH); } pcmk__warn_if_schema_deprecated(validation); entry = pcmk__get_schema(validation); if (entry == NULL) { pcmk__config_err("Cannot validate CIB with %s " PCMK_XA_VALIDATE_WITH " (manually edit to use a known schema)", ((validation == NULL)? "missing" : "unknown")); return false; } schema = entry->data; return validate_with(xml_blob, schema, error_handler, error_handler_context); } /*! * \internal * \brief Validate XML using its configured schema (and send errors to logs) * * \param[in] xml XML to validate * * \return true if XML validates, otherwise false */ bool pcmk__configured_schema_validates(xmlNode *xml) { return pcmk__validate_xml(xml, NULL, (xmlRelaxNGValidityErrorFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); } /* With this arrangement, an attempt to identify the message severity as explicitly signalled directly from XSLT is performed in rather a smart way (no reliance on formatting string + arguments being always specified as ["%s", purposeful_string], as it can also be ["%s: %s", some_prefix, purposeful_string] etc. so every argument pertaining %s specifier is investigated), and if such a mark found, the respective level is determined and, when the messages are to go to the native logs, the mark itself gets dropped (by the means of string shift). NOTE: whether the native logging is the right sink is decided per the ctx parameter -- NULL denotes this case, otherwise it carries a pointer to the numeric expression of the desired target logging level (messages with higher level will be suppressed) NOTE: on some architectures, this string shift may not have any effect, but that's an acceptable tradeoff The logging level for not explicitly designated messages (suspicious, likely internal errors or some runaways) is LOG_WARNING. */ static void G_GNUC_PRINTF(2, 3) cib_upgrade_err(void *ctx, const char *fmt, ...) { va_list ap, aq; char *arg_cur; bool found = false; const char *fmt_iter = fmt; uint8_t msg_log_level = LOG_WARNING; /* default for runaway messages */ const unsigned * log_level = (const unsigned *) ctx; enum { escan_seennothing, escan_seenpercent, } scan_state = escan_seennothing; va_start(ap, fmt); va_copy(aq, ap); while (!found && *fmt_iter != '\0') { /* while casing schema borrowed from libqb:qb_vsnprintf_serialize */ switch (*fmt_iter++) { case '%': if (scan_state == escan_seennothing) { scan_state = escan_seenpercent; } else if (scan_state == escan_seenpercent) { scan_state = escan_seennothing; } break; case 's': if (scan_state == escan_seenpercent) { size_t prefix_len = 0; scan_state = escan_seennothing; arg_cur = va_arg(aq, char *); if (pcmk__starts_with(arg_cur, "WARNING: ")) { prefix_len = sizeof("WARNING: ") - 1; msg_log_level = LOG_WARNING; } else if (pcmk__starts_with(arg_cur, "INFO: ")) { prefix_len = sizeof("INFO: ") - 1; msg_log_level = LOG_INFO; } else if (pcmk__starts_with(arg_cur, "DEBUG: ")) { prefix_len = sizeof("DEBUG: ") - 1; msg_log_level = LOG_DEBUG; } else { break; } found = true; if (ctx == NULL) { memmove(arg_cur, arg_cur + prefix_len, strlen(arg_cur + prefix_len) + 1); } } break; case '#': case '-': case ' ': case '+': case '\'': case 'I': case '.': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '*': break; case 'l': case 'z': case 't': case 'j': case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': case 'a': case 'A': case 'c': case 'p': if (scan_state == escan_seenpercent) { (void) va_arg(aq, void *); /* skip forward */ scan_state = escan_seennothing; } break; default: scan_state = escan_seennothing; break; } } if (log_level != NULL) { /* intention of the following offset is: cibadmin -V -> start showing INFO labelled messages */ if (*log_level + 4 >= msg_log_level) { vfprintf(stderr, fmt, ap); } } else { PCMK__XML_LOG_BASE(msg_log_level, TRUE, 0, "CIB upgrade: ", fmt, ap); } va_end(aq); va_end(ap); } /*! * \internal * \brief Apply a single XSL transformation to given XML * * \param[in] xml XML to transform * \param[in] transform XSL name * \param[in] to_logs If false, certain validation errors will be sent to * stderr rather than logged * * \return Transformed XML on success, otherwise NULL */ static xmlNode * apply_transformation(const xmlNode *xml, const char *transform, gboolean to_logs) { char *xform = NULL; xmlNode *out = NULL; xmlDocPtr res = NULL; xsltStylesheet *xslt = NULL; xform = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_legacy_xslt, transform); /* for capturing, e.g., what's emitted via */ if (to_logs) { xsltSetGenericErrorFunc(NULL, cib_upgrade_err); } else { xsltSetGenericErrorFunc(&crm_log_level, cib_upgrade_err); } xslt = xsltParseStylesheetFile((const xmlChar *) xform); CRM_CHECK(xslt != NULL, goto cleanup); /* Caller allocates private data for final result document. Intermediate * result documents are temporary and don't need private data. */ res = xsltApplyStylesheet(xslt, xml->doc, NULL); CRM_CHECK(res != NULL, goto cleanup); xsltSetGenericErrorFunc(NULL, NULL); /* restore default one */ out = xmlDocGetRootElement(res); cleanup: if (xslt) { xsltFreeStylesheet(xslt); } free(xform); return out; } /*! * \internal * \brief Perform all transformations needed to upgrade XML to next schema * * \param[in] input_xml XML to transform * \param[in] schema_index Index of schema that successfully validates * \p original_xml * \param[in] to_logs If false, certain validation errors will be sent to * stderr rather than logged * * \return XML result of schema transforms if successful, otherwise NULL */ static xmlNode * apply_upgrade(const xmlNode *input_xml, int schema_index, gboolean to_logs) { pcmk__schema_t *schema = g_list_nth_data(known_schemas, schema_index); pcmk__schema_t *upgraded_schema = g_list_nth_data(known_schemas, schema_index + 1); xmlNode *old_xml = NULL; xmlNode *new_xml = NULL; xmlRelaxNGValidityErrorFunc error_handler = NULL; pcmk__assert((schema != NULL) && (upgraded_schema != NULL)); if (to_logs) { error_handler = (xmlRelaxNGValidityErrorFunc) xml_log; } for (GList *iter = schema->transforms; iter != NULL; iter = iter->next) { const struct dirent *entry = iter->data; const char *transform = entry->d_name; crm_debug("Upgrading schema from %s to %s: applying XSL transform %s", schema->name, upgraded_schema->name, transform); new_xml = apply_transformation(input_xml, transform, to_logs); pcmk__xml_free(old_xml); if (new_xml == NULL) { crm_err("XSL transform %s failed, aborting upgrade", transform); return NULL; } input_xml = new_xml; old_xml = new_xml; } // Final result document from upgrade pipeline needs private data pcmk__xml_new_private_data((xmlNode *) new_xml->doc); // Ensure result validates with its new schema if (!validate_with(new_xml, upgraded_schema, error_handler, GUINT_TO_POINTER(LOG_ERR))) { crm_err("Schema upgrade from %s to %s failed: " "XSL transform pipeline produced an invalid configuration", schema->name, upgraded_schema->name); crm_log_xml_debug(new_xml, "bad-transform-result"); pcmk__xml_free(new_xml); return NULL; } crm_info("Schema upgrade from %s to %s succeeded", schema->name, upgraded_schema->name); return new_xml; } /*! * \internal * \brief Get the schema list entry corresponding to XML configuration * * \param[in] xml CIB XML to check * * \return List entry of schema configured in \p xml */ static GList * get_configured_schema(const xmlNode *xml) { const char *schema_name = crm_element_value(xml, PCMK_XA_VALIDATE_WITH); pcmk__warn_if_schema_deprecated(schema_name); return pcmk__get_schema(schema_name); } /*! * \brief Update CIB XML to latest schema that validates it * * \param[in,out] xml XML to update (may be freed and replaced * after being transformed) * \param[in] max_schema_name If not NULL, do not update \p xml to any * schema later than this one * \param[in] transform If false, do not update \p xml to any schema * that requires an XSL transform * \param[in] to_logs If false, certain validation errors will be * sent to stderr rather than logged * * \return Standard Pacemaker return code */ int pcmk__update_schema(xmlNode **xml, const char *max_schema_name, bool transform, bool to_logs) { int max_stable_schemas = xml_latest_schema_index(); int max_schema_index = 0; int rc = pcmk_rc_ok; GList *entry = NULL; pcmk__schema_t *best_schema = NULL; pcmk__schema_t *original_schema = NULL; xmlRelaxNGValidityErrorFunc error_handler = to_logs ? (xmlRelaxNGValidityErrorFunc) xml_log : NULL; CRM_CHECK((xml != NULL) && (*xml != NULL) && ((*xml)->doc != NULL), return EINVAL); if (max_schema_name != NULL) { GList *max_entry = pcmk__get_schema(max_schema_name); if (max_entry != NULL) { pcmk__schema_t *max_schema = max_entry->data; max_schema_index = max_schema->schema_index; } } if ((max_schema_index < 1) || (max_schema_index > max_stable_schemas)) { max_schema_index = max_stable_schemas; } entry = get_configured_schema(*xml); if (entry == NULL) { return pcmk_rc_cib_corrupt; } original_schema = entry->data; if (original_schema->schema_index >= max_schema_index) { return pcmk_rc_ok; } for (; entry != NULL; entry = entry->next) { pcmk__schema_t *current_schema = entry->data; xmlNode *upgrade = NULL; if (current_schema->schema_index > max_schema_index) { break; } if (!validate_with(*xml, current_schema, error_handler, GUINT_TO_POINTER(LOG_ERR))) { crm_debug("Schema %s does not validate", current_schema->name); if (best_schema != NULL) { /* we've satisfied the validation, no need to check further */ break; } rc = pcmk_rc_schema_validation; continue; // Try again with the next higher schema } crm_debug("Schema %s validates", current_schema->name); rc = pcmk_rc_ok; best_schema = current_schema; if (current_schema->schema_index == max_schema_index) { break; // No further transformations possible } + // coverity[null_field] The index check ensures entry->next is not NULL if (!transform || (current_schema->transforms == NULL) || validate_with_silent(*xml, entry->next->data)) { /* The next schema either doesn't require a transform or validates * successfully even without the transform. Skip the transform and * try the next schema with the same XML. */ continue; } upgrade = apply_upgrade(*xml, current_schema->schema_index, to_logs); if (upgrade == NULL) { /* The transform failed, so this schema can't be used. Later * schemas are unlikely to validate, but try anyway until we * run out of options. */ rc = pcmk_rc_transform_failed; } else { best_schema = current_schema; pcmk__xml_free(*xml); *xml = upgrade; } } if ((best_schema != NULL) && (best_schema->schema_index > original_schema->schema_index)) { crm_info("%s the configuration schema to %s", (transform? "Transformed" : "Upgraded"), best_schema->name); crm_xml_add(*xml, PCMK_XA_VALIDATE_WITH, best_schema->name); } return rc; } int pcmk_update_configured_schema(xmlNode **xml) { return pcmk__update_configured_schema(xml, true); } /*! * \brief Update XML from its configured schema to the latest major series * * \param[in,out] xml XML to update * \param[in] to_logs If false, certain validation errors will be * sent to stderr rather than logged * * \return Standard Pacemaker return code */ int pcmk__update_configured_schema(xmlNode **xml, bool to_logs) { pcmk__schema_t *x_0_schema = pcmk__find_x_0_schema()->data; pcmk__schema_t *original_schema = NULL; GList *entry = NULL; if (xml == NULL) { return EINVAL; } entry = get_configured_schema(*xml); if (entry == NULL) { return pcmk_rc_cib_corrupt; } original_schema = entry->data; if (original_schema->schema_index < x_0_schema->schema_index) { // Current configuration schema is not acceptable, try to update xmlNode *converted = NULL; const char *new_schema_name = NULL; pcmk__schema_t *schema = NULL; entry = NULL; converted = pcmk__xml_copy(NULL, *xml); if (pcmk__update_schema(&converted, NULL, true, to_logs) == pcmk_rc_ok) { new_schema_name = crm_element_value(converted, PCMK_XA_VALIDATE_WITH); entry = pcmk__get_schema(new_schema_name); } schema = (entry == NULL)? NULL : entry->data; if ((schema == NULL) || (schema->schema_index < x_0_schema->schema_index)) { // Updated configuration schema is still not acceptable if ((schema == NULL) || (schema->schema_index < original_schema->schema_index)) { // We couldn't validate any schema at all if (to_logs) { pcmk__config_err("Cannot upgrade configuration (claiming " "%s schema) to at least %s because it " "does not validate with any schema from " "%s to the latest", original_schema->name, x_0_schema->name, original_schema->name); } else { fprintf(stderr, "Cannot upgrade configuration (claiming " "%s schema) to at least %s because it " "does not validate with any schema from " "%s to the latest\n", original_schema->name, x_0_schema->name, original_schema->name); } } else { // We updated configuration successfully, but still too low if (to_logs) { pcmk__config_err("Cannot upgrade configuration (claiming " "%s schema) to at least %s because it " "would not upgrade past %s", original_schema->name, x_0_schema->name, pcmk__s(new_schema_name, "unspecified version")); } else { fprintf(stderr, "Cannot upgrade configuration (claiming " "%s schema) to at least %s because it " "would not upgrade past %s\n", original_schema->name, x_0_schema->name, pcmk__s(new_schema_name, "unspecified version")); } } pcmk__xml_free(converted); converted = NULL; return pcmk_rc_transform_failed; } else { // Updated configuration schema is acceptable pcmk__xml_free(*xml); *xml = converted; if (schema->schema_index < xml_latest_schema_index()) { if (to_logs) { pcmk__config_warn("Configuration with %s schema was " "internally upgraded to acceptable (but " "not most recent) %s", original_schema->name, schema->name); } } else if (to_logs) { crm_info("Configuration with %s schema was internally " "upgraded to latest version %s", original_schema->name, schema->name); } } } else if (!to_logs) { pcmk__schema_t *none_schema = NULL; entry = pcmk__get_schema(PCMK_VALUE_NONE); pcmk__assert((entry != NULL) && (entry->data != NULL)); none_schema = entry->data; if (original_schema->schema_index >= none_schema->schema_index) { // @COMPAT the none schema is deprecated since 2.1.8 fprintf(stderr, "Schema validation of configuration is " "disabled (support for " PCMK_XA_VALIDATE_WITH " set to \"" PCMK_VALUE_NONE "\" is deprecated" " and will be removed in a future release)\n"); } } return pcmk_rc_ok; } /*! * \internal * \brief Return a list of all schema files and any associated XSLT files * later than the given one * \brief Return a list of all schema versions later than the given one * * \param[in] schema The schema to compare against (for example, * "pacemaker-3.1.rng" or "pacemaker-3.1") * * \note The caller is responsible for freeing both the returned list and * the elements of the list */ GList * pcmk__schema_files_later_than(const char *name) { GList *lst = NULL; pcmk__schema_version_t ver; if (!version_from_filename(name, &ver)) { return lst; } for (GList *iter = g_list_nth(known_schemas, xml_latest_schema_index()); iter != NULL; iter = iter->prev) { pcmk__schema_t *schema = iter->data; if (schema_cmp(ver, schema->version) != -1) { continue; } for (GList *iter2 = g_list_last(schema->transforms); iter2 != NULL; iter2 = iter2->prev) { const struct dirent *entry = iter2->data; lst = g_list_prepend(lst, pcmk__str_copy(entry->d_name)); } lst = g_list_prepend(lst, crm_strdup_printf("%s.rng", schema->name)); } return lst; } static void append_href(xmlNode *xml, void *user_data) { GList **list = user_data; char *href = crm_element_value_copy(xml, "href"); if (href == NULL) { return; } *list = g_list_prepend(*list, href); } static void external_refs_in_schema(GList **list, const char *contents) { /* local-name()= is needed to ignore the xmlns= setting at the top of * the XML file. Otherwise, the xpath query will always return nothing. */ const char *search = "//*[local-name()='externalRef'] | //*[local-name()='include']"; xmlNode *xml = pcmk__xml_parse(contents); pcmk__xpath_foreach_result(xml->doc, search, append_href, list); pcmk__xml_free(xml); } static int read_file_contents(const char *file, char **contents) { int rc = pcmk_rc_ok; char *path = NULL; if (pcmk__ends_with(file, ".rng")) { path = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_legacy_rng, file); } else { path = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_legacy_xslt, file); } rc = pcmk__file_contents(path, contents); free(path); return rc; } static void add_schema_file_to_xml(xmlNode *parent, const char *file, GList **already_included) { char *contents = NULL; char *path = NULL; xmlNode *file_node = NULL; GList *includes = NULL; int rc = pcmk_rc_ok; /* If we already included this file, don't do so again. */ if (g_list_find_custom(*already_included, file, (GCompareFunc) strcmp) != NULL) { return; } /* Ensure whatever file we were given has a suffix we know about. If not, * just assume it's an RNG file. */ if (!pcmk__ends_with(file, ".rng") && !pcmk__ends_with(file, ".xsl")) { path = crm_strdup_printf("%s.rng", file); } else { path = pcmk__str_copy(file); } rc = read_file_contents(path, &contents); if (rc != pcmk_rc_ok || contents == NULL) { crm_warn("Could not read schema file %s: %s", file, pcmk_rc_str(rc)); free(path); return; } /* Create a new node with the contents of the file * as a CDATA block underneath it. */ file_node = pcmk__xe_create(parent, PCMK__XE_FILE); crm_xml_add(file_node, PCMK_XA_PATH, path); *already_included = g_list_prepend(*already_included, path); xmlAddChild(file_node, xmlNewCDataBlock(parent->doc, (const xmlChar *) contents, strlen(contents))); /* Scan the file for any or nodes and build up * a list of the files they reference. */ external_refs_in_schema(&includes, contents); /* For each referenced file, recurse to add it (and potentially anything it * references, ...) to the XML. */ for (GList *iter = includes; iter != NULL; iter = iter->next) { add_schema_file_to_xml(parent, iter->data, already_included); } free(contents); g_list_free_full(includes, free); } /*! * \internal * \brief Add an XML schema file and all the files it references as children * of a given XML node * * \param[in,out] parent The parent XML node * \param[in] name The schema version to compare against * (for example, "pacemaker-3.1" or "pacemaker-3.1.rng") * \param[in,out] already_included A list of names that have already been added * to the parent node. * * \note The caller is responsible for freeing both the returned list and * the elements of the list */ void pcmk__build_schema_xml_node(xmlNode *parent, const char *name, GList **already_included) { xmlNode *schema_node = pcmk__xe_create(parent, PCMK__XA_SCHEMA); crm_xml_add(schema_node, PCMK_XA_VERSION, name); add_schema_file_to_xml(schema_node, name, already_included); if (schema_node->children == NULL) { // Not needed if empty. May happen if name was invalid, for example. pcmk__xml_free(schema_node); } } /*! * \internal * \brief Return the directory containing any extra schema files that a * Pacemaker Remote node fetched from the cluster */ const char * pcmk__remote_schema_dir(void) { const char *dir = pcmk__env_option(PCMK__ENV_REMOTE_SCHEMA_DIRECTORY); if (pcmk__str_empty(dir)) { return PCMK__REMOTE_SCHEMA_DIR; } return dir; } /*! * \internal * \brief Warn if a given validation schema is deprecated * * \param[in] Schema name to check */ void pcmk__warn_if_schema_deprecated(const char *schema) { /* @COMPAT Disabling validation is deprecated since 2.1.8, but * resource-agents' ocf-shellfuncs (at least as of 4.15.1) uses it */ if (pcmk__str_eq(schema, PCMK_VALUE_NONE, pcmk__str_none)) { pcmk__config_warn("Support for " PCMK_XA_VALIDATE_WITH "='%s' is " "deprecated and will be removed in a future release " "without the possibility of upgrades (manually edit " "to use a supported schema)", schema); } } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include gboolean cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) { int rc = pcmk__update_configured_schema(xml, to_logs); if (best_version != NULL) { const char *name = crm_element_value(*xml, PCMK_XA_VALIDATE_WITH); if (name == NULL) { *best_version = -1; } else { GList *entry = pcmk__get_schema(name); pcmk__schema_t *schema = (entry == NULL)? NULL : entry->data; *best_version = (schema == NULL)? -1 : schema->schema_index; } } return (rc == pcmk_rc_ok)? TRUE: FALSE; } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 3d42720490..22632d51b7 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2950 +1,2951 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include "fencing_private.h" CRM_TRACE_INIT_DATA(stonith); // Used as stonith_t:st_private typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; int notify_refcnt; bool notify_deletes; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; // Used as stonith_event_t:opaque struct event_private { pcmk__action_result_t result; }; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); bool delete; } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static int stonith_send_command(stonith_t *stonith, const char *op, xmlNode *data, xmlNode **output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int stonith_api_del_notification(stonith_t *stonith, const char *event); /*! * \internal * \brief Parse fence agent namespace from a string * * \param[in] namespace_s Name of namespace as string * * \return enum value parsed from \p namespace_s */ static enum stonith_namespace parse_namespace(const char *namespace_s) { if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) { return st_namespace_any; } /* @TODO Is "redhat" still necessary except for stonith_text2namespace() * backward compatibility? */ if (pcmk__str_any_of(namespace_s, "redhat", "stonith-ng", NULL)) { return st_namespace_rhcs; } if (pcmk__str_eq(namespace_s, "internal", pcmk__str_none)) { return st_namespace_internal; } if (pcmk__str_eq(namespace_s, "heartbeat", pcmk__str_none)) { return st_namespace_lha; } return st_namespace_invalid; } /*! * \internal * \brief Get name of a fence agent namespace as a string * * \param[in] st_namespace Namespace as enum value * * \return Name of \p st_namespace as a string */ static const char * namespace_text(enum stonith_namespace st_namespace) { switch (st_namespace) { case st_namespace_any: return "any"; case st_namespace_rhcs: return "stonith-ng"; case st_namespace_internal: return "internal"; case st_namespace_lha: return "heartbeat"; default: return "unsupported"; } } /*! * \internal * \brief Determine fence agent namespace from agent name * * This involves external checks (for example, checking the existence of a file * or calling an external library function). * * \param[in] agent Fence agent name * * \return Namespace to which \p agent belongs, or \c st_namespace_invalid if * not found */ static enum stonith_namespace get_namespace_from_agent(const char *agent) { if (stonith__agent_is_rhcs(agent)) { return st_namespace_rhcs; } #if HAVE_STONITH_STONITH_H if (stonith__agent_is_lha(agent)) { return st_namespace_lha; } #endif // HAVE_STONITH_STONITH_H return st_namespace_invalid; } gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) { gboolean rv = FALSE; stonith_t *stonith_api = (st != NULL)? st : stonith__api_new(); char *list = NULL; if(stonith_api) { if (stonith_api->state == stonith_disconnected) { int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL); if (rc != pcmk_ok) { crm_err("Failed connecting to Stonith-API for watchdog-fencing-query."); } } if (stonith_api->state != stonith_disconnected) { /* caveat!!! * this might fail when when stonithd is just updating the device-list * probably something we should fix as well for other api-calls */ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0); if ((rc != pcmk_ok) || (list == NULL)) { /* due to the race described above it can happen that * we drop in here - so as not to make remote nodes * panic on that answer */ if (rc == -ENODEV) { crm_notice("Cluster does not have watchdog fencing device"); } else { crm_warn("Could not check for watchdog fencing device: %s", pcmk_strerror(rc)); } } else if (list[0] == '\0') { rv = TRUE; } else { GList *targets = stonith__parse_targets(list); rv = pcmk__str_in_list(node, targets, pcmk__str_casei); g_list_free_full(targets, free); } free(list); if (!st) { /* if we're provided the api we still might have done the * connection - but let's assume the caller won't bother */ stonith_api->cmds->disconnect(stonith_api); } } if (!st) { stonith__api_free(stonith_api); } } else { crm_err("Stonith-API for watchdog-fencing-query couldn't be created."); } crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.", node, rv?"":"not "); return rv; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node) { return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); } /* when cycling through the list we don't want to delete items so just mark them and when we know nobody is using the list loop over it to remove the marked items */ static void foreach_notify_entry (stonith_private_t *private, GFunc func, gpointer user_data) { private->notify_refcnt++; g_list_foreach(private->notify_list, func, user_data); private->notify_refcnt--; if ((private->notify_refcnt == 0) && private->notify_deletes) { GList *list_item = private->notify_list; private->notify_deletes = FALSE; while (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; GList *next = g_list_next(list_item); if (list_client->delete) { free(list_client); private->notify_list = g_list_delete_link(private->notify_list, list_item); } list_item = next; } } } static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); native = stonith->st_private; native->ipc = NULL; native->source = NULL; free(native->token); native->token = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(blob.xml, PCMK__XA_SUBT, PCMK__VALUE_ST_NOTIFY_DISCONNECT); foreach_notify_entry(native, stonith_send_notification, &blob); pcmk__xml_free(blob.xml); } xmlNode * create_device_registration_xml(const char *id, enum stonith_namespace standard, const char *agent, const stonith_key_value_t *params, const char *rsc_provides) { xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); xmlNode *args = pcmk__xe_create(data, PCMK__XE_ATTRIBUTES); #if HAVE_STONITH_STONITH_H if (standard == st_namespace_any) { standard = get_namespace_from_agent(agent); } if (standard == st_namespace_lha) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, PCMK_XA_ID, id); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK_XA_AGENT, agent); if ((standard != st_namespace_any) && (standard != st_namespace_invalid)) { crm_xml_add(data, PCMK__XA_NAMESPACE, namespace_text(standard)); } if (rsc_provides) { crm_xml_add(data, PCMK__XA_RSC_PROVIDES, rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t *st, int call_options, const char *id, const char *namespace_s, const char *agent, const stonith_key_value_t *params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, parse_namespace(namespace_s), agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK_XA_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level) { int rc = 0; xmlNode *data = NULL; CRM_CHECK(node || pattern || (attr && value), return -EINVAL); data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); if (node) { crm_xml_add(data, PCMK_XA_TARGET, node); } else if (pattern) { crm_xml_add(data, PCMK_XA_TARGET_PATTERN, pattern); } else { crm_xml_add(data, PCMK_XA_TARGET_ATTRIBUTE, attr); crm_xml_add(data, PCMK_XA_TARGET_VALUE, value); } crm_xml_add_int(data, PCMK_XA_INDEX, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { return stonith_api_remove_level_full(st, options, node, NULL, NULL, NULL, level); } /*! * \internal * \brief Create XML for fence topology level registration request * * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to register * \param[in] device_list List of devices in level * * \return Newly allocated XML tree on success, NULL otherwise * * \note The caller should set only one of node, pattern or attr/value. */ xmlNode * create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, const stonith_key_value_t *device_list) { GString *list = NULL; xmlNode *data; CRM_CHECK(node || pattern || (attr && value), return NULL); data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add_int(data, PCMK_XA_ID, level); crm_xml_add_int(data, PCMK_XA_INDEX, level); if (node) { crm_xml_add(data, PCMK_XA_TARGET, node); } else if (pattern) { crm_xml_add(data, PCMK_XA_TARGET_PATTERN, pattern); } else { crm_xml_add(data, PCMK_XA_TARGET_ATTRIBUTE, attr); crm_xml_add(data, PCMK_XA_TARGET_VALUE, value); } for (; device_list; device_list = device_list->next) { pcmk__add_separated_word(&list, 1024, device_list->value, ","); } if (list != NULL) { crm_xml_add(data, PCMK_XA_DEVICES, (const char *) list->str); g_string_free(list, TRUE); } return data; } static int stonith_api_register_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, const stonith_key_value_t *device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, pattern, attr, value, level, device_list); CRM_CHECK(data != NULL, return -EINVAL); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, const stonith_key_value_t * device_list) { return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL, level, device_list); } static int stonith_api_device_list(stonith_t *stonith, int call_options, const char *namespace_s, stonith_key_value_t **devices, int timeout) { int count = 0; enum stonith_namespace ns = parse_namespace(namespace_s); if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } #if HAVE_STONITH_STONITH_H // Include Linux-HA agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { count += stonith__list_lha_agents(devices); } #endif // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { count += stonith__list_rhcs_agents(devices); } return count; } // See stonith_api_operations_t:metadata() documentation static int stonith_api_device_metadata(stonith_t *stonith, int call_options, const char *agent, const char *namespace_s, char **output, int timeout_sec) { /* By executing meta-data directly, we can get it from stonith_admin when * the cluster is not running, which is important for higher-level tools. */ enum stonith_namespace ns = get_namespace_from_agent(agent); if (timeout_sec <= 0) { timeout_sec = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } crm_trace("Looking up metadata for %s agent %s", namespace_text(ns), agent); switch (ns) { case st_namespace_rhcs: return stonith__rhcs_metadata(agent, timeout_sec, output); #if HAVE_STONITH_STONITH_H case st_namespace_lha: return stonith__lha_metadata(agent, timeout_sec, output); #endif default: crm_err("Can't get fence agent '%s' meta-data: No such agent", agent); break; } return -ENODEV; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObject *xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, target); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, PCMK_ACTION_OFF); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = pcmk__xpath_search(output->doc, "//*[@" PCMK_XA_AGENT "]"); if (xpathObj) { max = pcmk__xpath_num_results(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = pcmk__xpath_result(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { const char *match_id = crm_element_value(match, PCMK_XA_ID); xmlChar *match_path = xmlGetNodePath(match); crm_info("//*[@" PCMK_XA_AGENT "][%d] = %s", lpc, match_path); free(match_path); *devices = stonith__key_value_add(*devices, NULL, match_id); } } xmlXPathFreeObject(xpathObj); } pcmk__xml_free(output); pcmk__xml_free(data); return max; } /*! * \internal * \brief Make a STONITH_OP_EXEC request * * \param[in,out] stonith Fencer connection * \param[in] call_options Bitmask of \c stonith_call_options * \param[in] id Fence device ID that request is for * \param[in] action Agent action to request (list, status, monitor) * \param[in] target Name of target node for requested action * \param[in] timeout_sec Error if not completed within this many seconds * \param[out] output Where to set agent output */ static int stonith_api_call(stonith_t *stonith, int call_options, const char *id, const char *action, const char *target, int timeout_sec, xmlNode **output) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_ST_DEVICE_ID, id); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, action); crm_xml_add(data, PCMK__XA_ST_TARGET, target); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout_sec); pcmk__xml_free(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, PCMK_ACTION_LIST, NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, PCMK__XA_ST_OUTPUT); if (list_str) { *list_info = strdup(list_str); } } if (output) { pcmk__xml_free(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, PCMK_ACTION_MONITOR, NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, PCMK_ACTION_STATUS, port, timeout, NULL); } static int stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance, int delay) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, node); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, action); crm_xml_add_int(data, PCMK__XA_ST_TIMEOUT, timeout); crm_xml_add_int(data, PCMK__XA_ST_TOLERANCE, tolerance); crm_xml_add_int(data, PCMK__XA_ST_DELAY, delay); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); pcmk__xml_free(data); return rc; } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { return stonith_api_fence_with_delay(stonith, call_options, node, action, timeout, tolerance, 0); } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { stonith__set_call_options(call_options, target, st_opt_manual_ack); return stonith_api_fence(stonith, call_options, target, PCMK_ACTION_OFF, 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = pcmk__xe_create(NULL, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, node); } stonith__set_call_options(call_options, node, st_opt_sync_call); rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options, timeout); pcmk__xml_free(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = pcmk__xpath_find_one(output->doc, "//" PCMK__XE_ST_HISTORY, LOG_NEVER); for (op = pcmk__xe_first_child(reply, NULL, NULL, NULL); op != NULL; op = pcmk__xe_next(op, NULL)) { stonith_history_t *kvp; long long completed; long long completed_nsec = 0L; kvp = pcmk__assert_alloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); kvp->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); kvp->origin = crm_element_value_copy(op, PCMK__XA_ST_ORIGIN); kvp->delegate = crm_element_value_copy(op, PCMK__XA_ST_DELEGATE); kvp->client = crm_element_value_copy(op, PCMK__XA_ST_CLIENTNAME); crm_element_value_ll(op, PCMK__XA_ST_DATE, &completed); kvp->completed = (time_t) completed; crm_element_value_ll(op, PCMK__XA_ST_DATE_NSEC, &completed_nsec); kvp->completed_nsec = completed_nsec; crm_element_value_int(op, PCMK__XA_ST_STATE, &kvp->state); kvp->exit_reason = crm_element_value_copy(op, PCMK_XA_EXIT_REASON); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } pcmk__xml_free(output); return rc; } /*! * \internal * \brief Free a list of fencing history objects and all members of each object * * \param[in,out] head Head of fencing history object list */ void stonith__history_free(stonith_history_t *head) { /* @COMPAT Drop "next" member of stonith_history_t, use a GList or GSList, * and use the appropriate free function (while ensuring the members get * freed) */ while (head != NULL) { stonith_history_t *next = head->next; free(head->target); free(head->action); free(head->origin); free(head->delegate); free(head->client); free(head->exit_reason); free(head); head = next; } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; if (a_client->delete || b_client->delete) { /* make entries marked for deletion not findable */ return -1; } CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = NULL; CRM_CHECK(token != NULL, return NULL); op_msg = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(op_msg, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(op_msg, PCMK__XA_ST_OP, op); crm_xml_add_int(op_msg, PCMK__XA_ST_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, PCMK__XA_ST_CALLOPT, call_options); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(op_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->st_private; crm_debug("Disconnecting from the fencer"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->st_private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { pcmk__intkey_table_remove(private->stonith_op_callback_table, call_id); } return pcmk_ok; } /*! * \internal * \brief Invoke a (single) specified fence action callback * * \param[in,out] st Fencer API connection * \param[in] call_id If positive, call ID of completed fence action, * otherwise legacy return code for early failure * \param[in,out] result Full result for action * \param[in,out] userdata User data to pass to callback * \param[in] callback Fence action callback to invoke */ static void invoke_fence_action_callback(stonith_t *st, int call_id, pcmk__action_result_t *result, void *userdata, void (*callback) (stonith_t *st, stonith_callback_data_t *data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = pcmk_rc2legacy(stonith__result2rc(result)); data.userdata = userdata; data.opaque = (void *) result; callback(st, &data); } /*! * \internal * \brief Invoke any callbacks registered for a specified fence action result * * Given a fence action result from the fencer, invoke any callback registered * for that action, as well as any global callback registered. * * \param[in,out] stonith Fencer API connection * \param[in] msg If non-NULL, fencer reply * \param[in] call_id If \p msg is NULL, call ID of action that timed out */ static void invoke_registered_callbacks(stonith_t *stonith, const xmlNode *msg, int call_id) { stonith_private_t *private = NULL; stonith_callback_client_t *cb_info = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->st_private != NULL, return); private = stonith->st_private; if (msg == NULL) { // Fencer didn't reply in time pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, "Fencer accepted request but did not reply in time"); CRM_LOG_ASSERT(call_id > 0); } else { // We have the fencer reply if ((crm_element_value_int(msg, PCMK__XA_ST_CALLID, &call_id) != 0) || (call_id <= 0)) { crm_log_xml_warn(msg, "Bad fencer reply"); } stonith__xe_get_result(msg, &result); } if (call_id > 0) { cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); } if ((cb_info != NULL) && (cb_info->callback != NULL) && (pcmk__result_ok(&result) || !(cb_info->only_success))) { crm_trace("Invoking callback %s for call %d", pcmk__s(cb_info->id, "without ID"), call_id); invoke_fence_action_callback(stonith, call_id, &result, cb_info->user_data, cb_info->callback); } else if ((private->op_callback == NULL) && !pcmk__result_ok(&result)) { crm_warn("Fencing action without registered callback failed: %d (%s%s%s)", result.exit_status, pcmk_exec_status_str(result.execution_status), ((result.exit_reason == NULL)? "" : ": "), ((result.exit_reason == NULL)? "" : result.exit_reason)); crm_log_xml_debug(msg, "Failed fence update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_fence_action_callback(stonith, call_id, &result, NULL, private->op_callback); } if (cb_info != NULL) { stonith_api_del_callback(stonith, call_id, FALSE); } pcmk__reset_result(&result); } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); invoke_registered_callbacks(timer->stonith, NULL, timer->call_id); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = pcmk__assert_alloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = pcmk__create_timer(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->st_private; callback = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; pcmk__assert(st != NULL); private = st->st_private; blob.stonith = st; blob.xml = pcmk__xml_parse(buffer); if (blob.xml == NULL) { crm_warn("Received malformed message from fencer: %s", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, PCMK__XA_T); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, PCMK__VALUE_STONITH_NG, pcmk__str_none)) { invoke_registered_callbacks(st, blob.xml, 0); } else if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY, pcmk__str_none)) { foreach_notify_entry(private, stonith_send_notification, &blob); } else if (pcmk__str_eq(type, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE, pcmk__str_none)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, PCMK__XA_ST_TIMEOUT, &timeout); crm_element_value_int(blob.xml, PCMK__XA_ST_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } pcmk__xml_free(blob.xml); return 1; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = NULL; const char *display_name = name? name : "client"; struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; CRM_CHECK(stonith != NULL, return -EINVAL); native = stonith->st_private; pcmk__assert(native != NULL); crm_debug("Attempting fencer connection by %s with%s mainloop", display_name, (stonith_fd? "out" : "")); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc != NULL) { rc = pcmk__connect_generic_ipc(native->ipc); if (rc == pcmk_rc_ok) { rc = pcmk__ipc_fd(native->ipc, stonith_fd); if (rc != pcmk_rc_ok) { crm_debug("Couldn't get file descriptor for IPC: %s", pcmk_rc_str(rc)); } } if (rc != pcmk_rc_ok) { crm_ipc_close(native->ipc); crm_ipc_destroy(native->ipc); native->ipc = NULL; } } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { rc = -ENOTCONN; } else { xmlNode *reply = NULL; xmlNode *hello = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(hello, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(hello, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(hello, PCMK__XA_ST_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_debug("Couldn't register with the fencer: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); rc = -ECOMM; } else if (reply == NULL) { crm_debug("Couldn't register with the fencer: no reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, PCMK__XA_ST_OP); native->token = crm_element_value_copy(reply, PCMK__XA_ST_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_none)) { crm_debug("Couldn't register with the fencer: invalid reply type '%s'", (msg_type? msg_type : "(missing)")); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else if (native->token == NULL) { crm_debug("Couldn't register with the fencer: no token in reply"); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else { crm_debug("Connection to fencer by %s succeeded (registration token: %s)", display_name, native->token); rc = pcmk_ok; } } pcmk__xml_free(reply); pcmk__xml_free(hello); } if (rc != pcmk_ok) { crm_debug("Connection attempt to fencer by %s failed: %s " QB_XS " rc=%d", display_name, pcmk_strerror(rc), rc); stonith->cmds->disconnect(stonith); } return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = pcmk__xe_create(NULL, __func__); stonith_private_t *native = stonith->st_private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); if (enabled) { crm_xml_add(notify_msg, PCMK__XA_ST_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, PCMK__XA_ST_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } pcmk__xml_free(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->st_private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = pcmk__assert_alloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static void del_notify_entry(gpointer data, gpointer user_data) { stonith_notify_client_t *entry = data; stonith_t * stonith = user_data; if (!entry->delete) { crm_debug("Removing callback for %s events", entry->event); stonith_api_del_notification(stonith, entry->event); } } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = stonith->st_private; if (event == NULL) { foreach_notify_entry(private, del_notify_entry, stonith); crm_trace("Removed callback"); return pcmk_ok; } crm_debug("Removing callback for %s events", event); new_client = pcmk__assert_alloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; if (private->notify_refcnt) { list_client->delete = TRUE; private->notify_deletes = TRUE; } else { private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); } crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->st_private != NULL, return -EINVAL); private = stonith->st_private; if (call_id == 0) { // Add global callback private->op_callback = callback; } else if (call_id < 0) { // Call failed immediately, so call callback now if (!(options & st_opt_report_only_success)) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); pcmk__set_result(&result, CRM_EX_ERROR, stonith__legacy2status(call_id), NULL); invoke_fence_action_callback(stonith, call_id, &result, user_data, callback); } else { crm_warn("Fencer call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = pcmk__assert_alloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id, blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } /*! * \internal * \brief Get the data section of a fencer notification * * \param[in] msg Notification XML * \param[in] ntype Notification type */ static xmlNode * get_event_data_xml(xmlNode *msg, const char *ntype) { char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = pcmk__xpath_find_one(msg->doc, data_addr, LOG_DEBUG); free(data_addr); return data; } /* */ static stonith_event_t * xml_to_event(xmlNode *msg) { stonith_event_t *event = pcmk__assert_alloc(1, sizeof(stonith_event_t)); struct event_private *event_private = NULL; event->opaque = pcmk__assert_alloc(1, sizeof(struct event_private)); event_private = (struct event_private *) event->opaque; crm_log_xml_trace(msg, "stonith_notify"); // All notification types have the operation result and notification subtype stonith__xe_get_result(msg, &event_private->result); event->operation = crm_element_value_copy(msg, PCMK__XA_ST_OP); // @COMPAT The API originally provided the result as a legacy return code event->result = pcmk_rc2legacy(stonith__result2rc(&event_private->result)); // Some notification subtypes have additional information if (pcmk__str_eq(event->operation, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { xmlNode *data = get_event_data_xml(msg, event->operation); if (data == NULL) { crm_err("No data for %s event", event->operation); crm_log_xml_notice(msg, "BadEvent"); } else { event->origin = crm_element_value_copy(data, PCMK__XA_ST_ORIGIN); event->action = crm_element_value_copy(data, PCMK__XA_ST_DEVICE_ACTION); event->target = crm_element_value_copy(data, PCMK__XA_ST_TARGET); event->executioner = crm_element_value_copy(data, PCMK__XA_ST_DELEGATE); event->id = crm_element_value_copy(data, PCMK__XA_ST_REMOTE_OP); event->client_origin = crm_element_value_copy(data, PCMK__XA_ST_CLIENTNAME); event->device = crm_element_value_copy(data, PCMK__XA_ST_DEVICE_ID); } } else if (pcmk__str_any_of(event->operation, STONITH_OP_DEVICE_ADD, STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, STONITH_OP_LEVEL_DEL, NULL)) { xmlNode *data = get_event_data_xml(msg, event->operation); if (data == NULL) { crm_err("No data for %s event", event->operation); crm_log_xml_notice(msg, "BadEvent"); } else { event->device = crm_element_value_copy(data, PCMK__XA_ST_DEVICE_ID); } } return event; } static void event_free(stonith_event_t * event) { struct event_private *event_private = event->opaque; free(event->id); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); pcmk__reset_result(&event_private->result); free(event->opaque); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, PCMK__XA_SUBT); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->delete) { crm_trace("Skipping callback - marked for deletion"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (!pcmk__str_eq(entry->event, event, pcmk__str_none)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); + // coverity[null_field] entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } /*! * \internal * \brief Create and send an API request * * \param[in,out] stonith Stonith connection * \param[in] op API operation to request * \param[in] data Data to attach to request * \param[out] output_data If not NULL, will be set to reply if synchronous * \param[in] call_options Bitmask of stonith_call_options to use * \param[in] timeout Error if not completed within this many seconds * * \return pcmk_ok (for synchronous requests) or positive call ID * (for asynchronous requests) on success, -errno otherwise */ static int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = NULL; pcmk__assert((stonith != NULL) && (stonith->st_private != NULL) && (op != NULL)); native = stonith->st_private; if (output_data != NULL) { *output_data = NULL; } if ((stonith->state == stonith_disconnected) || (native->token == NULL)) { return -ENOTCONN; } /* Increment the call ID, which must be positive to avoid conflicting with * error codes. This shouldn't be a problem unless the client mucked with * it or the counter wrapped around. */ stonith->call_id++; if (stonith->call_id < 1) { stonith->call_id = 1; } op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("Sending %s message to fencer with timeout %ds", op, timeout); if (data) { const char *delay_s = crm_element_value(data, PCMK__XA_ST_DELAY); if (delay_s) { crm_xml_add(op_msg, PCMK__XA_ST_DELAY, delay_s); } } { enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; if (call_options & st_opt_sync_call) { pcmk__set_ipc_flags(ipc_flags, "stonith command", crm_ipc_client_response); } rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); } pcmk__xml_free(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); pcmk__xml_free(op_reply); return stonith->call_id; } crm_element_value_int(op_reply, PCMK__XA_ST_CALLID, &reply_id); if (reply_id == stonith->call_id) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; crm_trace("Synchronous reply %d received", reply_id); stonith__xe_get_result(op_reply, &result); rc = pcmk_rc2legacy(stonith__result2rc(&result)); pcmk__reset_result(&result); if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); pcmk__xml_free(op_reply); op_reply = NULL; rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); pcmk__xml_free(op_reply); op_reply = NULL; rc = -ENOMSG; } done: if (!crm_ipc_connected(native->ipc)) { crm_err("Fencer disconnected"); free(native->token); native->token = NULL; stonith->state = stonith_disconnected; } pcmk__xml_free(op_reply); return rc; } /*! * \internal * \brief Process IPC messages for a fencer API connection * * This is used for testing purposes in scenarios that don't use a mainloop to * dispatch messages automatically. * * \param[in,out] stonith_api Fencer API connetion object * * \return Standard Pacemaker return code */ int stonith__api_dispatch(stonith_t *stonith_api) { stonith_private_t *private = NULL; pcmk__assert(stonith_api != NULL); private = stonith_api->st_private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), stonith_api); pcmk__ipc_free_client_buffer(private->ipc); } if (!crm_ipc_connected(private->ipc)) { crm_err("Connection closed"); return ENOTCONN; } } return pcmk_rc_ok; } static int free_stonith_api(stonith_t *stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Unregistering notifications and disconnecting %p first", stonith); stonith->cmds->remove_notification(stonith, NULL); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->st_private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->st_private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } static gboolean is_stonith_param(gpointer key, gpointer value, gpointer user_data) { return pcmk_stonith_param(key); } int stonith__validate(stonith_t *st, int call_options, const char *rsc_id, const char *agent, GHashTable *params, int timeout_sec, char **output, char **error_output) { int rc = pcmk_rc_ok; /* Use a dummy node name in case the agent requires a target. We assume the * actual target doesn't matter for validation purposes (if in practice, * that is incorrect, we will need to allow the caller to pass the target). */ const char *target = "node1"; char *host_arg = NULL; if (params != NULL) { host_arg = pcmk__str_copy(g_hash_table_lookup(params, PCMK_STONITH_HOST_ARGUMENT)); /* Remove special stonith params from the table before doing anything else */ g_hash_table_foreach_remove(params, is_stonith_param, NULL); } #if PCMK__ENABLE_CIBSECRETS rc = pcmk__substitute_secrets(rsc_id, params); if (rc != pcmk_rc_ok) { crm_warn("Could not replace secret parameters for validation of %s: %s", agent, pcmk_rc_str(rc)); // rc is standard return value, don't return it in this function } #endif if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } if (timeout_sec <= 0) { timeout_sec = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } switch (get_namespace_from_agent(agent)) { case st_namespace_rhcs: rc = stonith__rhcs_validate(st, call_options, target, agent, params, host_arg, timeout_sec, output, error_output); rc = pcmk_legacy2rc(rc); break; #if HAVE_STONITH_STONITH_H case st_namespace_lha: rc = stonith__lha_validate(st, call_options, target, agent, params, timeout_sec, output, error_output); rc = pcmk_legacy2rc(rc); break; #endif case st_namespace_invalid: errno = ENOENT; rc = errno; if (error_output) { *error_output = crm_strdup_printf("Agent %s not found", agent); } else { crm_err("Agent %s not found", agent); } break; default: errno = EOPNOTSUPP; rc = errno; if (error_output) { *error_output = crm_strdup_printf("Agent %s does not support validation", agent); } else { crm_err("Agent %s does not support validation", agent); } break; } free(host_arg); return rc; } static int stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, const stonith_key_value_t *params, int timeout_sec, char **output, char **error_output) { /* Validation should be done directly via the agent, so we can get it from * stonith_admin when the cluster is not running, which is important for * higher-level tools. */ int rc = pcmk_ok; GHashTable *params_table = pcmk__strkey_table(free, free); // Convert parameter list to a hash table for (; params; params = params->next) { if (!pcmk_stonith_param(params->key)) { pcmk__insert_dup(params_table, params->key, params->value); } } rc = stonith__validate(st, call_options, rsc_id, agent, params_table, timeout_sec, output, error_output); g_hash_table_destroy(params_table); return rc; } /*! * \internal * \brief Create a new fencer API connection object * * \return Newly allocated fencer API connection object, or \c NULL on * allocation failure */ stonith_t * stonith__api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); if (new_stonith == NULL) { return NULL; } private = calloc(1, sizeof(stonith_private_t)); if (private == NULL) { free(new_stonith); return NULL; } new_stonith->st_private = private; private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); private->notify_list = NULL; private->notify_refcnt = 0; private->notify_deletes = FALSE; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); if (new_stonith->cmds == NULL) { free(new_stonith->st_private); free(new_stonith); return NULL; } new_stonith->cmds->free = free_stonith_api; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; new_stonith->cmds->validate = stonith_api_validate; return new_stonith; } /*! * \internal * \brief Free a fencer API connection object * * \param[in,out] stonith_api Fencer API connection object */ void stonith__api_free(stonith_t *stonith_api) { crm_trace("Destroying %p", stonith_api); if (stonith_api != NULL) { stonith_api->cmds->free(stonith_api); } } /*! * \internal * \brief Connect to the fencer, retrying on failure * * \param[in,out] stonith Fencer API connection object * \param[in] name Client name to use with fencer * \param[in] max_attempts Maximum number of attempts * * \return \c pcmk_rc_ok on success, or result of last attempt otherwise */ int stonith__api_connect_retry(stonith_t *stonith_api, const char *name, int max_attempts) { int rc = EINVAL; // if max_attempts is not positive for (int attempt = 1; attempt <= max_attempts; attempt++) { rc = stonith_api->cmds->connect(stonith_api, name, NULL); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { return rc; } if (attempt < max_attempts) { crm_notice("Fencer connection attempt %d of %d failed " "(retrying in 2s): %s " QB_XS " rc=%d", attempt, max_attempts, pcmk_rc_str(rc), rc); sleep(2); } } crm_notice("Could not connect to fencer: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); return rc; } /*! * \internal * \brief Append a newly allocated STONITH key-value pair to a list * * \param[in,out] head Head of key-value pair list (\c NULL for new list) * \param[in] key Key to add * \param[in] value Value to add * * \return Head of appended-to list (equal to \p head if \p head is not \c NULL) * \note The caller is responsible for freeing the return value using * \c stonith__key_value_freeall(). */ stonith_key_value_t * stonith__key_value_add(stonith_key_value_t *head, const char *key, const char *value) { /* @COMPAT Replace this function with pcmk_prepend_nvpair(), and reverse the * list when finished adding to it; or with a hash table where order does * not matter */ stonith_key_value_t *pair = pcmk__assert_alloc(1, sizeof(stonith_key_value_t)); pair->key = pcmk__str_copy(key); pair->value = pcmk__str_copy(value); if (head != NULL) { stonith_key_value_t *end = head; for (; end->next != NULL; end = end->next); end->next = pair; } else { head = pair; } return head; } /*! * \internal * \brief Free all items in a \c stonith_key_value_t list * * This means freeing the list itself with all of its nodes. Keys and values may * be freed depending on arguments. * * \param[in,out] head Head of list * \param[in] keys If \c true, free all keys * \param[in] values If \c true, free all values */ void stonith__key_value_freeall(stonith_key_value_t *head, bool keys, bool values) { while (head != NULL) { stonith_key_value_t *next = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = next; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { int rc = pcmk_ok; stonith_t *st = stonith__api_new(); const char *action = off? PCMK_ACTION_OFF : PCMK_ACTION_REBOOT; api_log_open(); if (st == NULL) { api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s", action, nodeid, uname); return -EPROTO; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); int opts = 0; stonith__set_call_options(opts, name, st_opt_sync_call|st_opt_allow_self_fencing); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->fence(st, opts, name, action, timeout, 0); free(name); if (rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action); } } stonith__api_free(st); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = pcmk_ok; time_t when = 0; stonith_t *st = stonith__api_new(); stonith_history_t *history = NULL, *hp = NULL; if (st == NULL) { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: " "API initialization failed", nodeid, uname); return when; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } else { int entries = 0; int progress = 0; int completed = 0; int opts = 0; char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); stonith__set_call_options(opts, name, st_opt_sync_call); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->history(st, opts, name, &history, 120); free(name); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } stonith__history_free(history); if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } stonith__api_free(st); if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } return when; } /*! * \internal * \brief Check whether a fence agent with a given name exists * * \param[in] name Agent name * * \retval \c true If a fence agent named \p name exists * \retval \c false Otherwise */ bool stonith__agent_exists(const char *name) { stonith_t *stonith_api = NULL; stonith_key_value_t *agents = NULL; bool rc = false; if (name == NULL) { return false; } stonith_api = stonith__api_new(); if (stonith_api == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return false; } // The list_agents method ignores its timeout argument stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &agents, 0); for (const stonith_key_value_t *iter = agents; iter != NULL; iter = iter->next) { if (pcmk__str_eq(iter->value, name, pcmk__str_none)) { rc = true; break; } } stonith__key_value_freeall(agents, true, true); stonith__api_free(stonith_api); return rc; } /*! * \internal * \brief Parse a target name from one line of a target list string * * \param[in] line One line of a target list string * \param[in] len String length of line * \param[in,out] output List to add newly allocated target name to */ static void parse_list_line(const char *line, int len, GList **output) { size_t i = 0; size_t entry_start = 0; if (line == NULL) { return; } /* Skip complaints about additional parameters device doesn't understand * * @TODO Document or eliminate the implied restriction of target names */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping list output line: %s", line); return; } // Process line content, character by character for (i = 0; i <= len; i++) { if (isspace(line[i]) || (line[i] == ',') || (line[i] == ';') || (line[i] == '\0')) { // We've found a separator (i.e. the end of an entry) int rc = 0; char *entry = NULL; if (i == entry_start) { // Skip leading and sequential separators entry_start = i + 1; continue; } entry = pcmk__assert_alloc(i - entry_start + 1, sizeof(char)); /* Read entry, stopping at first separator * * @TODO Document or eliminate these character restrictions */ rc = sscanf(line + entry_start, "%[a-zA-Z0-9_-.]", entry); if (rc != 1) { crm_warn("Could not parse list output entry: %s " QB_XS " entry_start=%d position=%d", line + entry_start, entry_start, i); free(entry); } else if (pcmk__strcase_any_of(entry, PCMK_ACTION_ON, PCMK_ACTION_OFF, NULL)) { /* Some agents print the target status in the list output, * though none are known now (the separate list-status command * is used for this, but it can also print "UNKNOWN"). To handle * this possibility, skip such entries. * * @TODO Document or eliminate the implied restriction of target * names. */ free(entry); } else { // We have a valid entry *output = g_list_append(*output, entry); } entry_start = i + 1; } } } /*! * \internal * \brief Parse a list of targets from a string * * \param[in] list_output Target list as a string * * \return List of target names * \note The target list string format is flexible, to allow for user-specified * lists such pcmk_host_list and the output of an agent's list action * (whether direct or via the API, which escapes newlines). There may be * multiple lines, separated by either a newline or an escaped newline * (backslash n). Each line may have one or more target names, separated * by any combination of whitespace, commas, and semi-colons. Lines * containing "invalid" or "variable" will be ignored entirely. Target * names "on" or "off" (case-insensitive) will be ignored. Target names * may contain only alphanumeric characters, underbars (_), dashes (-), * and dots (.) (if any other character occurs in the name, it and all * subsequent characters in the name will be ignored). * \note The caller is responsible for freeing the result with * g_list_free_full(result, free). */ GList * stonith__parse_targets(const char *target_spec) { GList *targets = NULL; if (target_spec != NULL) { size_t out_len = strlen(target_spec); size_t line_start = 0; // Starting index of line being processed for (size_t i = 0; i <= out_len; ++i) { if ((target_spec[i] == '\n') || (target_spec[i] == '\0') || ((target_spec[i] == '\\') && (target_spec[i + 1] == 'n'))) { // We've reached the end of one line of output int len = i - line_start; if (len > 0) { char *line = strndup(target_spec + line_start, len); pcmk__assert(line != NULL); // cppcheck-suppress nullPointerOutOfMemory line[len] = '\0'; // Because it might be a newline parse_list_line(line, len, &targets); free(line); } if (target_spec[i] == '\\') { ++i; // backslash-n takes up two positions } line_start = i + 1; } } } return targets; } /*! * \internal * \brief Check whether a fencing failure was followed by an equivalent success * * \param[in] event Fencing failure * \param[in] top_history Complete fencing history (must be sorted by * stonith__sort_history() beforehand) * * \return The name of the node that executed the fencing if a later successful * event exists, or NULL if no such event exists */ const char * stonith__later_succeeded(const stonith_history_t *event, const stonith_history_t *top_history) { const char *other = NULL; for (const stonith_history_t *prev_hp = top_history; prev_hp != NULL; prev_hp = prev_hp->next) { if (prev_hp == event) { break; } if ((prev_hp->state == st_done) && pcmk__str_eq(event->target, prev_hp->target, pcmk__str_casei) && pcmk__str_eq(event->action, prev_hp->action, pcmk__str_none) && ((event->completed < prev_hp->completed) || ((event->completed == prev_hp->completed) && (event->completed_nsec < prev_hp->completed_nsec)))) { if ((event->delegate == NULL) || pcmk__str_eq(event->delegate, prev_hp->delegate, pcmk__str_casei)) { // Prefer equivalent fencing by same executioner return prev_hp->delegate; } else if (other == NULL) { // Otherwise remember first successful executioner other = (prev_hp->delegate == NULL)? "some node" : prev_hp->delegate; } } } return other; } /*! * \internal * \brief Sort fencing history, pending first then by most recently completed * * \param[in,out] history List of stonith actions * * \return New head of sorted \p history */ stonith_history_t * stonith__sort_history(stonith_history_t *history) { stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp; for (hp = history; hp; ) { tmp = hp->next; if ((hp->state == st_done) || (hp->state == st_failed)) { /* sort into new */ if ((!new) || (hp->completed > new->completed) || ((hp->completed == new->completed) && (hp->completed_nsec > new->completed_nsec))) { hp->next = new; new = hp; } else { np = new; do { if ((!np->next) || (hp->completed > np->next->completed) || ((hp->completed == np->next->completed) && (hp->completed_nsec > np->next->completed_nsec))) { hp->next = np->next; np->next = hp; break; } np = np->next; } while (1); } } else { /* put into pending */ hp->next = pending; pending = hp; } hp = tmp; } /* pending actions don't have a completed-stamp so make them go front */ if (pending) { stonith_history_t *last_pending = pending; while (last_pending->next) { last_pending = last_pending->next; } last_pending->next = new; new = pending; } return new; } /*! * \internal * \brief Return string equivalent of a fencing operation state value * * \param[in] state Fencing operation state value * * \return Human-friendly string equivalent of \p state */ const char * stonith__op_state_text(enum op_state state) { // @COMPAT Move this to the fencer after dropping stonith_op_state_str() switch (state) { case st_query: return "querying"; case st_exec: return "executing"; case st_done: return "completed"; case st_duplicate: return "duplicate"; case st_failed: return "failed"; default: return "unknown"; } } stonith_history_t * stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data) { for (stonith_history_t *hp = history; hp; hp = hp->next) { if (matching_fn(hp, user_data)) { return hp; } } return NULL; } bool stonith__event_state_pending(stonith_history_t *history, void *user_data) { return history->state != st_failed && history->state != st_done; } bool stonith__event_state_eq(stonith_history_t *history, void *user_data) { return history->state == GPOINTER_TO_INT(user_data); } bool stonith__event_state_neq(stonith_history_t *history, void *user_data) { return history->state != GPOINTER_TO_INT(user_data); } /*! * \internal * \brief Check whether a given parameter exists in a fence agent's metadata * * \param[in] metadata Agent metadata * \param[in] name Parameter name * * \retval \c true If \p name exists as a parameter in \p metadata * \retval \c false Otherwise */ static bool param_is_supported(xmlNode *metadata, const char *name) { char *xpath_s = crm_strdup_printf("//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='%s']", name); xmlXPathObject *xpath = pcmk__xpath_search(metadata->doc, xpath_s); bool supported = (pcmk__xpath_num_results(xpath) > 0); free(xpath_s); xmlXPathFreeObject(xpath); return supported; } /*! * \internal * \brief Get the default host argument based on a device's agent metadata * * If an agent supports the "plug" parameter, default to that. Otherwise default * to the "port" parameter if supported. Otherwise return \c NULL. * * \param[in] metadata Agent metadata * * \return Parameter name for default host argument */ const char * stonith__default_host_arg(xmlNode *metadata) { CRM_CHECK(metadata != NULL, return NULL); if (param_is_supported(metadata, "plug")) { return "plug"; } if (param_is_supported(metadata, "port")) { return "port"; } return NULL; } /*! * \internal * \brief Retrieve fence agent meta-data asynchronously * * \param[in] agent Agent to execute * \param[in] timeout_sec Error if not complete within this time * \param[in] callback Function to call with result (this will always be * called, whether by this function directly or * later via the main loop, and on success the * metadata will be in its result argument's * action_stdout) * \param[in,out] user_data User data to pass to callback * * \return Standard Pacemaker return code * \note The caller must use a main loop. This function is not a * stonith_api_operations_t method because it does not need a stonith_t * object and does not go through the fencer, but executes the agent * directly. */ int stonith__metadata_async(const char *agent, int timeout_sec, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data) { switch (get_namespace_from_agent(agent)) { case st_namespace_rhcs: { stonith_action_t *action = NULL; int rc = pcmk_ok; action = stonith__action_create(agent, PCMK_ACTION_METADATA, NULL, timeout_sec, NULL, NULL, NULL); rc = stonith__execute_async(action, user_data, callback, NULL); if (rc != pcmk_ok) { callback(0, stonith__action_result(action), user_data); stonith__destroy_action(action); } return pcmk_legacy2rc(rc); } #if HAVE_STONITH_STONITH_H case st_namespace_lha: // LHA metadata is simply synthesized, so simulate async { pcmk__action_result_t result = { .exit_status = CRM_EX_OK, .execution_status = PCMK_EXEC_DONE, .exit_reason = NULL, .action_stdout = NULL, .action_stderr = NULL, }; stonith__lha_metadata(agent, timeout_sec, &result.action_stdout); callback(0, &result, user_data); pcmk__reset_result(&result); return pcmk_rc_ok; } #endif default: { pcmk__action_result_t result = { .exit_status = CRM_EX_NOSUCH, .execution_status = PCMK_EXEC_ERROR_HARD, .exit_reason = crm_strdup_printf("No such agent '%s'", agent), .action_stdout = NULL, .action_stderr = NULL, }; callback(0, &result, user_data); pcmk__reset_result(&result); return ENOENT; } } } /*! * \internal * \brief Return the exit status from an async action callback * * \param[in] data Callback data * * \return Exit status from callback data */ int stonith__exit_status(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return CRM_EX_ERROR; } return ((pcmk__action_result_t *) data->opaque)->exit_status; } /*! * \internal * \brief Return the execution status from an async action callback * * \param[in] data Callback data * * \return Execution status from callback data */ int stonith__execution_status(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return PCMK_EXEC_UNKNOWN; } return ((pcmk__action_result_t *) data->opaque)->execution_status; } /*! * \internal * \brief Return the exit reason from an async action callback * * \param[in] data Callback data * * \return Exit reason from callback data */ const char * stonith__exit_reason(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return NULL; } return ((pcmk__action_result_t *) data->opaque)->exit_reason; } /*! * \internal * \brief Return the exit status from an event notification * * \param[in] event Event * * \return Exit status from event */ int stonith__event_exit_status(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return CRM_EX_ERROR; } else { struct event_private *event_private = event->opaque; return event_private->result.exit_status; } } /*! * \internal * \brief Return the execution status from an event notification * * \param[in] event Event * * \return Execution status from event */ int stonith__event_execution_status(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return PCMK_EXEC_UNKNOWN; } else { struct event_private *event_private = event->opaque; return event_private->result.execution_status; } } /*! * \internal * \brief Return the exit reason from an event notification * * \param[in] event Event * * \return Exit reason from event */ const char * stonith__event_exit_reason(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return NULL; } else { struct event_private *event_private = event->opaque; return event_private->result.exit_reason; } } /*! * \internal * \brief Return a human-friendly description of a fencing event * * \param[in] event Event to describe * * \return Newly allocated string with description of \p event * \note The caller is responsible for freeing the return value. * This function asserts on memory errors and never returns NULL. */ char * stonith__event_description(const stonith_event_t *event) { // Use somewhat readable defaults const char *origin = pcmk__s(event->client_origin, "a client"); const char *origin_node = pcmk__s(event->origin, "a node"); const char *executioner = pcmk__s(event->executioner, "the cluster"); const char *device = pcmk__s(event->device, "unknown"); const char *action = pcmk__s(event->action, event->operation); const char *target = pcmk__s(event->target, "no node"); const char *reason = stonith__event_exit_reason(event); const char *status; if (action == NULL) { action = "(unknown)"; } if (stonith__event_execution_status(event) != PCMK_EXEC_DONE) { status = pcmk_exec_status_str(stonith__event_execution_status(event)); } else if (stonith__event_exit_status(event) != CRM_EX_OK) { status = pcmk_exec_status_str(PCMK_EXEC_ERROR); } else { status = crm_exit_str(CRM_EX_OK); } if (pcmk__str_eq(event->operation, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return crm_strdup_printf("Fencing history may have changed"); } else if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { return crm_strdup_printf("A fencing device (%s) was added", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_DEL, pcmk__str_none)) { return crm_strdup_printf("A fencing device (%s) was removed", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { return crm_strdup_printf("A fencing topology level (%s) was added", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { return crm_strdup_printf("A fencing topology level (%s) was removed", device); } // event->operation should be PCMK__VALUE_ST_NOTIFY_FENCE at this point return crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s%s%s%s (ref=%s)", action, target, executioner, origin, origin_node, status, ((reason == NULL)? "" : " ("), pcmk__s(reason, ""), ((reason == NULL)? "" : ")"), pcmk__s(event->id, "(none)")); } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START // See comments in stonith-ng.h for why we re-declare before defining stonith_t *stonith_api_new(void); stonith_t * stonith_api_new(void) { return stonith__api_new(); } void stonith_api_delete(stonith_t *stonith); void stonith_api_delete(stonith_t *stonith) { stonith__api_free(stonith); } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, pcmk__s(blob->id, "no ID")); } void stonith_dump_pending_callbacks(stonith_t *stonith); void stonith_dump_pending_callbacks(stonith_t *stonith) { stonith_private_t *private = stonith->st_private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } bool stonith_dispatch(stonith_t *stonith_api); bool stonith_dispatch(stonith_t *stonith_api) { return (stonith__api_dispatch(stonith_api) == pcmk_rc_ok); } stonith_key_value_t *stonith_key_value_add(stonith_key_value_t *head, const char *key, const char *value); stonith_key_value_t * stonith_key_value_add(stonith_key_value_t *head, const char *key, const char *value) { return stonith__key_value_add(head, key, value); } void stonith_key_value_freeall(stonith_key_value_t *head, int keys, int values); void stonith_key_value_freeall(stonith_key_value_t *head, int keys, int values) { stonith__key_value_freeall(head, (keys != 0), (values != 0)); } void stonith_history_free(stonith_history_t *head); void stonith_history_free(stonith_history_t *head) { stonith__history_free(head); } int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts); int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts) { return pcmk_rc2legacy(stonith__api_connect_retry(st, name, max_attempts)); } const char *stonith_op_state_str(enum op_state state); const char * stonith_op_state_str(enum op_state state) { return stonith__op_state_text(state); } bool stonith_agent_exists(const char *agent, int timeout); bool stonith_agent_exists(const char *agent, int timeout) { return stonith__agent_exists(agent); } const char *stonith_action_str(const char *action); const char * stonith_action_str(const char *action) { if (action == NULL) { return "fencing"; } else if (strcmp(action, PCMK_ACTION_ON) == 0) { return "unfencing"; } else if (strcmp(action, PCMK_ACTION_OFF) == 0) { return "turning off"; } else { return action; } } enum stonith_namespace stonith_text2namespace(const char *namespace_s); enum stonith_namespace stonith_text2namespace(const char *namespace_s) { return parse_namespace(namespace_s); } const char *stonith_namespace2text(enum stonith_namespace st_namespace); const char * stonith_namespace2text(enum stonith_namespace st_namespace) { return namespace_text(st_namespace); } enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s); enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s) { if (pcmk__str_eq(namespace_s, "internal", pcmk__str_none)) { return st_namespace_internal; } return get_namespace_from_agent(agent); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index c86d735e4b..fc368769e2 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2684 +1,2684 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // uint32_t, uint64_t #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // stonith__* #include #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); // GnuTLS client handshake timeout in seconds #define TLS_HANDSHAKE_TIMEOUT 5 static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; static void lrmd_tls_connection_destroy(gpointer userdata); static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_api_handshake); typedef struct lrmd_private_s { uint64_t type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; pcmk__remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; char *server; int port; pcmk__tls_t *tls; /* while the async connection is occurring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; crm_trigger_t *handshake_trigger; lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static int process_lrmd_handshake_reply(xmlNode *reply, lrmd_private_t *native); static void report_async_connection_result(lrmd_t * lrmd, int rc); static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = pcmk__assert_alloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = pcmk__assert_alloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } /*! * \brief Create a new lrmd_event_data_t object * * \param[in] rsc_id ID of resource involved in event * \param[in] task Action name * \param[in] interval_ms Action interval * * \return Newly allocated and initialized lrmd_event_data_t * \note This functions asserts on memory errors, so the return value is * guaranteed to be non-NULL. The caller is responsible for freeing the * result with lrmd_free_event(). */ lrmd_event_data_t * lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms) { lrmd_event_data_t *event = pcmk__assert_alloc(1, sizeof(lrmd_event_data_t)); // lrmd_event_data_t has (const char *) members that lrmd_free_event() frees event->rsc_id = pcmk__str_copy(rsc_id); event->op_type = pcmk__str_copy(task); event->interval_ms = interval_ms; return event; } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = pcmk__assert_alloc(1, sizeof(lrmd_event_data_t)); copy->type = event->type; // lrmd_event_data_t has (const char *) members that lrmd_free_event() frees copy->rsc_id = pcmk__str_copy(event->rsc_id); copy->op_type = pcmk__str_copy(event->op_type); copy->user_data = pcmk__str_copy(event->user_data); copy->output = pcmk__str_copy(event->output); copy->remote_nodename = pcmk__str_copy(event->remote_nodename); copy->exit_reason = pcmk__str_copy(event->exit_reason); copy->call_id = event->call_id; copy->timeout = event->timeout; copy->interval_ms = event->interval_ms; copy->start_delay = event->start_delay; copy->rsc_deleted = event->rsc_deleted; copy->rc = event->rc; copy->op_status = event->op_status; copy->t_run = event->t_run; copy->t_rcchange = event->t_rcchange; copy->exec_time = event->exec_time; copy->queue_time = event->queue_time; copy->connection_rc = event->connection_rc; copy->params = pcmk__str_table_dup(event->params); return copy; } /*! * \brief Free an executor event * * \param[in,out] Executor event object to free */ void lrmd_free_event(lrmd_event_data_t *event) { if (event == NULL) { return; } // @TODO Why are these const char *? free((void *) event->rsc_id); free((void *) event->op_type); free((void *) event->user_data); free((void *) event->remote_nodename); lrmd__reset_result(event); if (event->params != NULL) { g_hash_table_destroy(event->params); } free(event); } static void lrmd_dispatch_internal(gpointer data, gpointer user_data) { xmlNode *msg = data; lrmd_t *lrmd = user_data; const char *type; const char *proxy_session = crm_element_value(msg, PCMK__XA_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->lrmd_private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, PCMK__XA_LRMD_OP); crm_element_value_int(msg, PCMK__XA_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, PCMK__XA_LRMD_RSC_ID); if (pcmk__str_eq(type, LRMD_OP_RSC_REG, pcmk__str_none)) { event.type = lrmd_event_register; } else if (pcmk__str_eq(type, LRMD_OP_RSC_UNREG, pcmk__str_none)) { event.type = lrmd_event_unregister; } else if (pcmk__str_eq(type, LRMD_OP_RSC_EXEC, pcmk__str_none)) { int rc = 0; int exec_time = 0; int queue_time = 0; time_t epoch = 0; crm_element_value_int(msg, PCMK__XA_LRMD_TIMEOUT, &event.timeout); crm_element_value_ms(msg, PCMK__XA_LRMD_RSC_INTERVAL, &event.interval_ms); crm_element_value_int(msg, PCMK__XA_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, PCMK__XA_LRMD_EXEC_RC, &rc); event.rc = (enum ocf_exitcode) rc; crm_element_value_int(msg, PCMK__XA_LRMD_EXEC_OP_STATUS, &event.op_status); crm_element_value_int(msg, PCMK__XA_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_epoch(msg, PCMK__XA_LRMD_RUN_TIME, &epoch); event.t_run = epoch; crm_element_value_epoch(msg, PCMK__XA_LRMD_RCCHANGE_TIME, &epoch); event.t_rcchange = epoch; crm_element_value_int(msg, PCMK__XA_LRMD_EXEC_TIME, &exec_time); CRM_LOG_ASSERT(exec_time >= 0); event.exec_time = QB_MAX(0, exec_time); crm_element_value_int(msg, PCMK__XA_LRMD_QUEUE_TIME, &queue_time); CRM_LOG_ASSERT(queue_time >= 0); event.queue_time = QB_MAX(0, queue_time); event.op_type = crm_element_value(msg, PCMK__XA_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, PCMK__XA_LRMD_RSC_USERDATA_STR); event.type = lrmd_event_exec_complete; /* output and exit_reason may be freed by a callback */ event.output = crm_element_value_copy(msg, PCMK__XA_LRMD_RSC_OUTPUT); lrmd__set_result(&event, event.rc, event.op_status, crm_element_value(msg, PCMK__XA_LRMD_RSC_EXIT_REASON)); event.params = xml2list(msg); } else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) { event.type = lrmd_event_new_client; } else if (pcmk__str_eq(type, LRMD_OP_POKE, pcmk__str_none)) { event.type = lrmd_event_poke; } else { return; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } lrmd__reset_result(&event); } // \return Always 0, to indicate that IPC mainloop source should be kept static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; if (native->callback != NULL) { xmlNode *msg = pcmk__xml_parse(buffer); lrmd_dispatch_internal(msg, lrmd); pcmk__xml_free(msg); } return 0; } static void lrmd_free_xml(gpointer userdata) { pcmk__xml_free((xmlNode *) userdata); } static bool remote_executor_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; return (native->remote->tls_session != NULL); } static void handle_remote_msg(xmlNode *xml, lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; const char *msg_type = NULL; msg_type = crm_element_value(xml, PCMK__XA_LRMD_REMOTE_MSG_TYPE); if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { lrmd_dispatch_internal(xml, lrmd); } else if (pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { const char *op = crm_element_value(xml, PCMK__XA_LRMD_OP); if (native->expected_late_replies > 0) { native->expected_late_replies--; /* The register op message we get as a response to lrmd_handshake_async * is a reply, so we have to handle that here. */ if (pcmk__str_eq(op, "register", pcmk__str_casei)) { int rc = process_lrmd_handshake_reply(xml, native); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); } } else { int reply_id = 0; crm_element_value_int(xml, PCMK__XA_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated Pacemaker Remote reply %d", reply_id); } } } /*! * \internal * \brief Notify trigger handler * * \param[in,out] userdata API connection * * \return Always return G_SOURCE_CONTINUE to leave this trigger handler in the * mainloop */ static int process_pending_notifies(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; if (native->pending_notify == NULL) { return G_SOURCE_CONTINUE; } crm_trace("Processing pending notifies"); g_list_foreach(native->pending_notify, lrmd_dispatch_internal, lrmd); g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; return G_SOURCE_CONTINUE; } /*! * \internal * \brief TLS dispatch function for file descriptor sources * * \param[in,out] userdata API connection * * \return -1 on error to remove the source from the mainloop, or 0 otherwise * to leave it in the mainloop */ static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *xml = NULL; int rc = pcmk_rc_ok; if (!remote_executor_connected(lrmd)) { crm_trace("TLS dispatch triggered after disconnect"); return -1; } crm_trace("TLS dispatch triggered"); rc = pcmk__remote_ready(native->remote, 0); if (rc == pcmk_rc_ok) { rc = pcmk__read_remote_message(native->remote, -1); } if (rc != pcmk_rc_ok && rc != ETIME) { crm_info("Lost %s executor connection while reading data", (native->remote_nodename? native->remote_nodename : "local")); lrmd_tls_disconnect(lrmd); return -1; } /* If rc is ETIME, there was nothing to read but we may already have a * full message in the buffer */ xml = pcmk__remote_message_xml(native->remote); if (xml == NULL) { return 0; } handle_remote_msg(xml, lrmd); pcmk__xml_free(xml); return 0; } /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_ready(native->ipc); case pcmk__client_tls: if (native->pending_notify) { return 1; } else { int rc = pcmk__remote_ready(native->remote, 0); switch (rc) { case pcmk_rc_ok: return 1; case ETIME: return 0; default: return pcmk_rc2legacy(rc); } } default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return -EPROTONOSUPPORT; } } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; pcmk__assert(lrmd != NULL); private = lrmd->lrmd_private; switch (private->type) { case pcmk__client_ipc: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); pcmk__ipc_free_client_buffer(private->ipc); } } break; case pcmk__client_tls: lrmd_tls_dispatch(lrmd); break; default: crm_err("Unsupported executor connection type (bug?): %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode *data, int timeout, enum lrmd_call_options options) { xmlNode *op_msg = NULL; CRM_CHECK(token != NULL, return NULL); op_msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_COMMAND); crm_xml_add(op_msg, PCMK__XA_T, PCMK__VALUE_LRMD); crm_xml_add(op_msg, PCMK__XA_LRMD_OP, op); crm_xml_add_int(op_msg, PCMK__XA_LRMD_TIMEOUT, timeout); crm_xml_add_int(op_msg, PCMK__XA_LRMD_CALLOPT, options); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(op_msg, PCMK__XE_LRMD_CALLDATA); pcmk__xml_copy(wrapper, data); } crm_trace("Created executor %s command with call options %.8lx (%d)", op, (long)options, options); return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: crm_info("Disconnected from local executor"); break; case pcmk__client_tls: crm_info("Disconnected from remote executor on %s", native->remote_nodename); break; default: crm_err("Unsupported executor connection type %d (bug?)", native->type); } /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(native->remote->tls_session); native->remote->tls_session = NULL; } if (native->tls) { pcmk__free_tls(native->tls); native->tls = NULL; } if (native->sock >= 0) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } if (native->handshake_trigger != NULL) { mainloop_destroy_trigger(native->handshake_trigger); native->handshake_trigger = NULL; } free(native->remote->buffer); free(native->remote->start_state); native->remote->buffer = NULL; native->remote->start_state = NULL; native->source = 0; native->sock = -1; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } // \return Standard Pacemaker return code int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type) { crm_xml_add_int(msg, PCMK__XA_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, PCMK__XA_LRMD_REMOTE_MSG_TYPE, msg_type); return pcmk__remote_send_xml(session, msg); } // \return Standard Pacemaker return code static int read_remote_reply(lrmd_t *lrmd, int total_timeout, int expected_reply_id, xmlNode **reply) { lrmd_private_t *native = lrmd->lrmd_private; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; int rc = pcmk_rc_ok; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } for (*reply = NULL; *reply == NULL; ) { *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { return ETIME; } rc = pcmk__read_remote_message(native->remote, remaining_timeout); if (rc != pcmk_rc_ok) { return rc; } *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { return ENOMSG; } } crm_element_value_int(*reply, PCMK__XA_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(*reply, PCMK__XA_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); pcmk__xml_free(*reply); *reply = NULL; } else if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, *reply); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } *reply = NULL; } else if (!pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); pcmk__xml_free(*reply); *reply = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } pcmk__xml_free(*reply); *reply = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return rc; } // \return Standard Pacemaker return code static int send_remote_message(lrmd_t *lrmd, xmlNode *msg) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd__remote_send_xml(native->remote, msg, global_remote_msg_id, "request"); if (rc != pcmk_rc_ok) { crm_err("Disconnecting because TLS message could not be sent to " "Pacemaker Remote: %s", pcmk_rc_str(rc)); lrmd_tls_disconnect(lrmd); } return rc; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; xmlNode *xml = NULL; if (!remote_executor_connected(lrmd)) { return -ENOTCONN; } rc = send_remote_message(lrmd, msg); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } rc = read_remote_reply(lrmd, timeout, global_remote_msg_id, &xml); if (rc != pcmk_rc_ok) { crm_err("Disconnecting remote after request %d reply not received: %s " QB_XS " rc=%d timeout=%dms", global_remote_msg_id, pcmk_rc_str(rc), rc, timeout); lrmd_tls_disconnect(lrmd); } if (reply) { *reply = xml; } else { pcmk__xml_free(xml); } return pcmk_rc2legacy(rc); } static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; case pcmk__client_tls: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; case pcmk__client_tls: rc = send_remote_message(lrmd, msg); if (rc == pcmk_rc_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } rc = pcmk_rc2legacy(rc); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_connected(native->ipc); case pcmk__client_tls: return remote_executor_connected(lrmd); default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return 0; } } /*! * \internal * \brief Send a prepared API command to the executor * * \param[in,out] lrmd Existing connection to the executor * \param[in] op Name of API command to send * \param[in] data Command data XML to add to the sent command * \param[out] output_data If expecting a reply, it will be stored here * \param[in] timeout Timeout in milliseconds (if 0, defaults to * a sensible value per the type of connection, * standard vs. pacemaker remote); * also propagated to the command XML * \param[in] call_options Call options to pass to server when sending * \param[in] expect_reply If true, wait for a reply from the server; * must be true for IPC (as opposed to TLS) clients * * \return pcmk_ok on success, -errno on error */ static int lrmd_send_command(lrmd_t *lrmd, const char *op, xmlNode *data, xmlNode **output_data, int timeout, enum lrmd_call_options options, bool expect_reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_LOG_ASSERT(native->token != NULL); crm_trace("Sending %s op to executor", op); op_msg = lrmd_create_op(native->token, op, data, timeout, options); if (op_msg == NULL) { return -EINVAL; } if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); goto done; } else if (op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, PCMK__XA_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Executor disconnected"); } pcmk__xml_free(op_msg); pcmk__xml_free(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); pcmk__xml_free(data); return rc < 0 ? rc : pcmk_ok; } // \return Standard Pacemaker return code int lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash) { int rc = pcmk_rc_ok; const char *value; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = pcmk__xe_create(NULL, PCMK__XA_LRMD_OP); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); value = g_hash_table_lookup(hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT); if ((value) && (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) { crm_xml_add(data, PCMK__XA_LRMD_WATCHDOG, value); } rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); pcmk__xml_free(data); return (rc < 0)? pcmk_legacy2rc(rc) : pcmk_rc_ok; } static xmlNode * lrmd_handshake_hello_msg(const char *name, bool is_proxy) { xmlNode *hello = pcmk__xe_create(NULL, PCMK__XE_LRMD_COMMAND); crm_xml_add(hello, PCMK__XA_T, PCMK__VALUE_LRMD); crm_xml_add(hello, PCMK__XA_LRMD_OP, CRM_OP_REGISTER); crm_xml_add(hello, PCMK__XA_LRMD_CLIENTNAME, name); crm_xml_add(hello, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (is_proxy) { pcmk__xe_set_bool_attr(hello, PCMK__XA_LRMD_IS_IPC_PROVIDER, true); } return hello; } static int process_lrmd_handshake_reply(xmlNode *reply, lrmd_private_t *native) { int rc = pcmk_rc_ok; const char *version = crm_element_value(reply, PCMK__XA_LRMD_PROTOCOL_VERSION); const char *msg_type = crm_element_value(reply, PCMK__XA_LRMD_OP); const char *tmp_ticket = crm_element_value(reply, PCMK__XA_LRMD_CLIENTID); const char *start_state = crm_element_value(reply, PCMK__XA_NODE_START_STATE); long long uptime = -1; crm_element_value_int(reply, PCMK__XA_LRMD_RC, &rc); rc = pcmk_legacy2rc(rc); /* The remote executor may add its uptime to the XML reply, which is useful * in handling transient attributes when the connection to the remote node * unexpectedly drops. If no parameter is given, just default to -1. */ crm_element_value_ll(reply, PCMK__XA_UPTIME, &uptime); native->remote->uptime = uptime; if (start_state) { native->remote->start_state = strdup(start_state); } if (rc == EPROTO) { crm_err("Executor protocol version mismatch between client (%s) and server (%s)", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_rc_ok; } return rc; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *reply = NULL; xmlNode *hello = lrmd_handshake_hello_msg(name, native->proxy_callback != NULL); rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the executor API: %d", rc); rc = ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = EPROTO; } else { rc = process_lrmd_handshake_reply(reply, native); } pcmk__xml_free(reply); pcmk__xml_free(hello); if (rc != pcmk_rc_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_handshake_async(lrmd_t * lrmd, const char *name) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *hello = lrmd_handshake_hello_msg(name, native->proxy_callback != NULL); rc = send_remote_message(lrmd, hello); if (rc == pcmk_rc_ok) { native->expected_late_replies++; } else { lrmd_api_disconnect(lrmd); } pcmk__xml_free(hello); return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to executor"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc != NULL) { rc = pcmk__connect_generic_ipc(native->ipc); if (rc == pcmk_rc_ok) { rc = pcmk__ipc_fd(native->ipc, fd); } if (rc != pcmk_rc_ok) { crm_err("Connection to executor failed: %s", pcmk_rc_str(rc)); rc = -ENOTCONN; } } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the executor API"); rc = -ENOTCONN; } return rc; } static void copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source) { pcmk__assert((dest != NULL) && (source != NULL) && (source->data != NULL)); dest->data = gnutls_malloc(source->size); pcmk__mem_assert(dest->data); memcpy(dest->data, source->data, source->size); dest->size = source->size; } static void clear_gnutls_datum(gnutls_datum_t *datum) { gnutls_free(datum->data); datum->data = NULL; datum->size = 0; } #define KEY_READ_LEN 256 // Chunk size for reading key from file // \return Standard Pacemaker return code static int read_gnutls_key(const char *location, gnutls_datum_t *key) { FILE *stream = NULL; size_t buf_len = KEY_READ_LEN; if ((location == NULL) || (key == NULL)) { return EINVAL; } stream = fopen(location, "r"); if (stream == NULL) { return errno; } key->data = gnutls_malloc(buf_len); key->size = 0; while (!feof(stream)) { int next = fgetc(stream); if (next == EOF) { if (!feof(stream)) { crm_warn("Pacemaker Remote key read was partially successful " "(copy in memory may be corrupted)"); } break; } if (key->size == buf_len) { buf_len = key->size + KEY_READ_LEN; key->data = gnutls_realloc(key->data, buf_len); pcmk__assert(key->data); } key->data[key->size++] = (unsigned char) next; } fclose(stream); if (key->size == 0) { clear_gnutls_datum(key); return ENOKEY; } return pcmk_rc_ok; } // Cache the most recently used Pacemaker Remote authentication key struct key_cache_s { time_t updated; // When cached key was read (valid for 1 minute) const char *location; // Where cached key was read from gnutls_datum_t key; // Cached key }; static bool key_is_cached(struct key_cache_s *key_cache) { return key_cache->updated != 0; } static bool key_cache_expired(struct key_cache_s *key_cache) { return (time(NULL) - key_cache->updated) >= 60; } static void clear_key_cache(struct key_cache_s *key_cache) { clear_gnutls_datum(&(key_cache->key)); if ((key_cache->updated != 0) || (key_cache->location != NULL)) { key_cache->updated = 0; key_cache->location = NULL; crm_debug("Cleared Pacemaker Remote key cache"); } } static void get_cached_key(struct key_cache_s *key_cache, gnutls_datum_t *key) { copy_gnutls_datum(key, &(key_cache->key)); crm_debug("Using cached Pacemaker Remote key from %s", pcmk__s(key_cache->location, "unknown location")); } static void cache_key(struct key_cache_s *key_cache, gnutls_datum_t *key, const char *location) { key_cache->updated = time(NULL); key_cache->location = location; copy_gnutls_datum(&(key_cache->key), key); crm_debug("Using (and cacheing) Pacemaker Remote key from %s", pcmk__s(location, "unknown location")); } /*! * \internal * \brief Get Pacemaker Remote authentication key from file or cache * * \param[in] location Path to key file to try (this memory must * persist across all calls of this function) * \param[out] key Key from location or cache * * \return Standard Pacemaker return code */ static int get_remote_key(const char *location, gnutls_datum_t *key) { static struct key_cache_s key_cache = { 0, }; int rc = pcmk_rc_ok; if ((location == NULL) || (key == NULL)) { return EINVAL; } if (key_is_cached(&key_cache)) { if (key_cache_expired(&key_cache)) { clear_key_cache(&key_cache); } else { get_cached_key(&key_cache, key); return pcmk_rc_ok; } } rc = read_gnutls_key(location, key); if (rc != pcmk_rc_ok) { return rc; } cache_key(&key_cache, key, location); return pcmk_rc_ok; } /*! * \internal * \brief Initialize the Pacemaker Remote authentication key * * Try loading the Pacemaker Remote authentication key from cache if available, * otherwise from these locations, in order of preference: * * - The value of the PCMK_authkey_location environment variable, if set * - The Pacemaker default key file location * * \param[out] key Where to store key * * \return Standard Pacemaker return code */ int lrmd__init_remote_key(gnutls_datum_t *key) { static const char *env_location = NULL; static bool need_env = true; int rc = pcmk_rc_ok; if (need_env) { env_location = pcmk__env_option(PCMK__ENV_AUTHKEY_LOCATION); need_env = false; } // Try location in environment variable, if set if (env_location != NULL) { rc = get_remote_key(env_location, key); if (rc == pcmk_rc_ok) { return pcmk_rc_ok; } crm_warn("Could not read Pacemaker Remote key from %s: %s", env_location, pcmk_rc_str(rc)); return ENOKEY; } // Try default location, if environment wasn't explicitly set to it rc = get_remote_key(DEFAULT_REMOTE_KEY_LOCATION, key); if (rc == pcmk_rc_ok) { return pcmk_rc_ok; } crm_warn("Could not read Pacemaker Remote key from default location %s: %s", DEFAULT_REMOTE_KEY_LOCATION, pcmk_rc_str(rc)); return ENOKEY; } static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->lrmd_private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } static void tls_handshake_failed(lrmd_t *lrmd, int tls_rc, int rc) { lrmd_private_t *native = lrmd->lrmd_private; crm_warn("Disconnecting after TLS handshake with " "Pacemaker Remote server %s:%d failed: %s", native->server, native->port, (rc == EPROTO)? gnutls_strerror(tls_rc) : pcmk_rc_str(rc)); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); gnutls_deinit(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); } static void tls_handshake_succeeded(lrmd_t *lrmd) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; /* Now that the handshake is done, see if any client TLS certificate is * close to its expiration date and log if so. If a TLS certificate is not * in use, this function will just return so we don't need to check for the * session type here. */ pcmk__tls_check_cert_expiration(native->remote->tls_session); crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded", native->server, native->port); rc = add_tls_to_mainloop(lrmd, true); /* If add_tls_to_mainloop failed, report that right now. Otherwise, we have * to wait until we read the async reply to report anything. */ if (rc != pcmk_rc_ok) { report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); } } /*! * \internal * \brief Perform a TLS client handshake with a Pacemaker Remote server * * \param[in] lrmd Newly established Pacemaker Remote executor connection * * \return Standard Pacemaker return code */ static int tls_client_handshake(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; int tls_rc = GNUTLS_E_SUCCESS; int rc = pcmk__tls_client_handshake(native->remote, TLS_HANDSHAKE_TIMEOUT, &tls_rc); if (rc != pcmk_rc_ok) { tls_handshake_failed(lrmd, tls_rc, rc); } return rc; } /*! * \internal * \brief Add trigger and file descriptor mainloop sources for TLS * * \param[in,out] lrmd API connection with established TLS session * \param[in] do_api_handshake Whether to perform executor handshake * * \return Standard Pacemaker return code */ static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_api_handshake) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_rc_ok; char *name = crm_strdup_printf("pacemaker-remote-%s:%d", native->server, native->port); struct mainloop_fd_callbacks tls_fd_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, process_pending_notifies, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &tls_fd_callbacks); /* Async connections lose the client name provided by the API caller, so we * have to use our generated name here to perform the executor handshake. * * @TODO Keep track of the caller-provided name. Perhaps we should be using * that name in this function instead of generating one anyway. */ if (do_api_handshake) { rc = lrmd_handshake_async(lrmd, name); } free(name); return rc; } struct handshake_data_s { lrmd_t *lrmd; time_t start_time; int timeout_sec; }; static gboolean try_handshake_cb(gpointer user_data) { struct handshake_data_s *hs = user_data; lrmd_t *lrmd = hs->lrmd; lrmd_private_t *native = lrmd->lrmd_private; pcmk__remote_t *remote = native->remote; int rc = pcmk_rc_ok; int tls_rc = GNUTLS_E_SUCCESS; if (time(NULL) >= hs->start_time + hs->timeout_sec) { rc = ETIME; tls_handshake_failed(lrmd, GNUTLS_E_TIMEDOUT, rc); free(hs); return 0; } rc = pcmk__tls_client_try_handshake(remote, &tls_rc); if (rc == pcmk_rc_ok) { tls_handshake_succeeded(lrmd); free(hs); return 0; } else if (rc == EAGAIN) { mainloop_set_trigger(native->handshake_trigger); return 1; } else { rc = EKEYREJECTED; tls_handshake_failed(lrmd, tls_rc, rc); free(hs); return 0; } } static void lrmd_tcp_connect_cb(void *userdata, int rc, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; int tls_rc = GNUTLS_E_SUCCESS; bool use_cert = pcmk__x509_enabled(); native->async_timer = 0; if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " QB_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } /* The TCP connection was successful, so establish the TLS connection. */ native->sock = sock; if (native->tls == NULL) { rc = pcmk__init_tls(&native->tls, false, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_PSK); - if (rc != pcmk_rc_ok) { + if ((rc != pcmk_rc_ok) || (native->tls == NULL)) { lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } } if (!use_cert) { gnutls_datum_t psk_key = { NULL, 0 }; rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " QB_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } pcmk__tls_add_psk_key(native->tls, &psk_key); gnutls_free(psk_key.data); } native->remote->tls_session = pcmk__new_tls_session(native->tls, sock); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EPROTO); return; } /* If the TLS handshake immediately succeeds or fails, we can handle that * now without having to deal with mainloops and retries. Otherwise, add a * trigger to keep trying until we get a result (or it times out). */ rc = pcmk__tls_client_try_handshake(native->remote, &tls_rc); if (rc == EAGAIN) { struct handshake_data_s *hs = NULL; if (native->handshake_trigger != NULL) { return; } hs = pcmk__assert_alloc(1, sizeof(struct handshake_data_s)); hs->lrmd = lrmd; hs->start_time = time(NULL); hs->timeout_sec = TLS_HANDSHAKE_TIMEOUT; native->handshake_trigger = mainloop_add_trigger(G_PRIORITY_LOW, try_handshake_cb, hs); mainloop_set_trigger(native->handshake_trigger); } else if (rc == pcmk_rc_ok) { tls_handshake_succeeded(lrmd); } else { tls_handshake_failed(lrmd, tls_rc, rc); } } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc = pcmk_rc_ok; int timer_id = 0; lrmd_private_t *native = lrmd->lrmd_private; native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, timeout, &timer_id, &(native->sock), lrmd, lrmd_tcp_connect_cb); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " QB_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); return rc; } native->async_timer = timer_id; return rc; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_rc_ok; bool use_cert = pcmk__x509_enabled(); lrmd_private_t *native = lrmd->lrmd_private; native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, 0, NULL, &(native->sock), NULL, NULL); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " QB_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); return ENOTCONN; } if (native->tls == NULL) { rc = pcmk__init_tls(&native->tls, false, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_PSK); if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); return rc; } } if (!use_cert) { gnutls_datum_t psk_key = { NULL, 0 }; rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); return rc; } pcmk__tls_add_psk_key(native->tls, &psk_key); gnutls_free(psk_key.data); } native->remote->tls_session = pcmk__new_tls_session(native->tls, native->sock); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); return EPROTO; } if (tls_client_handshake(lrmd) != pcmk_rc_ok) { return EKEYREJECTED; } crm_info("Client TLS connection established with Pacemaker Remote server %s:%d", native->server, native->port); if (fd) { *fd = native->sock; } else { rc = add_tls_to_mainloop(lrmd, false); } return rc; } static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = lrmd_ipc_connect(lrmd, fd); break; case pcmk__client_tls: rc = lrmd_tls_connect(lrmd, fd); rc = pcmk_rc2legacy(rc); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); rc = pcmk_rc2legacy(rc); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; CRM_CHECK(native && native->callback, return -EINVAL); switch (native->type) { case pcmk__client_ipc: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; case pcmk__client_tls: rc = lrmd_tls_connect_async(lrmd, timeout); rc = pcmk_rc2legacy(rc); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote->tls_session) { gnutls_bye(native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(native->remote->tls_session); native->remote->tls_session = NULL; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock >= 0) { close(native->sock); native->sock = -1; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_ok; switch (native->type) { case pcmk__client_ipc: crm_debug("Disconnecting from local executor"); lrmd_ipc_disconnect(lrmd); break; case pcmk__client_tls: crm_debug("Disconnecting from remote executor on %s", native->remote_nodename); lrmd_tls_disconnect(lrmd); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return rc; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && (provider == NULL)) { return -EINVAL; } data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); crm_xml_add(data, PCMK__XA_LRMD_CLASS, class); crm_xml_add(data, PCMK__XA_LRMD_PROVIDER, provider); crm_xml_add(data, PCMK__XA_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, true); pcmk__xml_free(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, true); pcmk__xml_free(data); return rc; } lrmd_rsc_info_t * lrmd_new_rsc_info(const char *rsc_id, const char *standard, const char *provider, const char *type) { lrmd_rsc_info_t *rsc_info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t)); rsc_info->id = pcmk__str_copy(rsc_id); rsc_info->standard = pcmk__str_copy(standard); rsc_info->provider = pcmk__str_copy(provider); rsc_info->type = pcmk__str_copy(type); return rsc_info; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { return lrmd_new_rsc_info(rsc_info->id, rsc_info->standard, rsc_info->provider, rsc_info->type); } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->standard); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, true); pcmk__xml_free(data); if (!output) { return NULL; } class = crm_element_value(output, PCMK__XA_LRMD_CLASS); provider = crm_element_value(output, PCMK__XA_LRMD_PROVIDER); type = crm_element_value(output, PCMK__XA_LRMD_TYPE); if (!class || !type) { pcmk__xml_free(output); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && !provider) { pcmk__xml_free(output); return NULL; } rsc_info = lrmd_new_rsc_info(rsc_id, class, provider, type); pcmk__xml_free(output); return rsc_info; } void lrmd_free_op_info(lrmd_op_info_t *op_info) { if (op_info) { free(op_info->rsc_id); free(op_info->action); free(op_info->interval_ms_s); free(op_info->timeout_ms_s); free(op_info); } } static int lrmd_api_get_recurring_ops(lrmd_t *lrmd, const char *rsc_id, int timeout_ms, enum lrmd_call_options options, GList **output) { xmlNode *data = NULL; xmlNode *output_xml = NULL; int rc = pcmk_ok; if (output == NULL) { return -EINVAL; } *output = NULL; // Send request if (rsc_id) { data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); } rc = lrmd_send_command(lrmd, LRMD_OP_GET_RECURRING, data, &output_xml, timeout_ms, options, true); if (data) { pcmk__xml_free(data); } // Process reply if ((rc != pcmk_ok) || (output_xml == NULL)) { return rc; } for (const xmlNode *rsc_xml = pcmk__xe_first_child(output_xml, PCMK__XE_LRMD_RSC, NULL, NULL); (rsc_xml != NULL) && (rc == pcmk_ok); rsc_xml = pcmk__xe_next(rsc_xml, PCMK__XE_LRMD_RSC)) { rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID); if (rsc_id == NULL) { crm_err("Could not parse recurring operation information from executor"); continue; } for (const xmlNode *op_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC_OP, NULL, NULL); op_xml != NULL; op_xml = pcmk__xe_next(op_xml, PCMK__XE_LRMD_RSC_OP)) { lrmd_op_info_t *op_info = calloc(1, sizeof(lrmd_op_info_t)); if (op_info == NULL) { rc = -ENOMEM; break; } op_info->rsc_id = strdup(rsc_id); op_info->action = crm_element_value_copy(op_xml, PCMK__XA_LRMD_RSC_ACTION); op_info->interval_ms_s = crm_element_value_copy(op_xml, PCMK__XA_LRMD_RSC_INTERVAL); op_info->timeout_ms_s = crm_element_value_copy(op_xml, PCMK__XA_LRMD_TIMEOUT); *output = g_list_prepend(*output, op_info); } } pcmk__xml_free(output_xml); return rc; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->lrmd_private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->lrmd_private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->lrmd_private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, PCMK__XA_LRMD_OP, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith__api_new(); if (stonith_api == NULL) { crm_err("Could not get fence agent meta-data: API memory allocation failed"); return -ENOMEM; } rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, NULL, output, 0); if ((rc == pcmk_ok) && (*output == NULL)) { rc = -EIO; } stonith_api->cmds->free(stonith_api); return rc; } static int lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options) { return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type, output, options, NULL); } static int lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options, lrmd_key_value_t *params) { svc_action_t *action = NULL; GHashTable *params_table = NULL; if (!standard || !type) { lrmd_key_value_freeall(params); return -EINVAL; } if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_key_value_freeall(params); // stonith-class resources don't support a provider return stonith_get_metadata(type, output); } params_table = pcmk__strkey_table(free, free); for (const lrmd_key_value_t *param = params; param; param = param->next) { pcmk__insert_dup(params_table, param->key, param->value); } action = services__create_resource_action(type, standard, provider, type, PCMK_ACTION_META_DATA, 0, PCMK_DEFAULT_ACTION_TIMEOUT_MS, params_table, 0); lrmd_key_value_freeall(params); if (action == NULL) { return -ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { services_action_free(action); return -EINVAL; } if (!services_action_sync(action)) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_exec(lrmd_t *lrmd, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); xmlNode *args = pcmk__xe_create(data, PCMK__XE_ATTRIBUTES); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); crm_xml_add(data, PCMK__XA_LRMD_RSC_ACTION, action); crm_xml_add(data, PCMK__XA_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_ms(data, PCMK__XA_LRMD_RSC_INTERVAL, interval_ms); crm_xml_add_int(data, PCMK__XA_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, PCMK__XA_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, true); pcmk__xml_free(data); lrmd_key_value_freeall(params); return rc; } /* timeout is in ms */ static int lrmd_api_exec_alert(lrmd_t *lrmd, const char *alert_id, const char *alert_path, int timeout, lrmd_key_value_t *params) { int rc = pcmk_ok; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_ALERT); xmlNode *args = pcmk__xe_create(data, PCMK__XE_ATTRIBUTES); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_ALERT_ID, alert_id); crm_xml_add(data, PCMK__XA_LRMD_ALERT_PATH, alert_path); crm_xml_add_int(data, PCMK__XA_LRMD_TIMEOUT, timeout); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, data, NULL, timeout, lrmd_opt_notify_orig_only, true); pcmk__xml_free(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t *lrmd, const char *rsc_id, const char *action, guint interval_ms) { int rc = pcmk_ok; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ACTION, action); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); crm_xml_add_ms(data, PCMK__XA_LRMD_RSC_INTERVAL, interval_ms); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, true); pcmk__xml_free(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith__api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if (stonith_api == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return -ENOMEM; } stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith__key_value_freeall(stonith_resources, true, false); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; int stonith_count = 0; // Initially, whether to include stonith devices if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stonith_count = 1; } else { GList *gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { stonith_count = 1; } } if (stonith_count) { // Now, if stonith devices are included, how many there are stonith_count = list_stonith_agents(resources); if (stonith_count > 0) { rc += stonith_count; } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static bool does_provider_have_agent(const char *agent, const char *provider, const char *class) { bool found = false; GList *agents = NULL; GList *gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (pcmk__str_eq(agent, gIter2->data, pcmk__str_casei)) { found = true; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GList *gIter = NULL; ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GList *gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } /*! * \internal * \brief Create an executor API object * * \param[out] api Will be set to newly created API object (it is the * caller's responsibility to free this value with * lrmd_api_delete() if this function succeeds) * \param[in] nodename If the object will be used for a remote connection, * the node name to use in cluster for remote executor * \param[in] server If the object will be used for a remote connection, * the resolvable host name to connect to * \param[in] port If the object will be used for a remote connection, * port number on \p server to connect to * * \return Standard Pacemaker return code * \note If the caller leaves one of \p nodename or \p server NULL, the other's * value will be used for both. If the caller leaves both NULL, an API * object will be created for a local executor connection. */ int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port) { lrmd_private_t *pvt = NULL; if (api == NULL) { return EINVAL; } *api = NULL; // Allocate all memory needed *api = calloc(1, sizeof(lrmd_t)); if (*api == NULL) { return ENOMEM; } pvt = calloc(1, sizeof(lrmd_private_t)); if (pvt == NULL) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } (*api)->lrmd_private = pvt; // @TODO Do we need to do this for local connections? pvt->remote = calloc(1, sizeof(pcmk__remote_t)); (*api)->cmds = calloc(1, sizeof(lrmd_api_operations_t)); if ((pvt->remote == NULL) || ((*api)->cmds == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } // Set methods (*api)->cmds->connect = lrmd_api_connect; (*api)->cmds->connect_async = lrmd_api_connect_async; (*api)->cmds->is_connected = lrmd_api_is_connected; (*api)->cmds->poke_connection = lrmd_api_poke_connection; (*api)->cmds->disconnect = lrmd_api_disconnect; (*api)->cmds->register_rsc = lrmd_api_register_rsc; (*api)->cmds->unregister_rsc = lrmd_api_unregister_rsc; (*api)->cmds->get_rsc_info = lrmd_api_get_rsc_info; (*api)->cmds->get_recurring_ops = lrmd_api_get_recurring_ops; (*api)->cmds->set_callback = lrmd_api_set_callback; (*api)->cmds->get_metadata = lrmd_api_get_metadata; (*api)->cmds->exec = lrmd_api_exec; (*api)->cmds->cancel = lrmd_api_cancel; (*api)->cmds->list_agents = lrmd_api_list_agents; (*api)->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; (*api)->cmds->list_standards = lrmd_api_list_standards; (*api)->cmds->exec_alert = lrmd_api_exec_alert; (*api)->cmds->get_metadata_params = lrmd_api_get_metadata_params; if ((nodename == NULL) && (server == NULL)) { pvt->type = pcmk__client_ipc; } else { if (nodename == NULL) { nodename = server; } else if (server == NULL) { server = nodename; } pvt->type = pcmk__client_tls; pvt->remote_nodename = strdup(nodename); pvt->server = strdup(server); if ((pvt->remote_nodename == NULL) || (pvt->server == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } pvt->port = port; if (pvt->port == 0) { pvt->port = crm_default_remote_port(); } } return pcmk_rc_ok; } lrmd_t * lrmd_api_new(void) { lrmd_t *api = NULL; pcmk__assert(lrmd__new(&api, NULL, NULL, 0) == pcmk_rc_ok); return api; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { lrmd_t *api = NULL; pcmk__assert(lrmd__new(&api, nodename, server, port) == pcmk_rc_ok); return api; } void lrmd_api_delete(lrmd_t * lrmd) { if (lrmd == NULL) { return; } if (lrmd->cmds != NULL) { // Never NULL, but make static analysis happy if (lrmd->cmds->disconnect != NULL) { // Also never really NULL lrmd->cmds->disconnect(lrmd); // No-op if already disconnected } free(lrmd->cmds); } if (lrmd->lrmd_private != NULL) { lrmd_private_t *native = lrmd->lrmd_private; free(native->server); free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); free(lrmd->lrmd_private); } free(lrmd); } struct metadata_cb { void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data); void *user_data; }; /*! * \internal * \brief Process asynchronous metadata completion * * \param[in,out] action Metadata action that completed */ static void metadata_complete(svc_action_t *action) { struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; services__copy_result(action, &result); pcmk__set_result_output(&result, action->stdout_data, action->stderr_data); metadata_cb->callback(0, &result, metadata_cb->user_data); result.action_stdout = NULL; // Prevent free, because action owns it result.action_stderr = NULL; // Prevent free, because action owns it pcmk__reset_result(&result); free(metadata_cb); } /*! * \internal * \brief Retrieve agent metadata asynchronously * * \param[in] rsc Resource agent specification * \param[in] callback Function to call with result (this will always be * called, whether by this function directly or later * via the main loop, and on success the metadata will * be in its result argument's action_stdout) * \param[in,out] user_data User data to pass to callback * * \return Standard Pacemaker return code * \note This function is not a lrmd_api_operations_t method because it does not * need an lrmd_t object and does not go through the executor, but * executes the agent directly. */ int lrmd__metadata_async(const lrmd_rsc_info_t *rsc, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data) { svc_action_t *action = NULL; struct metadata_cb *metadata_cb = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(callback != NULL, return EINVAL); if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) { pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL, "Invalid resource specification"); callback(0, &result, user_data); pcmk__reset_result(&result); return EINVAL; } if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) { return stonith__metadata_async(rsc->type, pcmk__timeout_ms2s(PCMK_DEFAULT_ACTION_TIMEOUT_MS), callback, user_data); } action = services__create_resource_action(pcmk__s(rsc->id, rsc->type), rsc->standard, rsc->provider, rsc->type, PCMK_ACTION_META_DATA, 0, PCMK_DEFAULT_ACTION_TIMEOUT_MS, NULL, 0); if (action == NULL) { pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Out of memory"); callback(0, &result, user_data); pcmk__reset_result(&result); return ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { services__copy_result(action, &result); callback(0, &result, user_data); pcmk__reset_result(&result); services_action_free(action); return EINVAL; } action->cb_data = calloc(1, sizeof(struct metadata_cb)); if (action->cb_data == NULL) { services_action_free(action); pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Out of memory"); callback(0, &result, user_data); pcmk__reset_result(&result); return ENOMEM; } metadata_cb = (struct metadata_cb *) action->cb_data; metadata_cb->callback = callback; metadata_cb->user_data = user_data; if (!services_action_async(action, metadata_complete)) { services_action_free(action); return pcmk_rc_error; // @TODO Derive from action->rc and ->status } // The services library has taken responsibility for action return pcmk_rc_ok; } /*! * \internal * \brief Set the result of an executor event * * \param[in,out] event Executor event to set * \param[in] rc OCF exit status of event * \param[in] op_status Executor status of event * \param[in] exit_reason Human-friendly description of event */ void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, int op_status, const char *exit_reason) { if (event == NULL) { return; } event->rc = rc; event->op_status = op_status; // lrmd_event_data_t has (const char *) members that lrmd_free_event() frees pcmk__str_update((char **) &event->exit_reason, exit_reason); } /*! * \internal * \brief Clear an executor event's exit reason, output, and error output * * \param[in,out] event Executor event to reset */ void lrmd__reset_result(lrmd_event_data_t *event) { if (event == NULL) { return; } free((void *) event->exit_reason); event->exit_reason = NULL; free((void *) event->output); event->output = NULL; } /*! * \internal * \brief Get the uptime of a remote resource connection * * When the cluster connects to a remote resource, part of that resource's * handshake includes the uptime of the remote resource's connection. This * uptime is stored in the lrmd_t object. * * \return The connection's uptime, or -1 if unknown */ time_t lrmd__uptime(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote == NULL) { return -1; } else { return native->remote->uptime; } } const char * lrmd__node_start_state(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote == NULL) { return NULL; } else { return native->remote->start_state; } } diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c index 2c9ebcfd45..14e7be54d1 100644 --- a/lib/pacemaker/pcmk_sched_bundle.c +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -1,1080 +1,1085 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // xmlNode #include #include #include "libpacemaker_private.h" struct assign_data { const pcmk_node_t *prefer; bool stop_if_fail; }; /*! * \internal * \brief Assign a single bundle replica's resources (other than container) * * \param[in,out] replica Replica to assign * \param[in] user_data Preferred node, if any * * \return true (to indicate that any further replicas should be processed) */ static bool assign_replica(pcmk__bundle_replica_t *replica, void *user_data) { pcmk_node_t *container_host = NULL; struct assign_data *assign_data = user_data; const pcmk_node_t *prefer = assign_data->prefer; bool stop_if_fail = assign_data->stop_if_fail; const pcmk_resource_t *bundle = pe__const_top_resource(replica->container, true); if (replica->ip != NULL) { pcmk__rsc_trace(bundle, "Assigning bundle %s IP %s", bundle->id, replica->ip->id); replica->ip->priv->cmds->assign(replica->ip, prefer, stop_if_fail); } container_host = replica->container->priv->assigned_node; if (replica->remote != NULL) { if (pcmk__is_pacemaker_remote_node(container_host)) { /* REMOTE_CONTAINER_HACK: "Nested" connection resources must be on * the same host because Pacemaker Remote only supports a single * active connection. */ pcmk__new_colocation("#replica-remote-with-host-remote", NULL, PCMK_SCORE_INFINITY, replica->remote, container_host->priv->remote, NULL, NULL, pcmk__coloc_influence); } pcmk__rsc_trace(bundle, "Assigning bundle %s connection %s", bundle->id, replica->remote->id); replica->remote->priv->cmds->assign(replica->remote, prefer, stop_if_fail); } if (replica->child != NULL) { pcmk_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, replica->child->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (!pcmk__same_node(node, replica->node)) { node->assign->score = -PCMK_SCORE_INFINITY; } else if (!pcmk__threshold_reached(replica->child, node, NULL)) { node->assign->score = PCMK_SCORE_INFINITY; } } pcmk__set_rsc_flags(replica->child->priv->parent, pcmk__rsc_assigning); pcmk__rsc_trace(bundle, "Assigning bundle %s replica child %s", bundle->id, replica->child->id); replica->child->priv->cmds->assign(replica->child, replica->node, stop_if_fail); pcmk__clear_rsc_flags(replica->child->priv->parent, pcmk__rsc_assigning); } return true; } /*! * \internal * \brief Assign a bundle resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc * can't be assigned to a node, set the * descendant's next role to stopped and update * existing actions * * \return Node that \p rsc is assigned to, if assigned entirely to one node * * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can * completely undo the assignment. A successful assignment can be either * undone or left alone as final. A failed assignment has the same effect * as calling pcmk__unassign_resource(); there are no side effects on * roles or actions. */ pcmk_node_t * pcmk__bundle_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer, bool stop_if_fail) { GList *containers = NULL; pcmk_resource_t *bundled_resource = NULL; struct assign_data assign_data = { prefer, stop_if_fail }; pcmk__assert(pcmk__is_bundle(rsc)); pcmk__rsc_trace(rsc, "Assigning bundle %s", rsc->id); pcmk__set_rsc_flags(rsc, pcmk__rsc_assigning); pe__show_node_scores(!pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_output_scores), rsc, __func__, rsc->priv->allowed_nodes, rsc->priv->scheduler); // Assign all containers first, so we know what nodes the bundle will be on containers = g_list_sort(pe__bundle_containers(rsc), pcmk__cmp_instance); pcmk__assign_instances(rsc, containers, pe__bundle_max(rsc), rsc->priv->fns->max_per_node(rsc)); g_list_free(containers); // Then assign remaining replica resources pe__foreach_bundle_replica(rsc, assign_replica, (void *) &assign_data); // Finally, assign the bundled resources to each bundle node bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { pcmk_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, bundled_resource->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (pe__node_is_bundle_instance(rsc, node)) { node->assign->score = 0; } else { node->assign->score = -PCMK_SCORE_INFINITY; } } bundled_resource->priv->cmds->assign(bundled_resource, prefer, stop_if_fail); } pcmk__clear_rsc_flags(rsc, pcmk__rsc_assigning|pcmk__rsc_unassigned); return NULL; } /*! * \internal * \brief Create actions for a bundle replica's resources (other than child) * * \param[in,out] replica Replica to create actions for * \param[in] user_data Unused * * \return true (to indicate that any further replicas should be processed) */ static bool create_replica_actions(pcmk__bundle_replica_t *replica, void *user_data) { if (replica->ip != NULL) { replica->ip->priv->cmds->create_actions(replica->ip); } if (replica->container != NULL) { replica->container->priv->cmds->create_actions(replica->container); } if (replica->remote != NULL) { replica->remote->priv->cmds->create_actions(replica->remote); } return true; } /*! * \internal * \brief Create all actions needed for a given bundle resource * * \param[in,out] rsc Bundle resource to create actions for */ void pcmk__bundle_create_actions(pcmk_resource_t *rsc) { pcmk_action_t *action = NULL; GList *containers = NULL; pcmk_resource_t *bundled_resource = NULL; pcmk__assert(pcmk__is_bundle(rsc)); pe__foreach_bundle_replica(rsc, create_replica_actions, NULL); containers = pe__bundle_containers(rsc); pcmk__create_instance_actions(rsc, containers); g_list_free(containers); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { bundled_resource->priv->cmds->create_actions(bundled_resource); if (pcmk_is_set(bundled_resource->flags, pcmk__rsc_promotable)) { pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTED, true, true); action->priority = PCMK_SCORE_INFINITY; pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTED, true, true); action->priority = PCMK_SCORE_INFINITY; } } } /*! * \internal * \brief Create internal constraints for a bundle replica's resources * * \param[in,out] replica Replica to create internal constraints for * \param[in,out] user_data Replica's parent bundle * * \return true (to indicate that any further replicas should be processed) */ static bool replica_internal_constraints(pcmk__bundle_replica_t *replica, void *user_data) { pcmk_resource_t *bundle = user_data; replica->container->priv->cmds->internal_constraints(replica->container); // Start bundle -> start replica container pcmk__order_starts(bundle, replica->container, pcmk__ar_unrunnable_first_blocks |pcmk__ar_then_implies_first_graphed); // Stop bundle -> stop replica child and container if (replica->child != NULL) { pcmk__order_stops(bundle, replica->child, pcmk__ar_then_implies_first_graphed); } pcmk__order_stops(bundle, replica->container, pcmk__ar_then_implies_first_graphed); // Start replica container -> bundle is started pcmk__order_resource_actions(replica->container, PCMK_ACTION_START, bundle, PCMK_ACTION_RUNNING, pcmk__ar_first_implies_then_graphed); // Stop replica container -> bundle is stopped pcmk__order_resource_actions(replica->container, PCMK_ACTION_STOP, bundle, PCMK_ACTION_STOPPED, pcmk__ar_first_implies_then_graphed); if (replica->ip != NULL) { replica->ip->priv->cmds->internal_constraints(replica->ip); // Replica IP address -> replica container (symmetric) pcmk__order_starts(replica->ip, replica->container, pcmk__ar_unrunnable_first_blocks |pcmk__ar_guest_allowed); pcmk__order_stops(replica->container, replica->ip, pcmk__ar_then_implies_first|pcmk__ar_guest_allowed); pcmk__new_colocation("#ip-with-container", NULL, PCMK_SCORE_INFINITY, replica->ip, replica->container, NULL, NULL, pcmk__coloc_influence); } if (replica->remote != NULL) { /* This handles ordering and colocating remote relative to container * (via "#resource-with-container"). Since IP is also ordered and * colocated relative to the container, we don't need to do anything * explicit here with IP. */ replica->remote->priv->cmds->internal_constraints(replica->remote); } if (replica->child != NULL) { pcmk__assert(replica->remote != NULL); // "Start remote then child" is implicit in scheduler's remote logic } return true; } /*! * \internal * \brief Create implicit constraints needed for a bundle resource * * \param[in,out] rsc Bundle resource to create implicit constraints for */ void pcmk__bundle_internal_constraints(pcmk_resource_t *rsc) { pcmk_resource_t *bundled_resource = NULL; pcmk__assert(pcmk__is_bundle(rsc)); pe__foreach_bundle_replica(rsc, replica_internal_constraints, rsc); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource == NULL) { return; } // Start bundle -> start bundled clone pcmk__order_resource_actions(rsc, PCMK_ACTION_START, bundled_resource, PCMK_ACTION_START, pcmk__ar_then_implies_first_graphed); // Bundled clone is started -> bundle is started pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_RUNNING, rsc, PCMK_ACTION_RUNNING, pcmk__ar_first_implies_then_graphed); // Stop bundle -> stop bundled clone pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP, bundled_resource, PCMK_ACTION_STOP, pcmk__ar_then_implies_first_graphed); // Bundled clone is stopped -> bundle is stopped pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_STOPPED, rsc, PCMK_ACTION_STOPPED, pcmk__ar_first_implies_then_graphed); bundled_resource->priv->cmds->internal_constraints(bundled_resource); if (!pcmk_is_set(bundled_resource->flags, pcmk__rsc_promotable)) { return; } pcmk__promotable_restart_ordering(rsc); // Demote bundle -> demote bundled clone pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTE, bundled_resource, PCMK_ACTION_DEMOTE, pcmk__ar_then_implies_first_graphed); // Bundled clone is demoted -> bundle is demoted pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_DEMOTED, pcmk__ar_first_implies_then_graphed); // Promote bundle -> promote bundled clone pcmk__order_resource_actions(rsc, PCMK_ACTION_PROMOTE, bundled_resource, PCMK_ACTION_PROMOTE, pcmk__ar_then_implies_first_graphed); // Bundled clone is promoted -> bundle is promoted pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_PROMOTED, rsc, PCMK_ACTION_PROMOTED, pcmk__ar_first_implies_then_graphed); } struct match_data { const pcmk_node_t *node; // Node to compare against replica pcmk_resource_t *container; // Replica container corresponding to node }; /*! * \internal * \brief Check whether a replica container is assigned to a given node * * \param[in] replica Replica to check * \param[in,out] user_data struct match_data with node to compare against * * \return true if the replica does not match (to indicate further replicas * should be processed), otherwise false */ static bool match_replica_container(const pcmk__bundle_replica_t *replica, void *user_data) { struct match_data *match_data = user_data; if (pcmk__instance_matches(replica->container, match_data->node, pcmk_role_unknown, false)) { match_data->container = replica->container; return false; // Match found, don't bother searching further replicas } return true; // No match, keep searching } /*! * \internal * \brief Get the host to which a bundle node is assigned * * \param[in] node Possible bundle node to check * * \return Node to which the container for \p node is assigned if \p node is a * bundle node, otherwise \p node itself */ static const pcmk_node_t * get_bundle_node_host(const pcmk_node_t *node) { if (pcmk__is_bundle_node(node)) { const pcmk_resource_t *container = NULL; container = node->priv->remote->priv->launcher; return container->priv->fns->location(container, NULL, pcmk__rsc_node_assigned); } return node; } /*! * \internal * \brief Find a bundle container compatible with a dependent resource * * \param[in] dependent Dependent resource in colocation with bundle * \param[in] bundle Bundle that \p dependent is colocated with * * \return A container from \p bundle assigned to the same node as \p dependent * if assigned, otherwise assigned to any of dependent's allowed nodes, * otherwise NULL. */ static pcmk_resource_t * compatible_container(const pcmk_resource_t *dependent, const pcmk_resource_t *bundle) { GList *scratch = NULL; struct match_data match_data = { NULL, NULL }; // If dependent is assigned, only check there match_data.node = dependent->priv->fns->location(dependent, NULL, pcmk__rsc_node_assigned); match_data.node = get_bundle_node_host(match_data.node); if (match_data.node != NULL) { pe__foreach_const_bundle_replica(bundle, match_replica_container, &match_data); return match_data.container; } // Otherwise, check for any of the dependent's allowed nodes scratch = g_hash_table_get_values(dependent->priv->allowed_nodes); scratch = pcmk__sort_nodes(scratch, NULL); for (const GList *iter = scratch; iter != NULL; iter = iter->next) { match_data.node = iter->data; match_data.node = get_bundle_node_host(match_data.node); if (match_data.node == NULL) { continue; } pe__foreach_const_bundle_replica(bundle, match_replica_container, &match_data); if (match_data.container != NULL) { break; } } g_list_free(scratch); return match_data.container; } struct coloc_data { const pcmk__colocation_t *colocation; pcmk_resource_t *dependent; GList *container_hosts; int priority_delta; }; /*! * \internal * \brief Apply a colocation score to replica node scores or resource priority * * \param[in] replica Replica of primary bundle resource in colocation * \param[in,out] user_data struct coloc_data for colocation being applied * * \return true (to indicate that any further replicas should be processed) */ static bool replica_apply_coloc_score(const pcmk__bundle_replica_t *replica, void *user_data) { struct coloc_data *coloc_data = user_data; pcmk_node_t *chosen = NULL; pcmk_resource_t *container = replica->container; if (coloc_data->colocation->score < PCMK_SCORE_INFINITY) { int priority_delta = container->priv->cmds->apply_coloc_score(coloc_data->dependent, container, coloc_data->colocation, false); coloc_data->priority_delta = pcmk__add_scores(coloc_data->priority_delta, priority_delta); return true; } chosen = container->priv->fns->location(container, NULL, pcmk__rsc_node_assigned); if ((chosen == NULL) || is_set_recursive(container, pcmk__rsc_blocked, true)) { return true; } if ((coloc_data->colocation->primary_role >= pcmk_role_promoted) && ((replica->child == NULL) || (replica->child->priv->next_role < pcmk_role_promoted))) { return true; } pcmk__rsc_trace(pe__const_top_resource(container, true), "Allowing mandatory colocation %s using %s @%d", coloc_data->colocation->id, pcmk__node_name(chosen), chosen->assign->score); coloc_data->container_hosts = g_list_prepend(coloc_data->container_hosts, chosen); return true; } /*! * \internal * \brief Apply a colocation's score to node scores or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node scores (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent * * \return The score added to the dependent's priority */ int pcmk__bundle_apply_coloc_score(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { struct coloc_data coloc_data = { colocation, dependent, NULL, 0 }; /* This should never be called for the bundle itself as a dependent. * Instead, we add its colocation constraints to its containers and bundled * primitive and call the apply_coloc_score() method for them as dependents. */ pcmk__assert(pcmk__is_bundle(primary) && pcmk__is_primitive(dependent) && (colocation != NULL) && !for_dependent); if (pcmk_is_set(primary->flags, pcmk__rsc_unassigned)) { pcmk__rsc_trace(primary, "Skipping applying colocation %s " "because %s is still provisional", colocation->id, primary->id); return 0; } pcmk__rsc_trace(primary, "Applying colocation %s (%s with %s at %s)", colocation->id, dependent->id, primary->id, pcmk_readable_score(colocation->score)); /* If the constraint dependent is a clone or bundle, "dependent" here is one * of its instances. Look for a compatible instance of this bundle. */ if (colocation->dependent->priv->variant > pcmk__rsc_variant_group) { const pcmk_resource_t *primary_container = NULL; primary_container = compatible_container(dependent, primary); if (primary_container != NULL) { // Success, we found one pcmk__rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_container->id); return dependent->priv->cmds->apply_coloc_score(dependent, primary_container, colocation, true); } if (colocation->score >= PCMK_SCORE_INFINITY) { // Failure, and it's fatal crm_notice("%s cannot run because there is no compatible " "instance of %s to colocate with", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true, true); } else { // Failure, but we can ignore it pcmk__rsc_debug(primary, "%s cannot be colocated with any instance of %s", dependent->id, primary->id); } return 0; } pe__foreach_const_bundle_replica(primary, replica_apply_coloc_score, &coloc_data); if (colocation->score >= PCMK_SCORE_INFINITY) { pcmk__colocation_intersect_nodes(dependent, primary, colocation, coloc_data.container_hosts, false); } g_list_free(coloc_data.container_hosts); return coloc_data.priority_delta; } // Bundle implementation of pcmk__assignment_methods_t:with_this_colocations() void pcmk__with_bundle_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { const pcmk_resource_t *bundled_rsc = NULL; pcmk__assert(pcmk__is_bundle(rsc) && (orig_rsc != NULL) && (list != NULL)); // The bundle itself and its containers always get its colocations if ((orig_rsc == rsc) || pcmk_is_set(orig_rsc->flags, pcmk__rsc_replica_container)) { pcmk__add_with_this_list(list, rsc->priv->with_this_colocations, orig_rsc); return; } /* The bundled resource gets the colocations if it's promotable and we've * begun choosing roles */ bundled_rsc = pe__bundled_resource(rsc); if ((bundled_rsc == NULL) || !pcmk_is_set(bundled_rsc->flags, pcmk__rsc_promotable) || (pe__const_top_resource(orig_rsc, false) != bundled_rsc)) { return; } if (orig_rsc == bundled_rsc) { if (pe__clone_flag_is_set(orig_rsc, pcmk__clone_promotion_constrained)) { /* orig_rsc is the clone and we're setting roles (or have already * done so) */ pcmk__add_with_this_list(list, rsc->priv->with_this_colocations, orig_rsc); } } else if (!pcmk_is_set(orig_rsc->flags, pcmk__rsc_unassigned)) { /* orig_rsc is an instance and is already assigned. If something * requests colocations for orig_rsc now, it's for setting roles. */ pcmk__add_with_this_list(list, rsc->priv->with_this_colocations, orig_rsc); } } // Bundle implementation of pcmk__assignment_methods_t:this_with_colocations() void pcmk__bundle_with_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { const pcmk_resource_t *bundled_rsc = NULL; pcmk__assert(pcmk__is_bundle(rsc) && (orig_rsc != NULL) && (list != NULL)); // The bundle itself and its containers always get its colocations if ((orig_rsc == rsc) || pcmk_is_set(orig_rsc->flags, pcmk__rsc_replica_container)) { pcmk__add_this_with_list(list, rsc->priv->this_with_colocations, orig_rsc); return; } /* The bundled resource gets the colocations if it's promotable and we've * begun choosing roles */ bundled_rsc = pe__bundled_resource(rsc); if ((bundled_rsc == NULL) || !pcmk_is_set(bundled_rsc->flags, pcmk__rsc_promotable) || (pe__const_top_resource(orig_rsc, false) != bundled_rsc)) { return; } if (orig_rsc == bundled_rsc) { if (pe__clone_flag_is_set(orig_rsc, pcmk__clone_promotion_constrained)) { /* orig_rsc is the clone and we're setting roles (or have already * done so) */ pcmk__add_this_with_list(list, rsc->priv->this_with_colocations, orig_rsc); } } else if (!pcmk_is_set(orig_rsc->flags, pcmk__rsc_unassigned)) { /* orig_rsc is an instance and is already assigned. If something * requests colocations for orig_rsc now, it's for setting roles. */ pcmk__add_this_with_list(list, rsc->priv->this_with_colocations, orig_rsc); } } /*! * \internal * \brief Return action flags for a given bundle resource action * * \param[in,out] action Bundle resource action to get flags for * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__bundle_action_flags(pcmk_action_t *action, const pcmk_node_t *node) { GList *containers = NULL; uint32_t flags = 0; pcmk_resource_t *bundled_resource = NULL; pcmk__assert((action != NULL) && pcmk__is_bundle(action->rsc)); bundled_resource = pe__bundled_resource(action->rsc); if (bundled_resource != NULL) { GList *children = bundled_resource->priv->children; // Clone actions are done on the bundled clone resource, not container switch (get_complex_task(bundled_resource, action->task)) { case pcmk__action_unspecified: case pcmk__action_notify: case pcmk__action_notified: case pcmk__action_promote: case pcmk__action_promoted: case pcmk__action_demote: case pcmk__action_demoted: return pcmk__collective_action_flags(action, children, node); default: break; } } containers = pe__bundle_containers(action->rsc); flags = pcmk__collective_action_flags(action, containers, node); g_list_free(containers); return flags; } /*! * \internal * \brief Apply a location constraint to a bundle replica * * \param[in,out] replica Replica to apply constraint to * \param[in,out] user_data Location constraint to apply * * \return true (to indicate that any further replicas should be processed) */ static bool apply_location_to_replica(pcmk__bundle_replica_t *replica, void *user_data) { pcmk__location_t *location = user_data; replica->container->priv->cmds->apply_location(replica->container, location); if (replica->ip != NULL) { replica->ip->priv->cmds->apply_location(replica->ip, location); } return true; } /*! * \internal * \brief Apply a location constraint to a bundle resource's allowed node scores * * \param[in,out] rsc Bundle resource to apply constraint to * \param[in,out] location Location constraint to apply */ void pcmk__bundle_apply_location(pcmk_resource_t *rsc, pcmk__location_t *location) { pcmk_resource_t *bundled_resource = NULL; pcmk__assert((location != NULL) && pcmk__is_bundle(rsc)); pcmk__apply_location(rsc, location); pe__foreach_bundle_replica(rsc, apply_location_to_replica, location); bundled_resource = pe__bundled_resource(rsc); if ((bundled_resource != NULL) && ((location->role_filter == pcmk_role_unpromoted) || (location->role_filter == pcmk_role_promoted))) { bundled_resource->priv->cmds->apply_location(bundled_resource, location); bundled_resource->priv->location_constraints = g_list_prepend(bundled_resource->priv->location_constraints, location); } } #define XPATH_REMOTE "//nvpair[@name='" PCMK_REMOTE_RA_ADDR "']" /*! * \internal * \brief Add a bundle replica's actions to transition graph * * \param[in,out] replica Replica to add to graph * \param[in] user_data Bundle that replica belongs to (for logging only) * * \return true (to indicate that any further replicas should be processed) */ static bool add_replica_actions_to_graph(pcmk__bundle_replica_t *replica, void *user_data) { if ((replica->remote != NULL) && pe__bundle_needs_remote_name(replica->remote)) { /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that * run the remote executor inside, without needing a separate IP for * the container. This is done by configuring the inner remote's * connection host as the magic string "#uname", then * replacing it with the underlying host when needed. */ xmlNode *nvpair = pcmk__xpath_find_one(replica->remote->priv->xml->doc, XPATH_REMOTE, LOG_ERR); const char *calculated_addr = NULL; // Replace the value in replica->remote->xml (if appropriate) calculated_addr = pe__add_bundle_remote_name(replica->remote, nvpair, PCMK_XA_VALUE); if (calculated_addr != NULL) { /* Since this is for the bundle as a resource, and not any * particular action, replace the value in the default * parameters (not evaluated for node). create_graph_action() * will grab it from there to replace it in node-evaluated * parameters. */ GHashTable *params = NULL; params = pe_rsc_params(replica->remote, NULL, replica->remote->priv->scheduler); pcmk__insert_dup(params, PCMK_REMOTE_RA_ADDR, calculated_addr); } else { pcmk_resource_t *bundle = user_data; /* The only way to get here is if the remote connection is * neither currently running nor scheduled to run. That means we * won't be doing any operations that require addr (only start * requires it; we additionally use it to compare digests when * unpacking status, promote, and migrate_from history, but * that's already happened by this point). */ pcmk__rsc_info(bundle, "Unable to determine address for bundle %s " "remote connection", bundle->id); } } if (replica->ip != NULL) { replica->ip->priv->cmds->add_actions_to_graph(replica->ip); } replica->container->priv->cmds->add_actions_to_graph(replica->container); if (replica->remote != NULL) { replica->remote->priv->cmds->add_actions_to_graph(replica->remote); } return true; } /*! * \internal * \brief Add a bundle resource's actions to the transition graph * * \param[in,out] rsc Bundle resource whose actions should be added */ void pcmk__bundle_add_actions_to_graph(pcmk_resource_t *rsc) { pcmk_resource_t *bundled_resource = NULL; pcmk__assert(pcmk__is_bundle(rsc)); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { bundled_resource->priv->cmds->add_actions_to_graph(bundled_resource); } pe__foreach_bundle_replica(rsc, add_replica_actions_to_graph, rsc); } struct probe_data { pcmk_resource_t *bundle; // Bundle being probed pcmk_node_t *node; // Node to create probes on bool any_created; // Whether any probes have been created }; /*! * \internal * \brief Order a bundle replica's start after another replica's probe * * \param[in,out] replica Replica to order start for * \param[in,out] user_data Replica with probe to order after * * \return true (to indicate that any further replicas should be processed) */ static bool order_replica_start_after(pcmk__bundle_replica_t *replica, void *user_data) { pcmk__bundle_replica_t *probed_replica = user_data; if ((replica == probed_replica) || (replica->container == NULL)) { return true; } pcmk__new_ordering(probed_replica->container, pcmk__op_key(probed_replica->container->id, PCMK_ACTION_MONITOR, 0), NULL, replica->container, pcmk__op_key(replica->container->id, PCMK_ACTION_START, 0), NULL, pcmk__ar_ordered|pcmk__ar_if_on_same_node, replica->container->priv->scheduler); return true; } /*! * \internal * \brief Create probes for a bundle replica's resources * * \param[in,out] replica Replica to create probes for * \param[in,out] user_data struct probe_data * * \return true (to indicate that any further replicas should be processed) */ static bool create_replica_probes(pcmk__bundle_replica_t *replica, void *user_data) { struct probe_data *probe_data = user_data; pcmk_resource_t *bundle = probe_data->bundle; + /* This is guaranteed when constructing probe_data in pcmk__bundle_create_probe, + * but this makes static analysis happy. + */ + pcmk__assert(bundle != NULL); + if ((replica->ip != NULL) && replica->ip->priv->cmds->create_probe(replica->ip, probe_data->node)) { probe_data->any_created = true; } if ((replica->child != NULL) && pcmk__same_node(probe_data->node, replica->node) && replica->child->priv->cmds->create_probe(replica->child, probe_data->node)) { probe_data->any_created = true; } if (replica->container->priv->cmds->create_probe(replica->container, probe_data->node)) { probe_data->any_created = true; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. * * Partly this is to ensure that the maximum replicas per host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ if (bundle->priv->fns->max_per_node(bundle) == 1) { pe__foreach_bundle_replica(bundle, order_replica_start_after, replica); } } if ((replica->remote != NULL) && replica->remote->priv->cmds->create_probe(replica->remote, probe_data->node)) { /* Do not probe the remote resource until we know where the container is * running. This is required for REMOTE_CONTAINER_HACK to correctly * probe remote resources. */ char *probe_uuid = pcmk__op_key(replica->remote->id, PCMK_ACTION_MONITOR, 0); pcmk_action_t *probe = NULL; probe = find_first_action(replica->remote->priv->actions, probe_uuid, NULL, probe_data->node); free(probe_uuid); if (probe != NULL) { probe_data->any_created = true; pcmk__rsc_trace(bundle, "Ordering %s probe on %s", replica->remote->id, pcmk__node_name(probe_data->node)); pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, PCMK_ACTION_START, 0), NULL, replica->remote, NULL, probe, pcmk__ar_nested_remote_probe, bundle->priv->scheduler); } } return true; } /*! * \internal * * \brief Schedule any probes needed for a bundle resource on a node * * \param[in,out] rsc Bundle resource to create probes for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__bundle_create_probe(pcmk_resource_t *rsc, pcmk_node_t *node) { struct probe_data probe_data = { rsc, node, false }; pcmk__assert(pcmk__is_bundle(rsc)); pe__foreach_bundle_replica(rsc, create_replica_probes, &probe_data); return probe_data.any_created; } /*! * \internal * \brief Output actions for one bundle replica * * \param[in,out] replica Replica to output actions for * \param[in] user_data Unused * * \return true (to indicate that any further replicas should be processed) */ static bool output_replica_actions(pcmk__bundle_replica_t *replica, void *user_data) { if (replica->ip != NULL) { replica->ip->priv->cmds->output_actions(replica->ip); } replica->container->priv->cmds->output_actions(replica->container); if (replica->remote != NULL) { replica->remote->priv->cmds->output_actions(replica->remote); } if (replica->child != NULL) { replica->child->priv->cmds->output_actions(replica->child); } return true; } /*! * \internal * \brief Output a summary of scheduled actions for a bundle resource * * \param[in,out] rsc Bundle resource to output actions for */ void pcmk__output_bundle_actions(pcmk_resource_t *rsc) { pcmk__assert(pcmk__is_bundle(rsc)); pe__foreach_bundle_replica(rsc, output_replica_actions, NULL); } // Bundle implementation of pcmk__assignment_methods_t:add_utilization() void pcmk__bundle_add_utilization(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { pcmk_resource_t *container = NULL; pcmk__assert(pcmk__is_bundle(rsc)); if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) { return; } /* All bundle replicas are identical, so using the utilization of the first * is sufficient for any. Only the implicit container resource can have * utilization values. */ container = pe__first_container(rsc); if (container != NULL) { container->priv->cmds->add_utilization(container, orig_rsc, all_rscs, utilization); } } // Bundle implementation of pcmk__assignment_methods_t:shutdown_lock() void pcmk__bundle_shutdown_lock(pcmk_resource_t *rsc) { pcmk__assert(pcmk__is_bundle(rsc)); // Bundles currently don't support shutdown locks } diff --git a/lib/pacemaker/pcmk_sched_ordering.c b/lib/pacemaker/pcmk_sched_ordering.c index 3370cc52e8..a5076ad7ec 100644 --- a/lib/pacemaker/pcmk_sched_ordering.c +++ b/lib/pacemaker/pcmk_sched_ordering.c @@ -1,1501 +1,1502 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // PRIx32 #include // bool, true, false #include #include #include #include "libpacemaker_private.h" enum pe_order_kind { pe_order_kind_optional, pe_order_kind_mandatory, pe_order_kind_serialize, }; enum ordering_symmetry { ordering_asymmetric, // the only relation in an asymmetric ordering ordering_symmetric, // the normal relation in a symmetric ordering ordering_symmetric_inverse, // the inverse relation in a symmetric ordering }; // @TODO de-functionize this for readability and possibly better log messages #define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ __rsc = pcmk__find_constraint_resource(scheduler->priv->resources, \ __name); \ if (__rsc == NULL) { \ pcmk__config_err("%s: No resource found for %s", __set, __name);\ return pcmk_rc_unpack_error; \ } \ } while (0) static const char * invert_action(const char *action) { if (pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) { return PCMK_ACTION_STOP; } else if (pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) { return PCMK_ACTION_START; } else if (pcmk__str_eq(action, PCMK_ACTION_PROMOTE, pcmk__str_none)) { return PCMK_ACTION_DEMOTE; } else if (pcmk__str_eq(action, PCMK_ACTION_DEMOTE, pcmk__str_none)) { return PCMK_ACTION_PROMOTE; } else if (pcmk__str_eq(action, PCMK_ACTION_PROMOTED, pcmk__str_none)) { return PCMK_ACTION_DEMOTED; } else if (pcmk__str_eq(action, PCMK_ACTION_DEMOTED, pcmk__str_none)) { return PCMK_ACTION_PROMOTED; } else if (pcmk__str_eq(action, PCMK_ACTION_RUNNING, pcmk__str_none)) { return PCMK_ACTION_STOPPED; } else if (pcmk__str_eq(action, PCMK_ACTION_STOPPED, pcmk__str_none)) { return PCMK_ACTION_RUNNING; } pcmk__config_warn("Unknown action '%s' specified in order constraint", action); return NULL; } static enum pe_order_kind get_ordering_type(const xmlNode *xml_obj) { enum pe_order_kind kind_e = pe_order_kind_mandatory; const char *kind = crm_element_value(xml_obj, PCMK_XA_KIND); if (kind == NULL) { const char *score = crm_element_value(xml_obj, PCMK_XA_SCORE); kind_e = pe_order_kind_mandatory; if (score) { // @COMPAT deprecated informally since 1.0.7, formally since 2.0.1 int score_i = 0; (void) pcmk_parse_score(score, &score_i, 0); if (score_i == 0) { kind_e = pe_order_kind_optional; } pcmk__warn_once(pcmk__wo_order_score, "Support for '" PCMK_XA_SCORE "' in " PCMK_XE_RSC_ORDER " is deprecated and will be " "removed in a future release " "(use '" PCMK_XA_KIND "' instead)"); } } else if (pcmk__str_eq(kind, PCMK_VALUE_MANDATORY, pcmk__str_none)) { kind_e = pe_order_kind_mandatory; } else if (pcmk__str_eq(kind, PCMK_VALUE_OPTIONAL, pcmk__str_none)) { kind_e = pe_order_kind_optional; } else if (pcmk__str_eq(kind, PCMK_VALUE_SERIALIZE, pcmk__str_none)) { kind_e = pe_order_kind_serialize; } else { pcmk__config_err("Resetting '" PCMK_XA_KIND "' for constraint %s to " "'" PCMK_VALUE_MANDATORY "' because '%s' is not valid", pcmk__s(pcmk__xe_id(xml_obj), "missing ID"), kind); } return kind_e; } /*! * \internal * \brief Get ordering symmetry from XML * * \param[in] xml_obj Ordering XML * \param[in] parent_kind Default ordering kind * \param[in] parent_symmetrical_s Parent element's \c PCMK_XA_SYMMETRICAL * setting, if any * * \retval ordering_symmetric Ordering is symmetric * \retval ordering_asymmetric Ordering is asymmetric */ static enum ordering_symmetry get_ordering_symmetry(const xmlNode *xml_obj, enum pe_order_kind parent_kind, const char *parent_symmetrical_s) { int rc = pcmk_rc_ok; bool symmetric = false; enum pe_order_kind kind = parent_kind; // Default to parent's kind // Check ordering XML for explicit kind if ((crm_element_value(xml_obj, PCMK_XA_KIND) != NULL) || (crm_element_value(xml_obj, PCMK_XA_SCORE) != NULL)) { kind = get_ordering_type(xml_obj); } // Check ordering XML (and parent) for explicit PCMK_XA_SYMMETRICAL setting rc = pcmk__xe_get_bool_attr(xml_obj, PCMK_XA_SYMMETRICAL, &symmetric); if (rc != pcmk_rc_ok && parent_symmetrical_s != NULL) { symmetric = crm_is_true(parent_symmetrical_s); rc = pcmk_rc_ok; } if (rc == pcmk_rc_ok) { if (symmetric) { if (kind == pe_order_kind_serialize) { pcmk__config_warn("Ignoring " PCMK_XA_SYMMETRICAL " for '%s' because not valid with " PCMK_XA_KIND " of '" PCMK_VALUE_SERIALIZE "'", pcmk__xe_id(xml_obj)); } else { return ordering_symmetric; } } return ordering_asymmetric; } // Use default symmetry if (kind == pe_order_kind_serialize) { return ordering_asymmetric; } return ordering_symmetric; } /*! * \internal * \brief Get ordering flags appropriate to ordering kind * * \param[in] kind Ordering kind * \param[in] first Action name for 'first' action * \param[in] symmetry This ordering's symmetry role * * \return Minimal ordering flags appropriate to \p kind */ static uint32_t ordering_flags_for_kind(enum pe_order_kind kind, const char *first, enum ordering_symmetry symmetry) { uint32_t flags = pcmk__ar_none; // so we trace-log all flags set switch (kind) { case pe_order_kind_optional: pcmk__set_relation_flags(flags, pcmk__ar_ordered); break; case pe_order_kind_serialize: /* This flag is not used anywhere directly but means the relation * will not match an equality comparison against pcmk__ar_none or * pcmk__ar_ordered. */ pcmk__set_relation_flags(flags, pcmk__ar_serialize); break; case pe_order_kind_mandatory: pcmk__set_relation_flags(flags, pcmk__ar_ordered); switch (symmetry) { case ordering_asymmetric: pcmk__set_relation_flags(flags, pcmk__ar_asymmetric); break; case ordering_symmetric: pcmk__set_relation_flags(flags, pcmk__ar_first_implies_then); if (pcmk__is_up_action(first)) { pcmk__set_relation_flags(flags, pcmk__ar_unrunnable_first_blocks); } break; case ordering_symmetric_inverse: pcmk__set_relation_flags(flags, pcmk__ar_then_implies_first); break; } break; } return flags; } /*! * \internal * \brief Find resource corresponding to ID specified in ordering * * \param[in] xml Ordering XML * \param[in] resource_attr XML attribute name for resource ID * \param[in] scheduler Scheduler data * * \return Resource corresponding to \p id, or NULL if none */ static pcmk_resource_t * get_ordering_resource(const xmlNode *xml, const char *resource_attr, const pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(xml, resource_attr); if (rsc_id == NULL) { pcmk__config_err("Ignoring constraint '%s' without %s", pcmk__xe_id(xml), resource_attr); return NULL; } rsc = pcmk__find_constraint_resource(scheduler->priv->resources, rsc_id); if (rsc == NULL) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "does not exist", pcmk__xe_id(xml), rsc_id); return NULL; } return rsc; } /*! * \internal * \brief Determine minimum number of 'first' instances required in ordering * * \param[in] rsc 'First' resource in ordering * \param[in] xml Ordering XML * * \return Minimum 'first' instances required (or 0 if not applicable) */ static int get_minimum_first_instances(const pcmk_resource_t *rsc, const xmlNode *xml) { const char *clone_min = NULL; bool require_all = false; if (!pcmk__is_clone(rsc)) { return 0; } clone_min = g_hash_table_lookup(rsc->priv->meta, PCMK_META_CLONE_MIN); if (clone_min != NULL) { int clone_min_int = 0; pcmk__scan_min_int(clone_min, &clone_min_int, 0); return clone_min_int; } /* @COMPAT 1.1.13: * PCMK_XA_REQUIRE_ALL=PCMK_VALUE_FALSE is deprecated equivalent of * PCMK_META_CLONE_MIN=1 */ if (pcmk__xe_get_bool_attr(xml, PCMK_XA_REQUIRE_ALL, &require_all) != ENODATA) { pcmk__warn_once(pcmk__wo_require_all, "Support for " PCMK_XA_REQUIRE_ALL " in ordering " "constraints is deprecated and will be removed in a " "future release (use " PCMK_META_CLONE_MIN " clone " "meta-attribute instead)"); if (!require_all) { return 1; } } return 0; } /*! * \internal * \brief Create orderings for a constraint with \c PCMK_META_CLONE_MIN > 0 * * \param[in] id Ordering ID * \param[in,out] rsc_first 'First' resource in ordering (a clone) * \param[in] action_first 'First' action in ordering * \param[in] rsc_then 'Then' resource in ordering * \param[in] action_then 'Then' action in ordering * \param[in] flags Ordering flags * \param[in] clone_min Minimum required instances of 'first' */ static void clone_min_ordering(const char *id, pcmk_resource_t *rsc_first, const char *action_first, pcmk_resource_t *rsc_then, const char *action_then, uint32_t flags, int clone_min) { // Create a pseudo-action for when the minimum instances are active char *task = crm_strdup_printf(PCMK_ACTION_CLONE_ONE_OR_MORE ":%s", id); pcmk_action_t *clone_min_met = get_pseudo_op(task, rsc_first->priv->scheduler); free(task); /* Require the pseudo-action to have the required number of actions to be * considered runnable before allowing the pseudo-action to be runnable. */ clone_min_met->required_runnable_before = clone_min; // Order the actions for each clone instance before the pseudo-action for (GList *iter = rsc_first->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child = iter->data; pcmk__new_ordering(child, pcmk__op_key(child->id, action_first, 0), NULL, NULL, NULL, clone_min_met, pcmk__ar_min_runnable |pcmk__ar_first_implies_then_graphed, rsc_first->priv->scheduler); } // Order "then" action after the pseudo-action (if runnable) pcmk__new_ordering(NULL, NULL, clone_min_met, rsc_then, pcmk__op_key(rsc_then->id, action_then, 0), NULL, flags|pcmk__ar_unrunnable_first_blocks, rsc_first->priv->scheduler); } /*! * \internal * \brief Create new ordering for inverse of symmetric constraint * * \param[in] id Ordering ID (for logging only) * \param[in] kind Ordering kind * \param[in] rsc_first 'First' resource in ordering (a clone) * \param[in] action_first 'First' action in ordering * \param[in,out] rsc_then 'Then' resource in ordering * \param[in] action_then 'Then' action in ordering */ static void inverse_ordering(const char *id, enum pe_order_kind kind, pcmk_resource_t *rsc_first, const char *action_first, pcmk_resource_t *rsc_then, const char *action_then) { uint32_t flags; const char *inverted_first = invert_action(action_first); const char *inverted_then = invert_action(action_then); if ((inverted_then == NULL) || (inverted_first == NULL)) { pcmk__config_warn("Cannot invert constraint '%s' " "(please specify inverse manually)", id); return; } // Order inverted actions flags = ordering_flags_for_kind(kind, inverted_first, ordering_symmetric_inverse); pcmk__order_resource_actions(rsc_then, inverted_then, rsc_first, inverted_first, flags); } static void unpack_simple_rsc_order(xmlNode *xml_obj, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc_then = NULL; pcmk_resource_t *rsc_first = NULL; int min_required_before = 0; enum pe_order_kind kind = pe_order_kind_mandatory; uint32_t flags = pcmk__ar_none; enum ordering_symmetry symmetry; const char *action_then = NULL; const char *action_first = NULL; const char *id = NULL; CRM_CHECK(xml_obj != NULL, return); id = crm_element_value(xml_obj, PCMK_XA_ID); if (id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " PCMK_XA_ID, xml_obj->name); return; } rsc_first = get_ordering_resource(xml_obj, PCMK_XA_FIRST, scheduler); if (rsc_first == NULL) { return; } rsc_then = get_ordering_resource(xml_obj, PCMK_XA_THEN, scheduler); if (rsc_then == NULL) { return; } action_first = crm_element_value(xml_obj, PCMK_XA_FIRST_ACTION); if (action_first == NULL) { action_first = PCMK_ACTION_START; } action_then = crm_element_value(xml_obj, PCMK_XA_THEN_ACTION); if (action_then == NULL) { action_then = action_first; } kind = get_ordering_type(xml_obj); symmetry = get_ordering_symmetry(xml_obj, kind, NULL); flags = ordering_flags_for_kind(kind, action_first, symmetry); /* If there is a minimum number of instances that must be runnable before * the 'then' action is runnable, we use a pseudo-action for convenience: * minimum number of clone instances have runnable actions -> * pseudo-action is runnable -> dependency is runnable. */ min_required_before = get_minimum_first_instances(rsc_first, xml_obj); if (min_required_before > 0) { clone_min_ordering(id, rsc_first, action_first, rsc_then, action_then, flags, min_required_before); } else { pcmk__order_resource_actions(rsc_first, action_first, rsc_then, action_then, flags); } if (symmetry == ordering_symmetric) { inverse_ordering(id, kind, rsc_first, action_first, rsc_then, action_then); } } /*! * \internal * \brief Create a new ordering between two actions * * \param[in,out] first_rsc Resource for 'first' action (if NULL and * \p first_action is a resource action, that * resource will be used) * \param[in,out] first_action_task Action key for 'first' action (if NULL and * \p first_action is not NULL, its UUID will * be used) * \param[in,out] first_action 'first' action (if NULL, \p first_rsc and * \p first_action_task must be set) * * \param[in] then_rsc Resource for 'then' action (if NULL and * \p then_action is a resource action, that * resource will be used) * \param[in,out] then_action_task Action key for 'then' action (if NULL and * \p then_action is not NULL, its UUID will * be used) * \param[in] then_action 'then' action (if NULL, \p then_rsc and * \p then_action_task must be set) * * \param[in] flags Group of enum pcmk__action_relation_flags * \param[in,out] sched Scheduler data to add ordering to * * \note This function takes ownership of first_action_task and * then_action_task, which do not need to be freed by the caller. */ void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_action_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_action_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched) { pcmk__action_relation_t *order = NULL; // One of action or resource must be specified for each side CRM_CHECK(((first_action != NULL) || (first_rsc != NULL)) && ((then_action != NULL) || (then_rsc != NULL)), free(first_action_task); free(then_action_task); return); if ((first_rsc == NULL) && (first_action != NULL)) { first_rsc = first_action->rsc; } if ((then_rsc == NULL) && (then_action != NULL)) { then_rsc = then_action->rsc; } order = pcmk__assert_alloc(1, sizeof(pcmk__action_relation_t)); order->id = sched->priv->next_ordering_id++; order->flags = flags; order->rsc1 = first_rsc; order->rsc2 = then_rsc; order->action1 = first_action; order->action2 = then_action; order->task1 = first_action_task; order->task2 = then_action_task; if ((order->task1 == NULL) && (first_action != NULL)) { order->task1 = strdup(first_action->uuid); } if ((order->task2 == NULL) && (then_action != NULL)) { order->task2 = strdup(then_action->uuid); } if ((order->rsc1 == NULL) && (first_action != NULL)) { order->rsc1 = first_action->rsc; } if ((order->rsc2 == NULL) && (then_action != NULL)) { order->rsc2 = then_action->rsc; } pcmk__rsc_trace(first_rsc, "Created ordering %d for %s then %s", (sched->priv->next_ordering_id - 1), pcmk__s(order->task1, "an underspecified action"), pcmk__s(order->task2, "an underspecified action")); sched->priv->ordering_constraints = g_list_prepend(sched->priv->ordering_constraints, order); pcmk__order_migration_equivalents(order); } /*! * \brief Unpack a set in an ordering constraint * * \param[in] set Set XML to unpack * \param[in] parent_kind \c PCMK_XE_RSC_ORDER XML \c PCMK_XA_KIND * attribute * \param[in] parent_symmetrical_s \c PCMK_XE_RSC_ORDER XML * \c PCMK_XA_SYMMETRICAL attribute * \param[in,out] scheduler Scheduler data * * \return Standard Pacemaker return code */ static int unpack_order_set(const xmlNode *set, enum pe_order_kind parent_kind, const char *parent_symmetrical_s, pcmk_scheduler_t *scheduler) { GList *set_iter = NULL; GList *resources = NULL; pcmk_resource_t *last = NULL; pcmk_resource_t *resource = NULL; int local_kind = parent_kind; bool sequential = false; uint32_t flags = pcmk__ar_ordered; enum ordering_symmetry symmetry; char *key = NULL; const char *id = pcmk__xe_id(set); const char *action = crm_element_value(set, PCMK_XA_ACTION); const char *sequential_s = crm_element_value(set, PCMK_XA_SEQUENTIAL); const char *kind_s = crm_element_value(set, PCMK_XA_KIND); if (action == NULL) { action = PCMK_ACTION_START; } if (kind_s) { local_kind = get_ordering_type(set); } if (sequential_s == NULL) { sequential_s = "1"; } sequential = crm_is_true(sequential_s); symmetry = get_ordering_symmetry(set, parent_kind, parent_symmetrical_s); flags = ordering_flags_for_kind(local_kind, action, symmetry); for (const xmlNode *xml_rsc = pcmk__xe_first_child(set, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next(xml_rsc, PCMK_XE_RESOURCE_REF)) { EXPAND_CONSTRAINT_IDREF(id, resource, pcmk__xe_id(xml_rsc)); resources = g_list_append(resources, resource); } if (pcmk__list_of_1(resources)) { crm_trace("Single set: %s", id); goto done; } set_iter = resources; while (set_iter != NULL) { resource = (pcmk_resource_t *) set_iter->data; set_iter = set_iter->next; key = pcmk__op_key(resource->id, action, 0); if (local_kind == pe_order_kind_serialize) { /* Serialize before everything that comes after */ for (GList *iter = set_iter; iter != NULL; iter = iter->next) { pcmk_resource_t *then_rsc = iter->data; char *then_key = pcmk__op_key(then_rsc->id, action, 0); pcmk__new_ordering(resource, strdup(key), NULL, then_rsc, then_key, NULL, flags, scheduler); } } else if (sequential) { if (last != NULL) { pcmk__order_resource_actions(last, action, resource, action, flags); } last = resource; } free(key); } if (symmetry == ordering_asymmetric) { goto done; } last = NULL; action = invert_action(action); flags = ordering_flags_for_kind(local_kind, action, ordering_symmetric_inverse); set_iter = resources; while (set_iter != NULL) { resource = (pcmk_resource_t *) set_iter->data; set_iter = set_iter->next; if (sequential) { if (last != NULL) { pcmk__order_resource_actions(resource, action, last, action, flags); } last = resource; } } done: g_list_free(resources); return pcmk_rc_ok; } /*! * \brief Order two resource sets relative to each other * * \param[in] id Ordering ID (for logging) * \param[in] set1 First listed set * \param[in] set2 Second listed set * \param[in] kind Ordering kind * \param[in,out] scheduler Scheduler data * \param[in] symmetry Which ordering symmetry applies to this relation * * \return Standard Pacemaker return code */ static int order_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2, enum pe_order_kind kind, pcmk_scheduler_t *scheduler, enum ordering_symmetry symmetry) { const xmlNode *xml_rsc = NULL; const xmlNode *xml_rsc_2 = NULL; pcmk_resource_t *rsc_1 = NULL; pcmk_resource_t *rsc_2 = NULL; const char *action_1 = crm_element_value(set1, PCMK_XA_ACTION); const char *action_2 = crm_element_value(set2, PCMK_XA_ACTION); uint32_t flags = pcmk__ar_none; bool require_all = true; (void) pcmk__xe_get_bool_attr(set1, PCMK_XA_REQUIRE_ALL, &require_all); if (action_1 == NULL) { action_1 = PCMK_ACTION_START; } if (action_2 == NULL) { action_2 = PCMK_ACTION_START; } if (symmetry == ordering_symmetric_inverse) { action_1 = invert_action(action_1); action_2 = invert_action(action_2); } if (pcmk__str_eq(PCMK_ACTION_STOP, action_1, pcmk__str_none) || pcmk__str_eq(PCMK_ACTION_DEMOTE, action_1, pcmk__str_none)) { /* Assuming: A -> ( B || C) -> D * The one-or-more logic only applies during the start/promote phase. * During shutdown neither B nor can shutdown until D is down, so simply * turn require_all back on. */ require_all = true; } flags = ordering_flags_for_kind(kind, action_1, symmetry); /* If we have an unordered set1, whether it is sequential or not is * irrelevant in regards to set2. */ if (!require_all) { char *task = crm_strdup_printf(PCMK_ACTION_ONE_OR_MORE ":%s", pcmk__xe_id(set1)); pcmk_action_t *unordered_action = get_pseudo_op(task, scheduler); free(task); unordered_action->required_runnable_before = 1; for (xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next(xml_rsc, PCMK_XE_RESOURCE_REF)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, pcmk__xe_id(xml_rsc)); /* Add an ordering constraint between every element in set1 and the * pseudo action. If any action in set1 is runnable the pseudo * action will be runnable. */ pcmk__new_ordering(rsc_1, pcmk__op_key(rsc_1->id, action_1, 0), NULL, NULL, NULL, unordered_action, pcmk__ar_min_runnable |pcmk__ar_first_implies_then_graphed, scheduler); } for (xml_rsc_2 = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc_2 != NULL; xml_rsc_2 = pcmk__xe_next(xml_rsc_2, PCMK_XE_RESOURCE_REF)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, pcmk__xe_id(xml_rsc_2)); /* Add an ordering constraint between the pseudo-action and every * element in set2. If the pseudo-action is runnable, every action * in set2 will be runnable. */ pcmk__new_ordering(NULL, NULL, unordered_action, rsc_2, pcmk__op_key(rsc_2->id, action_2, 0), NULL, flags|pcmk__ar_unrunnable_first_blocks, scheduler); } return pcmk_rc_ok; } if (pcmk__xe_attr_is_true(set1, PCMK_XA_SEQUENTIAL)) { if (symmetry == ordering_symmetric_inverse) { // Get the first one xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); if (xml_rsc != NULL) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, pcmk__xe_id(xml_rsc)); } } else { // Get the last one const char *rid = NULL; for (xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next(xml_rsc, PCMK_XE_RESOURCE_REF)) { rid = pcmk__xe_id(xml_rsc); } EXPAND_CONSTRAINT_IDREF(id, rsc_1, rid); } } if (pcmk__xe_attr_is_true(set2, PCMK_XA_SEQUENTIAL)) { if (symmetry == ordering_symmetric_inverse) { // Get the last one const char *rid = NULL; for (xml_rsc = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next(xml_rsc, PCMK_XE_RESOURCE_REF)) { rid = pcmk__xe_id(xml_rsc); } EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); } else { // Get the first one xml_rsc = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); if (xml_rsc != NULL) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, pcmk__xe_id(xml_rsc)); } } } if ((rsc_1 != NULL) && (rsc_2 != NULL)) { pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); } else if (rsc_1 != NULL) { for (xml_rsc = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next(xml_rsc, PCMK_XE_RESOURCE_REF)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, pcmk__xe_id(xml_rsc)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); } } else if (rsc_2 != NULL) { for (xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next(xml_rsc, PCMK_XE_RESOURCE_REF)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, pcmk__xe_id(xml_rsc)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); } } else { for (xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next(xml_rsc, PCMK_XE_RESOURCE_REF)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, pcmk__xe_id(xml_rsc)); for (xmlNode *xml_rsc_2 = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc_2 != NULL; xml_rsc_2 = pcmk__xe_next(xml_rsc_2, PCMK_XE_RESOURCE_REF)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, pcmk__xe_id(xml_rsc_2)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); } } } return pcmk_rc_ok; } /*! * \internal * \brief If an ordering constraint uses resource tags, expand them * * \param[in,out] xml_obj Ordering constraint XML * \param[out] expanded_xml Equivalent XML with tags expanded * \param[in] scheduler Scheduler data * * \return Standard Pacemaker return code (specifically, pcmk_rc_ok on success, * and pcmk_rc_unpack_error on invalid configuration) */ static int unpack_order_tags(xmlNode *xml_obj, xmlNode **expanded_xml, const pcmk_scheduler_t *scheduler) { const char *id_first = NULL; const char *id_then = NULL; const char *action_first = NULL; const char *action_then = NULL; pcmk_resource_t *rsc_first = NULL; pcmk_resource_t *rsc_then = NULL; pcmk__idref_t *tag_first = NULL; pcmk__idref_t *tag_then = NULL; xmlNode *rsc_set_first = NULL; xmlNode *rsc_set_then = NULL; bool any_sets = false; // Check whether there are any resource sets with template or tag references *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, scheduler); if (*expanded_xml != NULL) { crm_log_xml_trace(*expanded_xml, "Expanded " PCMK_XE_RSC_ORDER); return pcmk_rc_ok; } id_first = crm_element_value(xml_obj, PCMK_XA_FIRST); id_then = crm_element_value(xml_obj, PCMK_XA_THEN); if ((id_first == NULL) || (id_then == NULL)) { return pcmk_rc_ok; } if (!pcmk__valid_resource_or_tag(scheduler, id_first, &rsc_first, &tag_first)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", pcmk__xe_id(xml_obj), id_first); return pcmk_rc_unpack_error; } if (!pcmk__valid_resource_or_tag(scheduler, id_then, &rsc_then, &tag_then)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", pcmk__xe_id(xml_obj), id_then); return pcmk_rc_unpack_error; } if ((rsc_first != NULL) && (rsc_then != NULL)) { // Neither side references a template or tag return pcmk_rc_ok; } action_first = crm_element_value(xml_obj, PCMK_XA_FIRST_ACTION); action_then = crm_element_value(xml_obj, PCMK_XA_THEN_ACTION); *expanded_xml = pcmk__xml_copy(NULL, xml_obj); /* Convert template/tag reference in PCMK_XA_FIRST into constraint * PCMK_XE_RESOURCE_SET */ if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_first, PCMK_XA_FIRST, true, scheduler)) { pcmk__xml_free(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_unpack_error; } if (rsc_set_first != NULL) { if (action_first != NULL) { /* Move PCMK_XA_FIRST_ACTION into converted PCMK_XE_RESOURCE_SET as * PCMK_XA_ACTION */ crm_xml_add(rsc_set_first, PCMK_XA_ACTION, action_first); pcmk__xe_remove_attr(*expanded_xml, PCMK_XA_FIRST_ACTION); } any_sets = true; } /* Convert template/tag reference in PCMK_XA_THEN into constraint * PCMK_XE_RESOURCE_SET */ if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_then, PCMK_XA_THEN, true, scheduler)) { pcmk__xml_free(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_unpack_error; } if (rsc_set_then != NULL) { if (action_then != NULL) { /* Move PCMK_XA_THEN_ACTION into converted PCMK_XE_RESOURCE_SET as * PCMK_XA_ACTION */ crm_xml_add(rsc_set_then, PCMK_XA_ACTION, action_then); pcmk__xe_remove_attr(*expanded_xml, PCMK_XA_THEN_ACTION); } any_sets = true; } if (any_sets) { crm_log_xml_trace(*expanded_xml, "Expanded " PCMK_XE_RSC_ORDER); } else { pcmk__xml_free(*expanded_xml); *expanded_xml = NULL; } return pcmk_rc_ok; } /*! * \internal * \brief Unpack ordering constraint XML * * \param[in,out] xml_obj Ordering constraint XML to unpack * \param[in,out] scheduler Scheduler data */ void pcmk__unpack_ordering(xmlNode *xml_obj, pcmk_scheduler_t *scheduler) { xmlNode *set = NULL; xmlNode *last = NULL; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; const char *id = crm_element_value(xml_obj, PCMK_XA_ID); const char *invert = crm_element_value(xml_obj, PCMK_XA_SYMMETRICAL); enum pe_order_kind kind = get_ordering_type(xml_obj); enum ordering_symmetry symmetry = get_ordering_symmetry(xml_obj, kind, NULL); // Expand any resource tags in the constraint XML if (unpack_order_tags(xml_obj, &expanded_xml, scheduler) != pcmk_rc_ok) { return; } if (expanded_xml != NULL) { orig_xml = xml_obj; xml_obj = expanded_xml; } // If the constraint has resource sets, unpack them for (set = pcmk__xe_first_child(xml_obj, PCMK_XE_RESOURCE_SET, NULL, NULL); set != NULL; set = pcmk__xe_next(set, PCMK_XE_RESOURCE_SET)) { set = pcmk__xe_resolve_idref(set, scheduler->input); if ((set == NULL) // Configuration error, message already logged || (unpack_order_set(set, kind, invert, scheduler) != pcmk_rc_ok)) { if (expanded_xml != NULL) { pcmk__xml_free(expanded_xml); } return; } if (last != NULL) { if (order_rsc_sets(id, last, set, kind, scheduler, symmetry) != pcmk_rc_ok) { if (expanded_xml != NULL) { pcmk__xml_free(expanded_xml); } return; } if ((symmetry == ordering_symmetric) && (order_rsc_sets(id, set, last, kind, scheduler, ordering_symmetric_inverse) != pcmk_rc_ok)) { if (expanded_xml != NULL) { pcmk__xml_free(expanded_xml); } return; } } last = set; } if (expanded_xml) { pcmk__xml_free(expanded_xml); xml_obj = orig_xml; } // If the constraint has no resource sets, unpack it as a simple ordering if (last == NULL) { return unpack_simple_rsc_order(xml_obj, scheduler); } } static bool ordering_is_invalid(pcmk_action_t *action, pcmk__related_action_t *input) { /* Prevent user-defined ordering constraints between resources * running in a guest node and the resource that defines that node. */ if (!pcmk_is_set(input->flags, pcmk__ar_guest_allowed) && (input->action->rsc != NULL) && pcmk__rsc_corresponds_to_guest(action->rsc, input->action->node)) { pcmk__config_warn("Invalid ordering constraint between %s and %s", input->action->rsc->id, action->rsc->id); return true; } /* If there's an order like * "rscB_stop node2"-> "load_stopped_node2" -> "rscA_migrate_to node1" * * then rscA is being migrated from node1 to node2, while rscB is being * migrated from node2 to node1. If there would be a graph loop, * break the order "load_stopped_node2" -> "rscA_migrate_to node1". */ if ((input->flags == pcmk__ar_if_on_same_node_or_target) && (action->rsc != NULL) && pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none) && pcmk__graph_has_loop(action, action, input)) { return true; } return false; } void pcmk__disable_invalid_orderings(pcmk_scheduler_t *scheduler) { for (GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; pcmk__related_action_t *input = NULL; for (GList *input_iter = action->actions_before; input_iter != NULL; input_iter = input_iter->next) { input = input_iter->data; if (ordering_is_invalid(action, input)) { input->flags = pcmk__ar_none; } } } } /*! * \internal * \brief Order stops on a node before the node's shutdown * * \param[in,out] node Node being shut down * \param[in] shutdown_op Shutdown action for node */ void pcmk__order_stops_before_shutdown(pcmk_node_t *node, pcmk_action_t *shutdown_op) { for (GList *iter = node->priv->scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; // Only stops on the node shutting down are relevant if (!pcmk__same_node(action->node, node) || !pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { continue; } // Resources and nodes in maintenance mode won't be touched if (pcmk_is_set(action->rsc->flags, pcmk__rsc_maintenance)) { pcmk__rsc_trace(action->rsc, "Not ordering %s before shutdown of %s because " "resource in maintenance mode", action->uuid, pcmk__node_name(node)); continue; } else if (node->details->maintenance) { pcmk__rsc_trace(action->rsc, "Not ordering %s before shutdown of %s because " "node in maintenance mode", action->uuid, pcmk__node_name(node)); continue; } /* Don't touch a resource that is unmanaged or blocked, to avoid * blocking the shutdown (though if another action depends on this one, * we may still end up blocking) * * @TODO This "if" looks wrong, create a regression test for these cases */ if (!pcmk_any_flags_set(action->rsc->flags, pcmk__rsc_managed|pcmk__rsc_blocked)) { pcmk__rsc_trace(action->rsc, "Not ordering %s before shutdown of %s because " "resource is unmanaged or blocked", action->uuid, pcmk__node_name(node)); continue; } pcmk__rsc_trace(action->rsc, "Ordering %s before shutdown of %s", action->uuid, pcmk__node_name(node)); pcmk__clear_action_flags(action, pcmk__action_optional); pcmk__new_ordering(action->rsc, NULL, action, NULL, strdup(PCMK_ACTION_DO_SHUTDOWN), shutdown_op, pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks, node->priv->scheduler); } } /*! * \brief Find resource actions matching directly or as child * * \param[in] rsc Resource to check * \param[in] original_key Action key to search for (possibly referencing * parent of \rsc) * * \return Newly allocated list of matching actions * \note It is the caller's responsibility to free the result with g_list_free() */ static GList * find_actions_by_task(const pcmk_resource_t *rsc, const char *original_key) { // Search under given task key directly GList *list = find_actions(rsc->priv->actions, original_key, NULL); if (list == NULL) { // Search again using this resource's ID char *key = NULL; char *task = NULL; guint interval_ms = 0; CRM_CHECK(parse_op_key(original_key, NULL, &task, &interval_ms), return NULL); key = pcmk__op_key(rsc->id, task, interval_ms); list = find_actions(rsc->priv->actions, key, NULL); free(key); free(task); } return list; } /*! * \internal * \brief Order relevant resource actions after a given action * * \param[in,out] first_action Action to order after (or NULL if none runnable) * \param[in] rsc Resource whose actions should be ordered * \param[in,out] order Ordering constraint being applied */ static void order_resource_actions_after(pcmk_action_t *first_action, const pcmk_resource_t *rsc, pcmk__action_relation_t *order) { GList *then_actions = NULL; uint32_t flags = pcmk__ar_none; CRM_CHECK((rsc != NULL) && (order != NULL), return); flags = order->flags; pcmk__rsc_trace(rsc, "Applying ordering %d for 'then' resource %s", order->id, rsc->id); if (order->action2 != NULL) { then_actions = g_list_prepend(NULL, order->action2); } else { then_actions = find_actions_by_task(rsc, order->task2); } if (then_actions == NULL) { pcmk__rsc_trace(rsc, "Ignoring ordering %d: no %s actions found for %s", order->id, order->task2, rsc->id); return; } if ((first_action != NULL) && (first_action->rsc == rsc) && pcmk_is_set(first_action->flags, pcmk__action_migration_abort)) { pcmk__rsc_trace(rsc, "Detected dangling migration ordering (%s then %s %s)", first_action->uuid, order->task2, rsc->id); pcmk__clear_relation_flags(flags, pcmk__ar_first_implies_then); } if ((first_action == NULL) && !pcmk_is_set(flags, pcmk__ar_first_implies_then)) { pcmk__rsc_debug(rsc, "Ignoring ordering %d for %s: No first action found", order->id, rsc->id); g_list_free(then_actions); return; } for (GList *iter = then_actions; iter != NULL; iter = iter->next) { pcmk_action_t *then_action_iter = (pcmk_action_t *) iter->data; if (first_action != NULL) { order_actions(first_action, then_action_iter, flags); } else { pcmk__clear_action_flags(then_action_iter, pcmk__action_runnable); + // coverity[null_field] order->rsc1 can't be NULL here crm_warn("%s of %s is unrunnable because there is no %s of %s " "to order it after", then_action_iter->task, rsc->id, order->task1, order->rsc1->id); } } g_list_free(then_actions); } static void rsc_order_first(pcmk_resource_t *first_rsc, pcmk__action_relation_t *order) { GList *first_actions = NULL; pcmk_action_t *first_action = order->action1; pcmk_resource_t *then_rsc = order->rsc2; pcmk__assert(first_rsc != NULL); pcmk__rsc_trace(first_rsc, "Applying ordering constraint %d (first: %s)", order->id, first_rsc->id); if (first_action != NULL) { first_actions = g_list_prepend(NULL, first_action); } else { first_actions = find_actions_by_task(first_rsc, order->task1); } if ((first_actions == NULL) && (first_rsc == then_rsc)) { pcmk__rsc_trace(first_rsc, "Ignoring constraint %d: first (%s for %s) not found", order->id, order->task1, first_rsc->id); } else if (first_actions == NULL) { char *key = NULL; char *op_type = NULL; guint interval_ms = 0; enum rsc_role_e first_role; parse_op_key(order->task1, NULL, &op_type, &interval_ms); key = pcmk__op_key(first_rsc->id, op_type, interval_ms); first_role = first_rsc->priv->fns->state(first_rsc, true); if ((first_role == pcmk_role_stopped) && pcmk__str_eq(op_type, PCMK_ACTION_STOP, pcmk__str_none)) { free(key); pcmk__rsc_trace(first_rsc, "Ignoring constraint %d: first (%s for %s) " "not found", order->id, order->task1, first_rsc->id); } else if ((first_role == pcmk_role_unpromoted) && pcmk__str_eq(op_type, PCMK_ACTION_DEMOTE, pcmk__str_none)) { free(key); pcmk__rsc_trace(first_rsc, "Ignoring constraint %d: first (%s for %s) " "not found", order->id, order->task1, first_rsc->id); } else { pcmk__rsc_trace(first_rsc, "Creating first (%s for %s) for constraint %d ", order->task1, first_rsc->id, order->id); first_action = custom_action(first_rsc, key, op_type, NULL, TRUE, first_rsc->priv->scheduler); first_actions = g_list_prepend(NULL, first_action); } free(op_type); } if (then_rsc == NULL) { if (order->action2 == NULL) { pcmk__rsc_trace(first_rsc, "Ignoring constraint %d: then not found", order->id); return; } then_rsc = order->action2->rsc; } for (GList *iter = first_actions; iter != NULL; iter = iter->next) { first_action = iter->data; if (then_rsc == NULL) { order_actions(first_action, order->action2, order->flags); } else { order_resource_actions_after(first_action, then_rsc, order); } } g_list_free(first_actions); } // GFunc to call pcmk__block_colocation_dependents() static void block_colocation_dependents(gpointer data, gpointer user_data) { pcmk__block_colocation_dependents(data); } // GFunc to call pcmk__update_action_for_orderings() static void update_action_for_orderings(gpointer data, gpointer user_data) { pcmk__update_action_for_orderings((pcmk_action_t *) data, (pcmk_scheduler_t *) user_data); } /*! * \internal * \brief Apply all ordering constraints * * \param[in,out] sched Scheduler data */ void pcmk__apply_orderings(pcmk_scheduler_t *sched) { crm_trace("Applying ordering constraints"); /* Ordering constraints need to be processed in the order they were created. * rsc_order_first() and order_resource_actions_after() require the relevant * actions to already exist in some cases, but rsc_order_first() will create * the 'first' action in certain cases. Thus calling rsc_order_first() can * change the behavior of later-created orderings. * * Also, g_list_append() should be avoided for performance reasons, so we * prepend orderings when creating them and reverse the list here. * * @TODO This is brittle and should be carefully redesigned so that the * order of creation doesn't matter, and the reverse becomes unneeded. */ sched->priv->ordering_constraints = g_list_reverse(sched->priv->ordering_constraints); for (GList *iter = sched->priv->ordering_constraints; iter != NULL; iter = iter->next) { pcmk__action_relation_t *order = iter->data; pcmk_resource_t *rsc = order->rsc1; if (rsc != NULL) { rsc_order_first(rsc, order); continue; } rsc = order->rsc2; if (rsc != NULL) { order_resource_actions_after(order->action1, rsc, order); } else { crm_trace("Applying ordering constraint %d (non-resource actions)", order->id); order_actions(order->action1, order->action2, order->flags); } } g_list_foreach(sched->priv->actions, block_colocation_dependents, NULL); crm_trace("Ordering probes"); pcmk__order_probes(sched); crm_trace("Updating %d actions", g_list_length(sched->priv->actions)); g_list_foreach(sched->priv->actions, update_action_for_orderings, sched); pcmk__disable_invalid_orderings(sched); } /*! * \internal * \brief Order a given action after each action in a given list * * \param[in,out] after "After" action * \param[in,out] list List of "before" actions */ void pcmk__order_after_each(pcmk_action_t *after, GList *list) { const char *after_desc = (after->task == NULL)? after->uuid : after->task; for (GList *iter = list; iter != NULL; iter = iter->next) { pcmk_action_t *before = (pcmk_action_t *) iter->data; const char *before_desc = before->task? before->task : before->uuid; crm_debug("Ordering %s on %s before %s on %s", before_desc, pcmk__node_name(before->node), after_desc, pcmk__node_name(after->node)); order_actions(before, after, pcmk__ar_ordered); } } /*! * \internal * \brief Order promotions and demotions for restarts of a clone or bundle * * \param[in,out] rsc Clone or bundle to order */ void pcmk__promotable_restart_ordering(pcmk_resource_t *rsc) { // Order start and promote after all instances are stopped pcmk__order_resource_actions(rsc, PCMK_ACTION_STOPPED, rsc, PCMK_ACTION_START, pcmk__ar_ordered); pcmk__order_resource_actions(rsc, PCMK_ACTION_STOPPED, rsc, PCMK_ACTION_PROMOTE, pcmk__ar_ordered); // Order stop, start, and promote after all instances are demoted pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_STOP, pcmk__ar_ordered); pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_START, pcmk__ar_ordered); pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_PROMOTE, pcmk__ar_ordered); // Order promote after all instances are started pcmk__order_resource_actions(rsc, PCMK_ACTION_RUNNING, rsc, PCMK_ACTION_PROMOTE, pcmk__ar_ordered); // Order demote after all instances are demoted pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTE, rsc, PCMK_ACTION_DEMOTED, pcmk__ar_ordered); } diff --git a/lib/pacemaker/pcmk_simulate.c b/lib/pacemaker/pcmk_simulate.c index 86e8a27cdc..29c401234c 100644 --- a/lib/pacemaker/pcmk_simulate.c +++ b/lib/pacemaker/pcmk_simulate.c @@ -1,1069 +1,1069 @@ /* * Copyright 2021-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include // uint32_t, uint64_t #include #include #include #include "libpacemaker_private.h" static const char *profiling_dir = NULL; static pcmk__output_t *out = NULL; static cib_t *fake_cib = NULL; static GList *fake_resource_list = NULL; static const GList *fake_op_fail_list = NULL; static void set_effective_date(pcmk_scheduler_t *scheduler, bool print_original, const char *use_date); /*! * \internal * \brief Create an action name for use in a dot graph * * \param[in] action Action to create name for * \param[in] verbose If true, add action ID to name * * \return Newly allocated string with action name * \note It is the caller's responsibility to free the result. */ static char * create_action_name(const pcmk_action_t *action, bool verbose) { char *action_name = NULL; const char *prefix = ""; const char *action_host = NULL; const char *history_id = NULL; const char *task = action->task; if (action->node != NULL) { action_host = action->node->priv->name; } else if (!pcmk_is_set(action->flags, pcmk__action_pseudo)) { action_host = ""; } if (pcmk__str_eq(action->task, PCMK_ACTION_CANCEL, pcmk__str_none)) { prefix = "Cancel "; task = action->cancel_task; } if (action->rsc != NULL) { history_id = action->rsc->priv->history_id; } if (history_id != NULL) { char *key = NULL; guint interval_ms = 0; if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_NOTIFIED, NULL)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation"); pcmk__assert((n_type != NULL) && (n_task != NULL)); key = pcmk__notify_key(history_id, n_type, n_task); } else { key = pcmk__op_key(history_id, task, interval_ms); } if (action_host != NULL) { action_name = crm_strdup_printf("%s%s %s", prefix, key, action_host); } else { action_name = crm_strdup_printf("%s%s", prefix, key); } free(key); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { const char *op = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); action_name = crm_strdup_printf("%s%s '%s' %s", prefix, action->task, op, action_host); } else if (action->rsc && action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->uuid, action_host); } else if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->task, action_host); } else { action_name = crm_strdup_printf("%s", action->uuid); } if (verbose) { char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id); free(action_name); action_name = with_id; } return action_name; } /*! * \internal * \brief Display the status of a cluster * * \param[in,out] scheduler Scheduler data * \param[in] show_opts How to modify display (as pcmk_show_opt_e flags) * \param[in] section_opts Sections to display (as pcmk_section_e flags) * \param[in] title What to use as list title * \param[in] print_spacer Whether to display a spacer first */ static void print_cluster_status(pcmk_scheduler_t *scheduler, uint32_t show_opts, uint32_t section_opts, const char *title, bool print_spacer) { pcmk__output_t *out = scheduler->priv->out; GList *all = NULL; crm_exit_t stonith_rc = 0; enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; section_opts |= pcmk_section_nodes | pcmk_section_resources; show_opts |= pcmk_show_inactive_rscs | pcmk_show_failed_detail; all = g_list_prepend(all, (gpointer) "*"); PCMK__OUTPUT_SPACER_IF(out, print_spacer); out->begin_list(out, NULL, NULL, "%s", title); out->message(out, "cluster-status", scheduler, state, stonith_rc, NULL, pcmk__fence_history_none, section_opts, show_opts, NULL, all, all); out->end_list(out); g_list_free(all); } /*! * \internal * \brief Display a summary of all actions scheduled in a transition * * \param[in,out] scheduler Scheduler data (fully scheduled) * \param[in] print_spacer Whether to display a spacer first */ static void print_transition_summary(pcmk_scheduler_t *scheduler, bool print_spacer) { pcmk__output_t *out = scheduler->priv->out; PCMK__OUTPUT_SPACER_IF(out, print_spacer); out->begin_list(out, NULL, NULL, "Transition Summary"); pcmk__output_actions(scheduler); out->end_list(out); } /*! * \internal * \brief Reset scheduler, set some members, and unpack status * * \param[in,out] scheduler Scheduler data * \param[in] input What to set as cluster input * \param[in] out What to set as cluster output object * \param[in] use_date What to set as cluster's current timestamp * \param[in] flags Group of enum pcmk__scheduler_flags to set */ static void reset(pcmk_scheduler_t *scheduler, xmlNodePtr input, pcmk__output_t *out, const char *use_date, unsigned int flags) { pcmk_reset_scheduler(scheduler); scheduler->input = input; scheduler->priv->out = out; set_effective_date(scheduler, true, use_date); if (pcmk_is_set(flags, pcmk_sim_sanitized)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_sanitized); } if (pcmk_is_set(flags, pcmk_sim_show_scores)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_output_scores); } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_show_utilization); } cluster_status(scheduler); } /*! * \brief Write out a file in dot(1) format describing the actions that will * be taken by the scheduler in response to an input CIB file. * * \param[in,out] scheduler Scheduler data * \param[in] dot_file The filename to write * \param[in] all_actions Write all actions, even those that are optional * or are on unmanaged resources * \param[in] verbose Add extra information, such as action IDs, to the * output * * \return Standard Pacemaker return code */ static int write_sim_dotfile(pcmk_scheduler_t *scheduler, const char *dot_file, bool all_actions, bool verbose) { GList *iter = NULL; FILE *dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { return errno; } fprintf(dot_strm, " digraph \"g\" {\n"); for (iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; const char *style = "dashed"; const char *font = "black"; const char *color = NULL; char *action_name = create_action_name(action, verbose); if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { font = "orange"; } if (pcmk_is_set(action->flags, pcmk__action_added_to_graph)) { style = PCMK__VALUE_BOLD; color = "green"; } else if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pcmk__rsc_managed)) { color = "red"; font = "purple"; if (!all_actions) { goto do_not_write; } } else if (pcmk_is_set(action->flags, pcmk__action_optional)) { color = "blue"; if (!all_actions) { goto do_not_write; } } else { color = "red"; CRM_LOG_ASSERT(!pcmk_is_set(action->flags, pcmk__action_runnable)); } pcmk__set_action_flags(action, pcmk__action_added_to_graph); fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", action_name, style, color, font); do_not_write: free(action_name); } for (iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; for (GList *before_iter = action->actions_before; before_iter != NULL; before_iter = before_iter->next) { pcmk__related_action_t *before = before_iter->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; bool optional = true; if (before->graphed) { optional = false; style = PCMK__VALUE_BOLD; } else if (before->flags == pcmk__ar_none) { continue; } else if (pcmk_is_set(before->action->flags, pcmk__action_added_to_graph) && pcmk_is_set(action->flags, pcmk__action_added_to_graph) && before->flags != pcmk__ar_if_on_same_node_or_target) { optional = false; } if (all_actions || !optional) { before_name = create_action_name(before->action, verbose); after_name = create_action_name(action, verbose); fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", before_name, after_name, style); free(before_name); free(after_name); } } } fprintf(dot_strm, "}\n"); fflush(dot_strm); fclose(dot_strm); return pcmk_rc_ok; } /*! * \internal * \brief \c scandir() filter for scheduler input CIB files to profile * * \param[in] entry Directory entry * * \retval 1 if the filename ends with ".xml", does not begin with ".", and * refers to a regular file * \retval 0 otherwise */ static int profile_filter(const struct dirent *entry) { const char *filename = entry->d_name; char *buf = NULL; struct stat sb; int rc = 0; if (pcmk__str_any_of(filename, ".", "..", NULL)) { // Skip current (".") and parent ("..") directory links goto done; } if (filename[0] == '.') { crm_trace("Not profiling hidden file '%s'", filename); goto done; } if (!pcmk__ends_with_ext(filename, ".xml")) { crm_trace("Not profiling file '%s' without '.xml' extension", filename); goto done; } buf = crm_strdup_printf("%s/%s", profiling_dir, filename); if ((stat(buf, &sb) != 0) || !S_ISREG(sb.st_mode)) { crm_trace("Not profiling file '%s': not a regular file", filename); goto done; } rc = 1; done: free(buf); return rc; } /*! * \internal * \brief Profile the configuration updates and scheduler actions in a single * CIB file, printing the profiling timings. * * \note \p scheduler->priv->out must have been set to a valid \c pcmk__output_t * object before this function is called. * * \param[in] xml_file The CIB file to profile * \param[in] repeat Number of times to run * \param[in,out] scheduler Scheduler data * \param[in,out] flags Group of enum pcmk__scheduler_flags to set * in addition to defaults * \param[in] use_date The date to set the cluster's time to (may be NULL) */ static void profile_file(const char *xml_file, unsigned int repeat, pcmk_scheduler_t *scheduler, uint64_t flags, const char *use_date) { pcmk__output_t *out = scheduler->priv->out; xmlNode *cib_object = NULL; clock_t start = 0; clock_t end; pcmk__assert(out != NULL); cib_object = pcmk__xml_read(xml_file); start = clock(); if (pcmk_find_cib_element(cib_object, PCMK_XE_STATUS) == NULL) { pcmk__xe_create(cib_object, PCMK_XE_STATUS); } if (pcmk__update_configured_schema(&cib_object, false) != pcmk_rc_ok) { goto done; } if (!pcmk__validate_xml(cib_object, NULL, NULL, NULL)) { goto done; } for (int i = 0; i < repeat; ++i) { pcmk_reset_scheduler(scheduler); scheduler->input = cib_object; pcmk__set_scheduler_flags(scheduler, flags); set_effective_date(scheduler, false, use_date); pcmk__schedule_actions(scheduler); // Avoid freeing cib_object in pcmk_reset_scheduler() scheduler->input = NULL; } pcmk_reset_scheduler(scheduler); end = clock(); out->message(out, "profile", xml_file, start, end); done: pcmk__xml_free(cib_object); } int pcmk__profile_dir(pcmk__output_t *out, uint32_t flags, const char *dir, unsigned int repeat, const char *use_date) { pcmk_scheduler_t *scheduler = NULL; uint64_t scheduler_flags = pcmk__sched_none; - struct dirent **namelist; + struct dirent **namelist = NULL; int num_files = 0; int rc = pcmk_rc_ok; pcmk__assert(out != NULL); scheduler = pcmk_new_scheduler(); if (scheduler == NULL) { return ENOMEM; } scheduler->priv->out = out; if (pcmk_is_set(flags, pcmk_sim_show_scores)) { scheduler_flags |= pcmk__sched_output_scores; } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { scheduler_flags |= pcmk__sched_show_utilization; } // Hack to pass user data to profile_filter profiling_dir = dir; num_files = scandir(dir, &namelist, profile_filter, alphasort); profiling_dir = NULL; if (num_files < 0) { rc = errno; goto done; } if (num_files == 0) { goto done; } out->begin_list(out, NULL, NULL, "Timings"); for (int i = 0; i < num_files; i++) { // glibc doesn't enforce PATH_MAX, so don't limit the buffer size char *path = crm_strdup_printf("%s/%s", dir, namelist[i]->d_name); profile_file(path, repeat, scheduler, scheduler_flags, use_date); free(path); free(namelist[i]); } out->end_list(out); done: pcmk_free_scheduler(scheduler); free(namelist); return rc; } /*! * \brief Set the date of the cluster, either to the value given by * \p use_date, or to the \c PCMK_XA_EXECUTION_DATE value in the CIB. * * \note \p scheduler->priv->out must have been set to a valid \p pcmk__output_t * object before this function is called. * * \param[in,out] scheduler Scheduler data * \param[in] print_original If \p true, the \c PCMK_XA_EXECUTION_DATE * should also be printed * \param[in] use_date The date to set the cluster's time to * (may be NULL) */ static void set_effective_date(pcmk_scheduler_t *scheduler, bool print_original, const char *use_date) { pcmk__output_t *out = scheduler->priv->out; time_t original_date = 0; pcmk__assert(out != NULL); crm_element_value_epoch(scheduler->input, PCMK_XA_EXECUTION_DATE, &original_date); if (use_date) { scheduler->priv->now = crm_time_new(use_date); out->info(out, "Setting effective cluster time: %s", use_date); crm_time_log(LOG_NOTICE, "Pretending 'now' is", scheduler->priv->now, crm_time_log_date | crm_time_log_timeofday); } else if (original_date != 0) { scheduler->priv->now = pcmk__copy_timet(original_date); if (print_original) { char *when = crm_time_as_string(scheduler->priv->now, crm_time_log_date |crm_time_log_timeofday); out->info(out, "Using the original execution date of: %s", when); free(when); } } } /*! * \internal * \brief Simulate successfully executing a pseudo-action in a graph * * \param[in,out] graph Graph to update with pseudo-action result * \param[in,out] action Pseudo-action to simulate executing * * \return Standard Pacemaker return code */ static int simulate_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *node = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); out->message(out, "inject-pseudo-action", node, task); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate executing a resource action in a graph * * \param[in,out] graph Graph to update with resource action result * \param[in,out] action Resource action to simulate executing * * \return Standard Pacemaker return code */ static int simulate_resource_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc; lrmd_event_data_t *op = NULL; int target_outcome = PCMK_OCF_OK; const char *rtype = NULL; const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; const char *resource_config_name = NULL; const char *operation = crm_element_value(action->xml, PCMK_XA_OPERATION); const char *target_rc_s = crm_meta_value(action->params, PCMK__META_OP_TARGET_RC); xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; xmlNode *action_rsc = pcmk__xe_first_child(action->xml, PCMK_XE_PRIMITIVE, NULL, NULL); char *node = crm_element_value_copy(action->xml, PCMK__META_ON_NODE); char *uuid = NULL; const char *router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE); // Certain actions don't need to be displayed or history entries if (pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { crm_debug("No history injection for %s op on %s", operation, node); goto done; // Confirm action and update graph } if (action_rsc == NULL) { // Shouldn't be possible crm_log_xml_err(action->xml, "Bad"); free(node); return EPROTO; } /* A resource might be known by different names in the configuration and in * the action (for example, a clone instance). Grab the configuration name * (which is preferred when writing history), and if necessary, the instance * name. */ resource_config_name = crm_element_value(action_rsc, PCMK_XA_ID); if (resource_config_name == NULL) { // Shouldn't be possible crm_log_xml_err(action->xml, "No ID"); free(node); return EPROTO; } resource = resource_config_name; if (pe_find_resource(fake_resource_list, resource) == NULL) { const char *longname = crm_element_value(action_rsc, PCMK__XA_LONG_ID); if ((longname != NULL) && (pe_find_resource(fake_resource_list, longname) != NULL)) { resource = longname; } } // Certain actions need to be displayed but don't need history entries if (pcmk__strcase_any_of(operation, PCMK_ACTION_DELETE, PCMK_ACTION_META_DATA, NULL)) { out->message(out, "inject-rsc-action", resource, operation, node, (guint) 0); goto done; // Confirm action and update graph } rclass = crm_element_value(action_rsc, PCMK_XA_CLASS); rtype = crm_element_value(action_rsc, PCMK_XA_TYPE); rprovider = crm_element_value(action_rsc, PCMK_XA_PROVIDER); pcmk__scan_min_int(target_rc_s, &target_outcome, 0); pcmk__assert(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call) == pcmk_ok); // Ensure the action node is in the CIB uuid = crm_element_value_copy(action->xml, PCMK__META_ON_NODE_UUID); cib_node = pcmk__inject_node(fake_cib, node, ((router_node == NULL)? uuid: node)); free(uuid); pcmk__assert(cib_node != NULL); // Add a history entry for the action cib_resource = pcmk__inject_resource_history(out, cib_node, resource, resource_config_name, rclass, rtype, rprovider); if (cib_resource == NULL) { crm_err("Could not simulate action %d history for resource %s", action->id, resource); free(node); pcmk__xml_free(cib_node); return EINVAL; } // Simulate and display an executor event for the action result op = pcmk__event_from_graph_action(cib_resource, action, PCMK_EXEC_DONE, target_outcome, "User-injected result"); out->message(out, "inject-rsc-action", resource, op->op_type, node, op->interval_ms); // Check whether action is in a list of desired simulated failures for (const GList *iter = fake_op_fail_list; iter != NULL; iter = iter->next) { const char *spec = (const char *) iter->data; char *key = NULL; const char *match_name = NULL; const char *offset = NULL; // Allow user to specify anonymous clone with or without instance number key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = resource; } free(key); // If not found, try the resource's name in the configuration if ((match_name == NULL) && (strcmp(resource, resource_config_name) != 0)) { key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource_config_name, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = resource_config_name; } free(key); } if (match_name == NULL) { continue; // This failed action entry doesn't match } // ${match_name}_${task}_${interval_in_ms}@${node}=${rc} rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc); if (rc != 1) { out->err(out, "Invalid failed operation '%s' " "(result code must be integer)", spec); continue; // Keep checking other list entries } out->info(out, "Pretending action %d failed with rc=%d", action->id, op->rc); pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); graph->abort_priority = PCMK_SCORE_INFINITY; if (pcmk__str_eq(op->op_type, PCMK_ACTION_START, pcmk__str_none)) { offset = pcmk__s(graph->failed_start_offset, PCMK_VALUE_INFINITY); } else if (pcmk__str_eq(op->op_type, PCMK_ACTION_STOP, pcmk__str_none)) { offset = pcmk__s(graph->failed_stop_offset, PCMK_VALUE_INFINITY); } pcmk__inject_failcount(out, fake_cib, cib_node, match_name, op->op_type, op->interval_ms, op->rc, pcmk_str_is_infinity(offset)); break; } pcmk__inject_action_result(cib_resource, op, node, target_outcome); lrmd_free_event(op); rc = fake_cib->cmds->modify(fake_cib, PCMK_XE_STATUS, cib_node, cib_sync_call); pcmk__assert(rc == pcmk_ok); done: free(node); pcmk__xml_free(cib_node); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate successfully executing a cluster action * * \param[in,out] graph Graph to update with action result * \param[in,out] action Cluster action to simulate * * \return Standard Pacemaker return code */ static int simulate_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *node = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION); xmlNode *rsc = pcmk__xe_first_child(action->xml, PCMK_XE_PRIMITIVE, NULL, NULL); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); out->message(out, "inject-cluster-action", node, task, rsc); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate successfully executing a fencing action * * \param[in,out] graph Graph to update with action result * \param[in,out] action Fencing action to simulate * * \return Standard Pacemaker return code */ static int simulate_fencing_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *op = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); char *target = crm_element_value_copy(action->xml, PCMK__META_ON_NODE); out->message(out, "inject-fencing-action", target, op); if (!pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) { int rc = pcmk_ok; GString *xpath = g_string_sized_new(512); // Set node state to offline xmlNode *cib_node = pcmk__inject_node_state_change(fake_cib, target, false); pcmk__assert(cib_node != NULL); crm_xml_add(cib_node, PCMK_XA_CRM_DEBUG_ORIGIN, __func__); rc = fake_cib->cmds->replace(fake_cib, PCMK_XE_STATUS, cib_node, cib_sync_call); pcmk__assert(rc == pcmk_ok); // Simulate controller clearing node's resource history and attributes pcmk__g_strcat(xpath, "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='", target, "']/" PCMK__XE_LRM, NULL); fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL, cib_xpath|cib_sync_call); g_string_truncate(xpath, 0); pcmk__g_strcat(xpath, "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='", target, "']" "/" PCMK__XE_TRANSIENT_ATTRIBUTES, NULL); fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL, cib_xpath|cib_sync_call); pcmk__xml_free(cib_node); g_string_free(xpath, TRUE); } pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); free(target); return pcmk_rc_ok; } enum pcmk__graph_status pcmk__simulate_transition(pcmk_scheduler_t *scheduler, cib_t *cib, const GList *op_fail_list) { pcmk__graph_t *transition = NULL; enum pcmk__graph_status graph_rc; pcmk__graph_functions_t simulation_fns = { simulate_pseudo_action, simulate_resource_action, simulate_cluster_action, simulate_fencing_action, }; out = scheduler->priv->out; fake_cib = cib; fake_op_fail_list = op_fail_list; if (!out->is_quiet(out)) { out->begin_list(out, NULL, NULL, "Executing Cluster Transition"); } pcmk__set_graph_functions(&simulation_fns); transition = pcmk__unpack_graph(scheduler->priv->graph, crm_system_name); pcmk__log_graph(LOG_DEBUG, transition); fake_resource_list = scheduler->priv->resources; do { graph_rc = pcmk__execute_graph(transition); } while (graph_rc == pcmk__graph_active); fake_resource_list = NULL; if (graph_rc != pcmk__graph_complete) { out->err(out, "Transition failed: %s", pcmk__graph_status2text(graph_rc)); pcmk__log_graph(LOG_ERR, transition); out->err(out, "An invalid transition was produced"); } pcmk__free_graph(transition); if (!out->is_quiet(out)) { // If not quiet, we'll need the resulting CIB for later display xmlNode *cib_object = NULL; int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call); pcmk__assert(rc == pcmk_ok); pcmk_reset_scheduler(scheduler); scheduler->input = cib_object; out->end_list(out); } return graph_rc; } int pcmk__simulate(pcmk_scheduler_t *scheduler, pcmk__output_t *out, const pcmk_injections_t *injections, uint32_t flags, uint32_t section_opts, const char *use_date, const char *input_file, const char *graph_file, const char *dot_file) { int printed = pcmk_rc_no_output; int rc = pcmk_rc_ok; xmlNodePtr input = NULL; cib_t *cib = NULL; rc = cib__signon_query(out, &cib, &input); if (rc != pcmk_rc_ok) { goto simulate_done; } reset(scheduler, input, out, use_date, flags); if (!out->is_quiet(out)) { const bool show_pending = pcmk_is_set(flags, pcmk_sim_show_pending); if (pcmk_is_set(scheduler->flags, pcmk__sched_in_maintenance)) { printed = out->message(out, "maint-mode", scheduler->flags); } if ((scheduler->priv->disabled_resources > 0) || (scheduler->priv->blocked_resources > 0)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); printed = out->info(out, "%d of %d resource instances DISABLED and " "%d BLOCKED from further action due to failure", scheduler->priv->disabled_resources, scheduler->priv->ninstances, scheduler->priv->blocked_resources); } /* Most formatted output headers use caps for each word, but this one * only has the first word capitalized for compatibility with pcs. */ print_cluster_status(scheduler, (show_pending? pcmk_show_pending : 0), section_opts, "Current cluster status", (printed == pcmk_rc_ok)); printed = pcmk_rc_ok; } // If the user requested any injections, handle them if ((injections->node_down != NULL) || (injections->node_fail != NULL) || (injections->node_up != NULL) || (injections->op_inject != NULL) || (injections->ticket_activate != NULL) || (injections->ticket_grant != NULL) || (injections->ticket_revoke != NULL) || (injections->ticket_standby != NULL) || (injections->watchdog != NULL)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); pcmk__inject_scheduler_input(scheduler, cib, injections); printed = pcmk_rc_ok; rc = cib->cmds->query(cib, NULL, &input, cib_sync_call); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); goto simulate_done; } reset(scheduler, input, out, use_date, flags); } if (input_file != NULL) { rc = pcmk__xml_write_file(input, input_file, false); if (rc != pcmk_rc_ok) { goto simulate_done; } } if (pcmk_any_flags_set(flags, pcmk_sim_process | pcmk_sim_simulate)) { pcmk__output_t *logger_out = NULL; if (pcmk_all_flags_set(scheduler->flags, pcmk__sched_output_scores |pcmk__sched_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Assignment Scores and Utilization Information"); printed = pcmk_rc_ok; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_output_scores)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Assignment Scores"); printed = pcmk_rc_ok; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Utilization Information"); printed = pcmk_rc_ok; } else { rc = pcmk__log_output_new(&logger_out); if (rc != pcmk_rc_ok) { goto simulate_done; } pe__register_messages(logger_out); pcmk__register_lib_messages(logger_out); scheduler->priv->out = logger_out; } pcmk__schedule_actions(scheduler); if (logger_out == NULL) { out->end_list(out); } else { logger_out->finish(logger_out, CRM_EX_OK, true, NULL); pcmk__output_free(logger_out); scheduler->priv->out = out; } input = NULL; /* Don't try and free it twice */ if (graph_file != NULL) { rc = pcmk__xml_write_file(scheduler->priv->graph, graph_file, false); if (rc != pcmk_rc_ok) { rc = pcmk_rc_graph_error; goto simulate_done; } } if (dot_file != NULL) { rc = write_sim_dotfile(scheduler, dot_file, pcmk_is_set(flags, pcmk_sim_all_actions), pcmk_is_set(flags, pcmk_sim_verbose)); if (rc != pcmk_rc_ok) { rc = pcmk_rc_dot_error; goto simulate_done; } } if (!out->is_quiet(out)) { print_transition_summary(scheduler, printed == pcmk_rc_ok); } } rc = pcmk_rc_ok; if (!pcmk_is_set(flags, pcmk_sim_simulate)) { goto simulate_done; } PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); if (pcmk__simulate_transition(scheduler, cib, injections->op_fail) != pcmk__graph_complete) { rc = pcmk_rc_invalid_transition; } if (out->is_quiet(out)) { goto simulate_done; } set_effective_date(scheduler, true, use_date); if (pcmk_is_set(flags, pcmk_sim_show_scores)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_output_scores); } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_show_utilization); } cluster_status(scheduler); print_cluster_status(scheduler, 0, section_opts, "Revised Cluster Status", true); simulate_done: cib__clean_up_connection(&cib); return rc; } // @COMPAT Use uint32_t for flags int pcmk_simulate(xmlNodePtr *xml, pcmk_scheduler_t *scheduler, const pcmk_injections_t *injections, unsigned int flags, unsigned int section_opts, const char *use_date, const char *input_file, const char *graph_file, const char *dot_file) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pe__register_messages(out); pcmk__register_lib_messages(out); rc = pcmk__simulate(scheduler, out, injections, (uint32_t) flags, (uint32_t) section_opts, use_date, input_file, graph_file, dot_file); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c index f9bb6b7017..1197c92604 100644 --- a/lib/pengine/bundle.c +++ b/lib/pengine/bundle.c @@ -1,2098 +1,2099 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // bool, true, false #include #include #include #include #include #include #include enum pe__bundle_mount_flags { pe__bundle_mount_none = 0x00, // mount instance-specific subdirectory rather than source directly pe__bundle_mount_subdir = 0x01 }; typedef struct { char *source; char *target; char *options; uint32_t flags; // bitmask of pe__bundle_mount_flags } pe__bundle_mount_t; typedef struct { char *source; char *target; } pe__bundle_port_t; enum pe__container_agent { PE__CONTAINER_AGENT_UNKNOWN, PE__CONTAINER_AGENT_DOCKER, PE__CONTAINER_AGENT_PODMAN, }; #define PE__CONTAINER_AGENT_UNKNOWN_S "unknown" #define PE__CONTAINER_AGENT_DOCKER_S "docker" #define PE__CONTAINER_AGENT_PODMAN_S "podman" typedef struct pe__bundle_variant_data_s { int promoted_max; int nreplicas; int nreplicas_per_host; char *prefix; char *image; const char *ip_last; char *host_network; char *host_netmask; char *control_port; char *container_network; char *ip_range_start; gboolean add_host; gchar *container_host_options; char *container_command; char *launcher_options; const char *attribute_target; pcmk_resource_t *child; GList *replicas; // pcmk__bundle_replica_t * GList *ports; // pe__bundle_port_t * GList *mounts; // pe__bundle_mount_t * /* @TODO Maybe use a more object-oriented design instead, with a set of * methods that are different per type rather than switching on this */ enum pe__container_agent agent_type; } pe__bundle_variant_data_t; #define get_bundle_variant_data(data, rsc) do { \ pcmk__assert(pcmk__is_bundle(rsc)); \ data = rsc->priv->variant_opaque; \ } while (0) /*! * \internal * \brief Get maximum number of bundle replicas allowed to run * * \param[in] rsc Bundle or bundled resource to check * * \return Maximum replicas for bundle corresponding to \p rsc */ int pe__bundle_max(const pcmk_resource_t *rsc) { const pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, pe__const_top_resource(rsc, true)); return bundle_data->nreplicas; } /*! * \internal * \brief Get the resource inside a bundle * * \param[in] bundle Bundle to check * * \return Resource inside \p bundle if any, otherwise NULL */ pcmk_resource_t * pe__bundled_resource(const pcmk_resource_t *rsc) { const pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, pe__const_top_resource(rsc, true)); return bundle_data->child; } /*! * \internal * \brief Get containerized resource corresponding to a given bundle container * * \param[in] instance Collective instance that might be a bundle container * * \return Bundled resource instance inside \p instance if it is a bundle * container instance, otherwise NULL */ const pcmk_resource_t * pe__get_rsc_in_container(const pcmk_resource_t *instance) { const pe__bundle_variant_data_t *data = NULL; const pcmk_resource_t *top = pe__const_top_resource(instance, true); if (!pcmk__is_bundle(top)) { return NULL; } get_bundle_variant_data(data, top); for (const GList *iter = data->replicas; iter != NULL; iter = iter->next) { const pcmk__bundle_replica_t *replica = iter->data; if (instance == replica->container) { return replica->child; } } return NULL; } /*! * \internal * \brief Check whether a given node is created by a bundle * * \param[in] bundle Bundle resource to check * \param[in] node Node to check * * \return true if \p node is an instance of \p bundle, otherwise false */ bool pe__node_is_bundle_instance(const pcmk_resource_t *bundle, const pcmk_node_t *node) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, bundle); for (GList *iter = bundle_data->replicas; iter != NULL; iter = iter->next) { pcmk__bundle_replica_t *replica = iter->data; if (pcmk__same_node(node, replica->node)) { return true; } } return false; } /*! * \internal * \brief Get the container of a bundle's first replica * * \param[in] bundle Bundle resource to get container for * * \return Container resource from first replica of \p bundle if any, * otherwise NULL */ pcmk_resource_t * pe__first_container(const pcmk_resource_t *bundle) { const pe__bundle_variant_data_t *bundle_data = NULL; const pcmk__bundle_replica_t *replica = NULL; get_bundle_variant_data(bundle_data, bundle); if (bundle_data->replicas == NULL) { return NULL; } replica = bundle_data->replicas->data; return replica->container; } /*! * \internal * \brief Iterate over bundle replicas * * \param[in,out] bundle Bundle to iterate over * \param[in] fn Function to call for each replica (its return value * indicates whether to continue iterating) * \param[in,out] user_data Pointer to pass to \p fn */ void pe__foreach_bundle_replica(pcmk_resource_t *bundle, bool (*fn)(pcmk__bundle_replica_t *, void *), void *user_data) { const pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, bundle); for (GList *iter = bundle_data->replicas; iter != NULL; iter = iter->next) { if (!fn((pcmk__bundle_replica_t *) iter->data, user_data)) { break; } } } /*! * \internal * \brief Iterate over const bundle replicas * * \param[in] bundle Bundle to iterate over * \param[in] fn Function to call for each replica (its return value * indicates whether to continue iterating) * \param[in,out] user_data Pointer to pass to \p fn */ void pe__foreach_const_bundle_replica(const pcmk_resource_t *bundle, bool (*fn)(const pcmk__bundle_replica_t *, void *), void *user_data) { const pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, bundle); for (const GList *iter = bundle_data->replicas; iter != NULL; iter = iter->next) { if (!fn((const pcmk__bundle_replica_t *) iter->data, user_data)) { break; } } } static char * next_ip(const char *last_ip) { unsigned int oct1 = 0; unsigned int oct2 = 0; unsigned int oct3 = 0; unsigned int oct4 = 0; int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4); if (rc != 4) { /*@ TODO check for IPv6 */ return NULL; } else if (oct3 > 253) { return NULL; } else if (oct4 > 253) { ++oct3; oct4 = 1; } else { ++oct4; } return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4); } static void allocate_ip(pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica, GString *buffer) { if(data->ip_range_start == NULL) { return; } else if(data->ip_last) { replica->ipaddr = next_ip(data->ip_last); } else { replica->ipaddr = strdup(data->ip_range_start); } data->ip_last = replica->ipaddr; switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: if (data->add_host) { g_string_append_printf(buffer, " --add-host=%s-%d:%s", data->prefix, replica->offset, replica->ipaddr); } else { g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d", replica->ipaddr, data->prefix, replica->offset); } break; default: // PE__CONTAINER_AGENT_UNKNOWN break; } } static xmlNode * create_resource(const char *name, const char *provider, const char *kind) { xmlNode *rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE); crm_xml_add(rsc, PCMK_XA_ID, name); crm_xml_add(rsc, PCMK_XA_CLASS, PCMK_RESOURCE_CLASS_OCF); crm_xml_add(rsc, PCMK_XA_PROVIDER, provider); crm_xml_add(rsc, PCMK_XA_TYPE, kind); return rsc; } /*! * \internal * \brief Check whether cluster can manage resource inside container * * \param[in,out] data Container variant data * * \return TRUE if networking configuration is acceptable, FALSE otherwise * * \note The resource is manageable if an IP range or control port has been * specified. If a control port is used without an IP range, replicas per * host must be 1. */ static bool valid_network(pe__bundle_variant_data_t *data) { if(data->ip_range_start) { return TRUE; } if(data->control_port) { if(data->nreplicas_per_host > 1) { pcmk__config_err("Specifying the '" PCMK_XA_CONTROL_PORT "' for %s " "requires '" PCMK_XA_REPLICAS_PER_HOST "=1'", data->prefix); data->nreplicas_per_host = 1; // @TODO to be sure: // pcmk__clear_rsc_flags(rsc, pcmk__rsc_unique); } return TRUE; } return FALSE; } static int create_ip_resource(pcmk_resource_t *parent, pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica) { if(data->ip_range_start) { char *id = NULL; xmlNode *xml_ip = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-ip-%s", data->prefix, replica->ipaddr); pcmk__xml_sanitize_id(id); xml_ip = create_resource(id, "heartbeat", "IPaddr2"); free(id); xml_obj = pcmk__xe_create(xml_ip, PCMK_XE_INSTANCE_ATTRIBUTES); pcmk__xe_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset); crm_create_nvpair_xml(xml_obj, NULL, "ip", replica->ipaddr); if(data->host_network) { crm_create_nvpair_xml(xml_obj, NULL, "nic", data->host_network); } if(data->host_netmask) { crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", data->host_netmask); } else { crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", "32"); } xml_obj = pcmk__xe_create(xml_ip, PCMK_XE_OPERATIONS); crm_create_op_xml(xml_obj, pcmk__xe_id(xml_ip), PCMK_ACTION_MONITOR, "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? if (pe__unpack_resource(xml_ip, &replica->ip, parent, parent->priv->scheduler) != pcmk_rc_ok) { return pcmk_rc_unpack_error; } parent->priv->children = g_list_append(parent->priv->children, replica->ip); } return pcmk_rc_ok; } static const char* container_agent_str(enum pe__container_agent t) { switch (t) { case PE__CONTAINER_AGENT_DOCKER: return PE__CONTAINER_AGENT_DOCKER_S; case PE__CONTAINER_AGENT_PODMAN: return PE__CONTAINER_AGENT_PODMAN_S; default: // PE__CONTAINER_AGENT_UNKNOWN break; } return PE__CONTAINER_AGENT_UNKNOWN_S; } static int create_container_resource(pcmk_resource_t *parent, const pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica) { char *id = NULL; xmlNode *xml_container = NULL; xmlNode *xml_obj = NULL; // Agent-specific const char *hostname_opt = NULL; const char *env_opt = NULL; const char *agent_str = NULL; GString *buffer = NULL; GString *dbuffer = NULL; // Where syntax differences are drop-in replacements, set them now switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: hostname_opt = "-h "; env_opt = "-e "; break; default: // PE__CONTAINER_AGENT_UNKNOWN return pcmk_rc_unpack_error; } agent_str = container_agent_str(data->agent_type); buffer = g_string_sized_new(4096); id = crm_strdup_printf("%s-%s-%d", data->prefix, agent_str, replica->offset); pcmk__xml_sanitize_id(id); xml_container = create_resource(id, "heartbeat", agent_str); free(id); xml_obj = pcmk__xe_create(xml_container, PCMK_XE_INSTANCE_ATTRIBUTES); pcmk__xe_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset); crm_create_nvpair_xml(xml_obj, NULL, "image", data->image); crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", PCMK_VALUE_TRUE); crm_create_nvpair_xml(xml_obj, NULL, "force_kill", PCMK_VALUE_FALSE); crm_create_nvpair_xml(xml_obj, NULL, "reuse", PCMK_VALUE_FALSE); if (data->agent_type == PE__CONTAINER_AGENT_DOCKER) { g_string_append(buffer, " --restart=no"); } /* Set a container hostname only if we have an IP to map it to. The user can * set -h or --uts=host themselves if they want a nicer name for logs, but * this makes applications happy who need their hostname to match the IP * they bind to. */ if (data->ip_range_start != NULL) { g_string_append_printf(buffer, " %s%s-%d", hostname_opt, data->prefix, replica->offset); } pcmk__g_strcat(buffer, " ", env_opt, "PCMK_stderr=1", NULL); if (data->container_network != NULL) { pcmk__g_strcat(buffer, " --net=", data->container_network, NULL); } if (data->control_port != NULL) { pcmk__g_strcat(buffer, " ", env_opt, "PCMK_" PCMK__ENV_REMOTE_PORT "=", data->control_port, NULL); } else { g_string_append_printf(buffer, " %sPCMK_" PCMK__ENV_REMOTE_PORT "=%d", env_opt, DEFAULT_REMOTE_PORT); } for (GList *iter = data->mounts; iter != NULL; iter = iter->next) { pe__bundle_mount_t *mount = (pe__bundle_mount_t *) iter->data; char *source = NULL; if (pcmk_is_set(mount->flags, pe__bundle_mount_subdir)) { source = crm_strdup_printf("%s/%s-%d", mount->source, data->prefix, replica->offset); pcmk__add_separated_word(&dbuffer, 1024, source, ","); } switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: pcmk__g_strcat(buffer, " -v ", pcmk__s(source, mount->source), ":", mount->target, NULL); if (mount->options != NULL) { pcmk__g_strcat(buffer, ":", mount->options, NULL); } break; default: break; } free(source); } for (GList *iter = data->ports; iter != NULL; iter = iter->next) { pe__bundle_port_t *port = (pe__bundle_port_t *) iter->data; switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: if (replica->ipaddr != NULL) { pcmk__g_strcat(buffer, " -p ", replica->ipaddr, ":", port->source, ":", port->target, NULL); } else if (!pcmk__str_eq(data->container_network, PCMK_VALUE_HOST, pcmk__str_none)) { // No need to do port mapping if net == host pcmk__g_strcat(buffer, " -p ", port->source, ":", port->target, NULL); } break; default: break; } } /* @COMPAT: We should use pcmk__add_word() here, but we can't yet, because * it would cause restarts during rolling upgrades. * * In a previous version of the container resource creation logic, if * data->launcher_options is not NULL, we append * (" %s", data->launcher_options) even if data->launcher_options is an * empty string. Likewise for data->container_host_options. Using * * pcmk__add_word(buffer, 0, data->launcher_options) * * removes that extra trailing space, causing a resource definition change. */ if (data->launcher_options != NULL) { pcmk__g_strcat(buffer, " ", data->launcher_options, NULL); } if (data->container_host_options != NULL) { pcmk__g_strcat(buffer, " ", data->container_host_options, NULL); } crm_create_nvpair_xml(xml_obj, NULL, "run_opts", (const char *) buffer->str); g_string_free(buffer, TRUE); crm_create_nvpair_xml(xml_obj, NULL, "mount_points", (dbuffer != NULL)? (const char *) dbuffer->str : ""); if (dbuffer != NULL) { g_string_free(dbuffer, TRUE); } if (replica->child != NULL) { if (data->container_command != NULL) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } else { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR "/" PCMK__SERVER_REMOTED); } /* TODO: Allow users to specify their own? * * We just want to know if the container is alive; we'll monitor the * child independently. */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); #if 0 /* @TODO Consider supporting the use case where we can start and stop * resources, but not proxy local commands (such as setting node * attributes), by running the local executor in stand-alone mode. * However, this would probably be better done via ACLs as with other * Pacemaker Remote nodes. */ } else if ((child != NULL) && data->untrusted) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", CRM_DAEMON_DIR "/" PCMK__SERVER_EXECD); crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", CRM_DAEMON_DIR "/pacemaker/cts-exec-helper -c poke"); #endif } else { if (data->container_command != NULL) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want to know if it * is alive. */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); } xml_obj = pcmk__xe_create(xml_container, PCMK_XE_OPERATIONS); crm_create_op_xml(xml_obj, pcmk__xe_id(xml_container), PCMK_ACTION_MONITOR, "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? if (pe__unpack_resource(xml_container, &replica->container, parent, parent->priv->scheduler) != pcmk_rc_ok) { return pcmk_rc_unpack_error; } pcmk__set_rsc_flags(replica->container, pcmk__rsc_replica_container); parent->priv->children = g_list_append(parent->priv->children, replica->container); return pcmk_rc_ok; } /*! * \brief Ban a node from a resource's (and its children's) allowed nodes list * * \param[in,out] rsc Resource to modify * \param[in] uname Name of node to ban */ static void disallow_node(pcmk_resource_t *rsc, const char *uname) { gpointer match = g_hash_table_lookup(rsc->priv->allowed_nodes, uname); if (match) { ((pcmk_node_t *) match)->assign->score = -PCMK_SCORE_INFINITY; ((pcmk_node_t *) match)->assign->probe_mode = pcmk__probe_never; } g_list_foreach(rsc->priv->children, (GFunc) disallow_node, (gpointer) uname); } static int create_remote_resource(pcmk_resource_t *parent, pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica) { if (replica->child && valid_network(data)) { GHashTableIter gIter; pcmk_node_t *node = NULL; xmlNode *xml_remote = NULL; char *id = crm_strdup_printf("%s-%d", data->prefix, replica->offset); char *port_s = NULL; const char *uname = NULL; const char *connect_name = NULL; pcmk_scheduler_t *scheduler = parent->priv->scheduler; if (pe_find_resource(scheduler->priv->resources, id) != NULL) { free(id); // The biggest hammer we have id = crm_strdup_printf("pcmk-internal-%s-remote-%d", replica->child->id, replica->offset); //@TODO return error instead of asserting? pcmk__assert(pe_find_resource(scheduler->priv->resources, id) == NULL); } /* REMOTE_CONTAINER_HACK: Using "#uname" as the server name when the * connection does not have its own IP is a magic string that we use to * support nested remotes (i.e. a bundle running on a remote node). */ connect_name = (replica->ipaddr? replica->ipaddr : "#uname"); if (data->control_port == NULL) { port_s = pcmk__itoa(DEFAULT_REMOTE_PORT); } /* This sets replica->container as replica->remote's container, which is * similar to what happens with guest nodes. This is how the scheduler * knows that the bundle node is fenced by recovering the container, and * that remote should be ordered relative to the container. */ xml_remote = pe_create_remote_xml(NULL, id, replica->container->id, NULL, NULL, NULL, connect_name, (data->control_port? data->control_port : port_s)); free(port_s); /* Abandon our created ID, and pull the copy from the XML, because we * need something that will get freed during scheduler data cleanup to * use as the node ID and uname. */ free(id); id = NULL; uname = pcmk__xe_id(xml_remote); /* Ensure a node has been created for the guest (it may have already * been, if it has a permanent node attribute), and ensure its weight is * -INFINITY so no other resources can run on it. */ node = pcmk_find_node(scheduler, uname); if (node == NULL) { node = pe_create_node(uname, uname, PCMK_VALUE_REMOTE, -PCMK_SCORE_INFINITY, scheduler); } else { node->assign->score = -PCMK_SCORE_INFINITY; } node->assign->probe_mode = pcmk__probe_never; /* unpack_remote_nodes() ensures that each remote node and guest node * has a pcmk_node_t entry. Ideally, it would do the same for bundle * nodes. Unfortunately, a bundle has to be mostly unpacked before it's * obvious what nodes will be needed, so we do it just above. * * Worse, that means that the node may have been utilized while * unpacking other resources, without our weight correction. The most * likely place for this to happen is when pe__unpack_resource() calls * resource_location() to set a default score in symmetric clusters. * This adds a node *copy* to each resource's allowed nodes, and these * copies will have the wrong weight. * * As a hacky workaround, fix those copies here. * * @TODO Possible alternative: ensure bundles are unpacked before other * resources, so the weight is correct before any copies are made. */ g_list_foreach(scheduler->priv->resources, (GFunc) disallow_node, (gpointer) uname); replica->node = pe__copy_node(node); replica->node->assign->score = 500; replica->node->assign->probe_mode = pcmk__probe_exclusive; /* Ensure the node shows up as allowed and with the correct discovery set */ if (replica->child->priv->allowed_nodes != NULL) { g_hash_table_destroy(replica->child->priv->allowed_nodes); } replica->child->priv->allowed_nodes = pcmk__strkey_table(NULL, pcmk__free_node_copy); g_hash_table_insert(replica->child->priv->allowed_nodes, (gpointer) replica->node->priv->id, pe__copy_node(replica->node)); { const pcmk_resource_t *parent = replica->child->priv->parent; pcmk_node_t *copy = pe__copy_node(replica->node); copy->assign->score = -PCMK_SCORE_INFINITY; g_hash_table_insert(parent->priv->allowed_nodes, (gpointer) replica->node->priv->id, copy); } if (pe__unpack_resource(xml_remote, &replica->remote, parent, scheduler) != pcmk_rc_ok) { return pcmk_rc_unpack_error; } g_hash_table_iter_init(&gIter, replica->remote->priv->allowed_nodes); while (g_hash_table_iter_next(&gIter, NULL, (void **)&node)) { if (pcmk__is_pacemaker_remote_node(node)) { /* Remote resources can only run on 'normal' cluster node */ node->assign->score = -PCMK_SCORE_INFINITY; } } replica->node->priv->remote = replica->remote; // Ensure pcmk__is_guest_or_bundle_node() functions correctly replica->remote->priv->launcher = replica->container; /* A bundle's #kind is closer to "container" (guest node) than the * "remote" set by pe_create_node(). */ pcmk__insert_dup(replica->node->priv->attrs, CRM_ATTR_KIND, "container"); /* One effect of this is that unpack_launcher() will add * replica->remote to replica->container's launched resources, which * will make pe__resource_contains_guest_node() true for * replica->container. * * replica->child does NOT get added to replica->container's launched * resources. The only noticeable effect if it did would be for its * fail count to be taken into account when checking * replica->container's migration threshold. */ parent->priv->children = g_list_append(parent->priv->children, replica->remote); } return pcmk_rc_ok; } static int create_replica_resources(pcmk_resource_t *parent, pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica) { int rc = pcmk_rc_ok; rc = create_container_resource(parent, data, replica); if (rc != pcmk_rc_ok) { return rc; } rc = create_ip_resource(parent, data, replica); if (rc != pcmk_rc_ok) { return rc; } rc = create_remote_resource(parent, data, replica); if (rc != pcmk_rc_ok) { return rc; } if ((replica->child != NULL) && (replica->ipaddr != NULL)) { pcmk__insert_meta(replica->child->priv, "external-ip", replica->ipaddr); } if (replica->remote != NULL) { /* * Allow the remote connection resource to be allocated to a * different node than the one on which the container is active. * * This makes it possible to have Pacemaker Remote nodes running * containers with the remote executor inside in order to start * services inside those containers. */ pcmk__set_rsc_flags(replica->remote, pcmk__rsc_remote_nesting_allowed); } return rc; } static void mount_add(pe__bundle_variant_data_t *bundle_data, const char *source, const char *target, const char *options, uint32_t flags) { pe__bundle_mount_t *mount = pcmk__assert_alloc(1, sizeof(pe__bundle_mount_t)); mount->source = pcmk__str_copy(source); mount->target = pcmk__str_copy(target); mount->options = pcmk__str_copy(options); mount->flags = flags; bundle_data->mounts = g_list_append(bundle_data->mounts, mount); } static void mount_free(pe__bundle_mount_t *mount) { free(mount->source); free(mount->target); free(mount->options); free(mount); } static void port_free(pe__bundle_port_t *port) { free(port->source); free(port->target); free(port); } static pcmk__bundle_replica_t * replica_for_remote(pcmk_resource_t *remote) { pcmk_resource_t *top = remote; pe__bundle_variant_data_t *bundle_data = NULL; if (top == NULL) { return NULL; } while (top->priv->parent != NULL) { top = top->priv->parent; } get_bundle_variant_data(bundle_data, top); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; if (replica->remote == remote) { return replica; } } CRM_LOG_ASSERT(FALSE); return NULL; } bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc) { const char *value; GHashTable *params = NULL; if (rsc == NULL) { return false; } // Use NULL node since pcmk__bundle_expand() uses that to set value params = pe_rsc_params(rsc, NULL, rsc->priv->scheduler); value = g_hash_table_lookup(params, PCMK_REMOTE_RA_ADDR); return pcmk__str_eq(value, "#uname", pcmk__str_casei) && xml_contains_remote_node(rsc->priv->xml); } const char * pe__add_bundle_remote_name(pcmk_resource_t *rsc, xmlNode *xml, const char *field) { // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside pcmk_node_t *node = NULL; pcmk__bundle_replica_t *replica = NULL; if (!pe__bundle_needs_remote_name(rsc)) { return NULL; } replica = replica_for_remote(rsc); if (replica == NULL) { return NULL; } node = replica->container->priv->assigned_node; if (node == NULL) { /* If it won't be running anywhere after the * transition, go with where it's running now. */ node = pcmk__current_node(replica->container); } if(node == NULL) { crm_trace("Cannot determine address for bundle connection %s", rsc->id); return NULL; } crm_trace("Setting address for bundle connection %s to bundle host %s", rsc->id, pcmk__node_name(node)); if(xml != NULL && field != NULL) { crm_xml_add(xml, field, node->priv->name); } return node->priv->name; } #define pe__set_bundle_mount_flags(mount_xml, flags, flags_to_set) do { \ flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Bundle mount", pcmk__xe_id(mount_xml), \ flags, (flags_to_set), #flags_to_set); \ } while (0) bool pe__unpack_bundle(pcmk_resource_t *rsc) { const char *value = NULL; xmlNode *xml_obj = NULL; const xmlNode *xml_child = NULL; xmlNode *xml_resource = NULL; pe__bundle_variant_data_t *bundle_data = NULL; bool need_log_mount = TRUE; pcmk__assert(rsc != NULL); pcmk__rsc_trace(rsc, "Processing resource %s...", rsc->id); bundle_data = pcmk__assert_alloc(1, sizeof(pe__bundle_variant_data_t)); rsc->priv->variant_opaque = bundle_data; bundle_data->prefix = strdup(rsc->id); xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_DOCKER, NULL, NULL); if (xml_obj != NULL) { bundle_data->agent_type = PE__CONTAINER_AGENT_DOCKER; } if (xml_obj == NULL) { xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_PODMAN, NULL, NULL); if (xml_obj != NULL) { bundle_data->agent_type = PE__CONTAINER_AGENT_PODMAN; } } if (xml_obj == NULL) { return FALSE; } // Use 0 for default, minimum, and invalid PCMK_XA_PROMOTED_MAX value = crm_element_value(xml_obj, PCMK_XA_PROMOTED_MAX); pcmk__scan_min_int(value, &bundle_data->promoted_max, 0); /* Default replicas to PCMK_XA_PROMOTED_MAX if it was specified and 1 * otherwise */ value = crm_element_value(xml_obj, PCMK_XA_REPLICAS); if ((value == NULL) && (bundle_data->promoted_max > 0)) { bundle_data->nreplicas = bundle_data->promoted_max; } else { pcmk__scan_min_int(value, &bundle_data->nreplicas, 1); } /* * Communication between containers on the same host via the * floating IPs only works if the container is started with: * --userland-proxy=false --ip-masq=false */ value = crm_element_value(xml_obj, PCMK_XA_REPLICAS_PER_HOST); pcmk__scan_min_int(value, &bundle_data->nreplicas_per_host, 1); if (bundle_data->nreplicas_per_host == 1) { pcmk__clear_rsc_flags(rsc, pcmk__rsc_unique); } bundle_data->container_command = crm_element_value_copy(xml_obj, PCMK_XA_RUN_COMMAND); bundle_data->launcher_options = crm_element_value_copy(xml_obj, PCMK_XA_OPTIONS); bundle_data->image = crm_element_value_copy(xml_obj, PCMK_XA_IMAGE); bundle_data->container_network = crm_element_value_copy(xml_obj, PCMK_XA_NETWORK); xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_NETWORK, NULL, NULL); if(xml_obj) { bundle_data->ip_range_start = crm_element_value_copy(xml_obj, PCMK_XA_IP_RANGE_START); bundle_data->host_netmask = crm_element_value_copy(xml_obj, PCMK_XA_HOST_NETMASK); bundle_data->host_network = crm_element_value_copy(xml_obj, PCMK_XA_HOST_INTERFACE); bundle_data->control_port = crm_element_value_copy(xml_obj, PCMK_XA_CONTROL_PORT); value = crm_element_value(xml_obj, PCMK_XA_ADD_HOST); if (crm_str_to_boolean(value, &bundle_data->add_host) != 1) { bundle_data->add_host = TRUE; } for (xml_child = pcmk__xe_first_child(xml_obj, PCMK_XE_PORT_MAPPING, NULL, NULL); xml_child != NULL; xml_child = pcmk__xe_next(xml_child, PCMK_XE_PORT_MAPPING)) { pe__bundle_port_t *port = pcmk__assert_alloc(1, sizeof(pe__bundle_port_t)); port->source = crm_element_value_copy(xml_child, PCMK_XA_PORT); if(port->source == NULL) { port->source = crm_element_value_copy(xml_child, PCMK_XA_RANGE); } else { port->target = crm_element_value_copy(xml_child, PCMK_XA_INTERNAL_PORT); } if(port->source != NULL && strlen(port->source) > 0) { if(port->target == NULL) { port->target = strdup(port->source); } bundle_data->ports = g_list_append(bundle_data->ports, port); } else { pcmk__config_err("Invalid " PCMK_XA_PORT " directive %s", pcmk__xe_id(xml_child)); port_free(port); } } } xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_STORAGE, NULL, NULL); for (xml_child = pcmk__xe_first_child(xml_obj, PCMK_XE_STORAGE_MAPPING, NULL, NULL); xml_child != NULL; xml_child = pcmk__xe_next(xml_child, PCMK_XE_STORAGE_MAPPING)) { const char *source = crm_element_value(xml_child, PCMK_XA_SOURCE_DIR); const char *target = crm_element_value(xml_child, PCMK_XA_TARGET_DIR); const char *options = crm_element_value(xml_child, PCMK_XA_OPTIONS); int flags = pe__bundle_mount_none; if (source == NULL) { source = crm_element_value(xml_child, PCMK_XA_SOURCE_DIR_ROOT); pe__set_bundle_mount_flags(xml_child, flags, pe__bundle_mount_subdir); } if (source && target) { mount_add(bundle_data, source, target, options, flags); if (strcmp(target, "/var/log") == 0) { need_log_mount = FALSE; } } else { pcmk__config_err("Invalid mount directive %s", pcmk__xe_id(xml_child)); } } xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_PRIMITIVE, NULL, NULL); if (xml_obj && valid_network(bundle_data)) { const char *suffix = NULL; char *value = NULL; xmlNode *xml_set = NULL; xml_resource = pcmk__xe_create(NULL, PCMK_XE_CLONE); /* @COMPAT We no longer use the tag, but we need to keep it as * part of the resource name, so that bundles don't restart in a rolling * upgrade. (It also avoids needing to change regression tests.) */ suffix = (const char *) xml_resource->name; if (bundle_data->promoted_max > 0) { suffix = "master"; } pcmk__xe_set_id(xml_resource, "%s-%s", bundle_data->prefix, suffix); xml_set = pcmk__xe_create(xml_resource, PCMK_XE_META_ATTRIBUTES); pcmk__xe_set_id(xml_set, "%s-%s-meta", bundle_data->prefix, xml_resource->name); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_ORDERED, PCMK_VALUE_TRUE); value = pcmk__itoa(bundle_data->nreplicas); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_CLONE_MAX, value); free(value); value = pcmk__itoa(bundle_data->nreplicas_per_host); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_CLONE_NODE_MAX, value); free(value); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_GLOBALLY_UNIQUE, pcmk__btoa(bundle_data->nreplicas_per_host > 1)); if (bundle_data->promoted_max) { crm_create_nvpair_xml(xml_set, NULL, PCMK_META_PROMOTABLE, PCMK_VALUE_TRUE); value = pcmk__itoa(bundle_data->promoted_max); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_PROMOTED_MAX, value); free(value); } //crm_xml_add(xml_obj, PCMK_XA_ID, bundle_data->prefix); pcmk__xml_copy(xml_resource, xml_obj); } else if(xml_obj) { pcmk__config_err("Cannot control %s inside %s without either " PCMK_XA_IP_RANGE_START " or " PCMK_XA_CONTROL_PORT, rsc->id, pcmk__xe_id(xml_obj)); return FALSE; } if(xml_resource) { int lpc = 0; GList *childIter = NULL; pe__bundle_port_t *port = NULL; GString *buffer = NULL; if (pe__unpack_resource(xml_resource, &(bundle_data->child), rsc, rsc->priv->scheduler) != pcmk_rc_ok) { return FALSE; } /* Currently, we always map the default authentication key location * into the same location inside the container. * * Ideally, we would respect the host's PCMK_authkey_location, but: * - it may be different on different nodes; * - the actual connection will do extra checking to make sure the key * file exists and is readable, that we can't do here on the DC * - tools such as crm_resource and crm_simulate may not have the same * environment variables as the cluster, causing operation digests to * differ * * Always using the default location inside the container is fine, * because we control the pacemaker_remote environment, and it avoids * having to pass another environment variable to the container. * * @TODO A better solution may be to have only pacemaker_remote use the * environment variable, and have the cluster nodes use a new * cluster option for key location. This would introduce the limitation * of the location being the same on all cluster nodes, but that's * reasonable. */ mount_add(bundle_data, DEFAULT_REMOTE_KEY_LOCATION, DEFAULT_REMOTE_KEY_LOCATION, NULL, pe__bundle_mount_none); if (need_log_mount) { mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL, pe__bundle_mount_subdir); } port = pcmk__assert_alloc(1, sizeof(pe__bundle_port_t)); if(bundle_data->control_port) { port->source = strdup(bundle_data->control_port); } else { /* If we wanted to respect PCMK_remote_port, we could use * crm_default_remote_port() here and elsewhere in this file instead * of DEFAULT_REMOTE_PORT. * * However, it gains nothing, since we control both the container * environment and the connection resource parameters, and the user * can use a different port if desired by setting * PCMK_XA_CONTROL_PORT. */ port->source = pcmk__itoa(DEFAULT_REMOTE_PORT); } port->target = strdup(port->source); bundle_data->ports = g_list_append(bundle_data->ports, port); buffer = g_string_sized_new(1024); for (childIter = bundle_data->child->priv->children; childIter != NULL; childIter = childIter->next) { pcmk__bundle_replica_t *replica = NULL; replica = pcmk__assert_alloc(1, sizeof(pcmk__bundle_replica_t)); replica->child = childIter->data; pcmk__set_rsc_flags(replica->child, pcmk__rsc_exclusive_probes); replica->offset = lpc++; // Ensure the child's notify gets set based on the underlying primitive's value if (pcmk_is_set(replica->child->flags, pcmk__rsc_notify)) { pcmk__set_rsc_flags(bundle_data->child, pcmk__rsc_notify); } allocate_ip(bundle_data, replica, buffer); bundle_data->replicas = g_list_append(bundle_data->replicas, replica); + // coverity[null_field] replica->child can't be NULL here bundle_data->attribute_target = g_hash_table_lookup(replica->child->priv->meta, PCMK_META_CONTAINER_ATTRIBUTE_TARGET); } bundle_data->container_host_options = g_string_free(buffer, FALSE); if (bundle_data->attribute_target) { pcmk__insert_dup(rsc->priv->meta, PCMK_META_CONTAINER_ATTRIBUTE_TARGET, bundle_data->attribute_target); pcmk__insert_dup(bundle_data->child->priv->meta, PCMK_META_CONTAINER_ATTRIBUTE_TARGET, bundle_data->attribute_target); } } else { // Just a naked container, no pacemaker-remote GString *buffer = g_string_sized_new(1024); for (int lpc = 0; lpc < bundle_data->nreplicas; lpc++) { pcmk__bundle_replica_t *replica = NULL; replica = pcmk__assert_alloc(1, sizeof(pcmk__bundle_replica_t)); replica->offset = lpc; allocate_ip(bundle_data, replica, buffer); bundle_data->replicas = g_list_append(bundle_data->replicas, replica); } bundle_data->container_host_options = g_string_free(buffer, FALSE); } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; if (create_replica_resources(rsc, bundle_data, replica) != pcmk_rc_ok) { pcmk__config_err("Failed unpacking resource %s", rsc->id); pcmk__free_resource(rsc); return FALSE; } /* Utilization needs special handling for bundles. It makes no sense for * the inner primitive to have utilization, because it is tied * one-to-one to the guest node created by the container resource -- and * there's no way to set capacities for that guest node anyway. * * What the user really wants is to configure utilization for the * container. However, the schema only allows utilization for * primitives, and the container resource is implicit anyway, so the * user can *only* configure utilization for the inner primitive. If * they do, move the primitive's utilization values to the container. * * @TODO This means that bundles without an inner primitive can't have * utilization. An alternative might be to allow utilization values in * the top-level bundle XML in the schema, and copy those to each * container. */ if (replica->child != NULL) { GHashTable *empty = replica->container->priv->utilization; replica->container->priv->utilization = replica->child->priv->utilization; replica->child->priv->utilization = empty; } } if (bundle_data->child) { rsc->priv->children = g_list_append(rsc->priv->children, bundle_data->child); } return TRUE; } static int replica_resource_active(pcmk_resource_t *rsc, gboolean all) { if (rsc) { gboolean child_active = rsc->priv->fns->active(rsc, all); if (child_active && !all) { return TRUE; } else if (!child_active && all) { return FALSE; } } return -1; } bool pe__bundle_active(const pcmk_resource_t *rsc, bool all) { pe__bundle_variant_data_t *bundle_data = NULL; GList *iter = NULL; get_bundle_variant_data(bundle_data, rsc); for (iter = bundle_data->replicas; iter != NULL; iter = iter->next) { pcmk__bundle_replica_t *replica = iter->data; int rsc_active; rsc_active = replica_resource_active(replica->ip, all); if (rsc_active >= 0) { return (bool) rsc_active; } rsc_active = replica_resource_active(replica->child, all); if (rsc_active >= 0) { return (bool) rsc_active; } rsc_active = replica_resource_active(replica->container, all); if (rsc_active >= 0) { return (bool) rsc_active; } rsc_active = replica_resource_active(replica->remote, all); if (rsc_active >= 0) { return (bool) rsc_active; } } /* If "all" is TRUE, we've already checked that no resources were inactive, * so return TRUE; if "all" is FALSE, we didn't find any active resources, * so return FALSE. */ return all; } /*! * \internal * \brief Find the bundle replica corresponding to a given node * * \param[in] bundle Top-level bundle resource * \param[in] node Node to search for * * \return Bundle replica if found, NULL otherwise */ pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node) { pe__bundle_variant_data_t *bundle_data = NULL; pcmk__assert((bundle != NULL) && (node != NULL)); get_bundle_variant_data(bundle_data, bundle); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk__assert((replica != NULL) && (replica->node != NULL)); if (pcmk__same_node(replica->node, node)) { return replica->child; } } return NULL; } PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *", "GList *") int pe__bundle_xml(pcmk__output_t *out, va_list args) { uint32_t show_opts = va_arg(args, uint32_t); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); pe__bundle_variant_data_t *bundle_data = NULL; int rc = pcmk_rc_no_output; gboolean printed_header = FALSE; bool print_everything = true; const char *desc = NULL; pcmk__assert(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); if (rsc->priv->fns->is_filtered(rsc, only_rsc, true)) { return rc; } print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk_resource_t *ip = replica->ip; pcmk_resource_t *child = replica->child; pcmk_resource_t *container = replica->container; pcmk_resource_t *remote = replica->remote; char *id = NULL; gboolean print_ip, print_child, print_ctnr, print_remote; pcmk__assert(replica != NULL); if (pcmk__rsc_filtered_by_node(container, only_node)) { continue; } print_ip = (ip != NULL) && !ip->priv->fns->is_filtered(ip, only_rsc, print_everything); print_child = (child != NULL) && !child->priv->fns->is_filtered(child, only_rsc, print_everything); print_ctnr = !container->priv->fns->is_filtered(container, only_rsc, print_everything); print_remote = (remote != NULL) && !remote->priv->fns->is_filtered(remote, only_rsc, print_everything); if (!print_everything && !print_ip && !print_child && !print_ctnr && !print_remote) { continue; } if (!printed_header) { const char *type = container_agent_str(bundle_data->agent_type); const char *unique = pcmk__flag_text(rsc->flags, pcmk__rsc_unique); const char *maintenance = pcmk__flag_text(rsc->flags, pcmk__rsc_maintenance); const char *managed = pcmk__flag_text(rsc->flags, pcmk__rsc_managed); const char *failed = pcmk__flag_text(rsc->flags, pcmk__rsc_failed); printed_header = TRUE; desc = pe__resource_description(rsc, show_opts); rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_BUNDLE, PCMK_XA_ID, rsc->id, PCMK_XA_TYPE, type, PCMK_XA_IMAGE, bundle_data->image, PCMK_XA_UNIQUE, unique, PCMK_XA_MAINTENANCE, maintenance, PCMK_XA_MANAGED, managed, PCMK_XA_FAILED, failed, PCMK_XA_DESCRIPTION, desc, NULL); pcmk__assert(rc == pcmk_rc_ok); } id = pcmk__itoa(replica->offset); rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_REPLICA, PCMK_XA_ID, id, NULL); free(id); pcmk__assert(rc == pcmk_rc_ok); if (print_ip) { out->message(out, (const char *) ip->priv->xml->name, show_opts, ip, only_node, only_rsc); } if (print_child) { out->message(out, (const char *) child->priv->xml->name, show_opts, child, only_node, only_rsc); } if (print_ctnr) { out->message(out, (const char *) container->priv->xml->name, show_opts, container, only_node, only_rsc); } if (print_remote) { out->message(out, (const char *) remote->priv->xml->name, show_opts, remote, only_node, only_rsc); } pcmk__output_xml_pop_parent(out); // replica } if (printed_header) { pcmk__output_xml_pop_parent(out); // bundle } return rc; } static void pe__bundle_replica_output_html(pcmk__output_t *out, pcmk__bundle_replica_t *replica, pcmk_node_t *node, uint32_t show_opts) { pcmk_resource_t *rsc = replica->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = replica->container; } if (replica->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->container)); } if (replica->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", replica->ipaddr); } pe__common_output_html(out, rsc, buffer, node, show_opts); } /*! * \internal * \brief Get a string describing a resource's unmanaged state or lack thereof * * \param[in] rsc Resource to describe * * \return A string indicating that a resource is in maintenance mode or * otherwise unmanaged, or an empty string otherwise */ static const char * get_unmanaged_str(const pcmk_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance)) { return " (maintenance)"; } if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { return " (unmanaged)"; } return ""; } PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *", "GList *") int pe__bundle_html(pcmk__output_t *out, va_list args) { uint32_t show_opts = va_arg(args, uint32_t); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); const char *desc = NULL; pe__bundle_variant_data_t *bundle_data = NULL; int rc = pcmk_rc_no_output; bool print_everything = true; pcmk__assert(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); desc = pe__resource_description(rsc, show_opts); if (rsc->priv->fns->is_filtered(rsc, only_rsc, true)) { return rc; } print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk_resource_t *ip = replica->ip; pcmk_resource_t *child = replica->child; pcmk_resource_t *container = replica->container; pcmk_resource_t *remote = replica->remote; gboolean print_ip, print_child, print_ctnr, print_remote; pcmk__assert(replica != NULL); if (pcmk__rsc_filtered_by_node(container, only_node)) { continue; } print_ip = (ip != NULL) && !ip->priv->fns->is_filtered(ip, only_rsc, print_everything); print_child = (child != NULL) && !child->priv->fns->is_filtered(child, only_rsc, print_everything); print_ctnr = !container->priv->fns->is_filtered(container, only_rsc, print_everything); print_remote = (remote != NULL) && !remote->priv->fns->is_filtered(remote, only_rsc, print_everything); if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) || (print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) { /* The text output messages used below require pe_print_implicit to * be set to do anything. */ uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "", desc ? " (" : "", desc ? desc : "", desc ? ")" : "", get_unmanaged_str(rsc)); if (pcmk__list_of_multiple(bundle_data->replicas)) { out->begin_list(out, NULL, NULL, "Replica[%d]", replica->offset); } if (print_ip) { out->message(out, (const char *) ip->priv->xml->name, new_show_opts, ip, only_node, only_rsc); } if (print_child) { out->message(out, (const char *) child->priv->xml->name, new_show_opts, child, only_node, only_rsc); } if (print_ctnr) { out->message(out, (const char *) container->priv->xml->name, new_show_opts, container, only_node, only_rsc); } if (print_remote) { out->message(out, (const char *) remote->priv->xml->name, new_show_opts, remote, only_node, only_rsc); } if (pcmk__list_of_multiple(bundle_data->replicas)) { out->end_list(out); } } else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) { continue; } else { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "", desc ? " (" : "", desc ? desc : "", desc ? ")" : "", get_unmanaged_str(rsc)); pe__bundle_replica_output_html(out, replica, pcmk__current_node(container), show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void pe__bundle_replica_output_text(pcmk__output_t *out, pcmk__bundle_replica_t *replica, pcmk_node_t *node, uint32_t show_opts) { const pcmk_resource_t *rsc = replica->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = replica->container; } if (replica->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->container)); } if (replica->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", replica->ipaddr); } pe__common_output_text(out, rsc, buffer, node, show_opts); } PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *", "GList *") int pe__bundle_text(pcmk__output_t *out, va_list args) { uint32_t show_opts = va_arg(args, uint32_t); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); const char *desc = NULL; pe__bundle_variant_data_t *bundle_data = NULL; int rc = pcmk_rc_no_output; bool print_everything = true; desc = pe__resource_description(rsc, show_opts); pcmk__assert(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); if (rsc->priv->fns->is_filtered(rsc, only_rsc, true)) { return rc; } print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk_resource_t *ip = replica->ip; pcmk_resource_t *child = replica->child; pcmk_resource_t *container = replica->container; pcmk_resource_t *remote = replica->remote; gboolean print_ip, print_child, print_ctnr, print_remote; pcmk__assert(replica != NULL); if (pcmk__rsc_filtered_by_node(container, only_node)) { continue; } print_ip = (ip != NULL) && !ip->priv->fns->is_filtered(ip, only_rsc, print_everything); print_child = (child != NULL) && !child->priv->fns->is_filtered(child, only_rsc, print_everything); print_ctnr = !container->priv->fns->is_filtered(container, only_rsc, print_everything); print_remote = (remote != NULL) && !remote->priv->fns->is_filtered(remote, only_rsc, print_everything); if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) || (print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) { /* The text output messages used below require pe_print_implicit to * be set to do anything. */ uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "", desc ? " (" : "", desc ? desc : "", desc ? ")" : "", get_unmanaged_str(rsc)); if (pcmk__list_of_multiple(bundle_data->replicas)) { out->list_item(out, NULL, "Replica[%d]", replica->offset); } out->begin_list(out, NULL, NULL, NULL); if (print_ip) { out->message(out, (const char *) ip->priv->xml->name, new_show_opts, ip, only_node, only_rsc); } if (print_child) { out->message(out, (const char *) child->priv->xml->name, new_show_opts, child, only_node, only_rsc); } if (print_ctnr) { out->message(out, (const char *) container->priv->xml->name, new_show_opts, container, only_node, only_rsc); } if (print_remote) { out->message(out, (const char *) remote->priv->xml->name, new_show_opts, remote, only_node, only_rsc); } out->end_list(out); } else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) { continue; } else { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "", desc ? " (" : "", desc ? desc : "", desc ? ")" : "", get_unmanaged_str(rsc)); pe__bundle_replica_output_text(out, replica, pcmk__current_node(container), show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void free_bundle_replica(pcmk__bundle_replica_t *replica) { if (replica == NULL) { return; } pcmk__free_node_copy(replica->node); replica->node = NULL; if (replica->ip) { pcmk__xml_free(replica->ip->priv->xml); replica->ip->priv->xml = NULL; pcmk__free_resource(replica->ip); } if (replica->container) { pcmk__xml_free(replica->container->priv->xml); replica->container->priv->xml = NULL; pcmk__free_resource(replica->container); } if (replica->remote) { pcmk__xml_free(replica->remote->priv->xml); replica->remote->priv->xml = NULL; pcmk__free_resource(replica->remote); } free(replica->ipaddr); free(replica); } void pe__free_bundle(pcmk_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); pcmk__rsc_trace(rsc, "Freeing %s", rsc->id); free(bundle_data->prefix); free(bundle_data->image); free(bundle_data->control_port); free(bundle_data->host_network); free(bundle_data->host_netmask); free(bundle_data->ip_range_start); free(bundle_data->container_network); free(bundle_data->launcher_options); free(bundle_data->container_command); g_free(bundle_data->container_host_options); g_list_free_full(bundle_data->replicas, (GDestroyNotify) free_bundle_replica); g_list_free_full(bundle_data->mounts, (GDestroyNotify)mount_free); g_list_free_full(bundle_data->ports, (GDestroyNotify)port_free); g_list_free(rsc->priv->children); if(bundle_data->child) { pcmk__xml_free(bundle_data->child->priv->xml); bundle_data->child->priv->xml = NULL; pcmk__free_resource(bundle_data->child); } common_free(rsc); } enum rsc_role_e pe__bundle_resource_state(const pcmk_resource_t *rsc, bool current) { enum rsc_role_e container_role = pcmk_role_unknown; return container_role; } /*! * \brief Get the number of configured replicas in a bundle * * \param[in] rsc Bundle resource * * \return Number of configured replicas, or 0 on error */ int pe_bundle_replicas(const pcmk_resource_t *rsc) { if (pcmk__is_bundle(rsc)) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); return bundle_data->nreplicas; } return 0; } void pe__count_bundle(pcmk_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); for (GList *item = bundle_data->replicas; item != NULL; item = item->next) { pcmk__bundle_replica_t *replica = item->data; if (replica->ip) { replica->ip->priv->fns->count(replica->ip); } if (replica->child) { replica->child->priv->fns->count(replica->child); } if (replica->container) { replica->container->priv->fns->count(replica->container); } if (replica->remote) { replica->remote->priv->fns->count(replica->remote); } } } bool pe__bundle_is_filtered(const pcmk_resource_t *rsc, const GList *only_rsc, bool check_parent) { bool passes = false; pe__bundle_variant_data_t *bundle_data = NULL; if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) { passes = true; } else { get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk_resource_t *ip = replica->ip; pcmk_resource_t *child = replica->child; pcmk_resource_t *container = replica->container; pcmk_resource_t *remote = replica->remote; if ((ip != NULL) && !ip->priv->fns->is_filtered(ip, only_rsc, false)) { passes = true; break; } if ((child != NULL) && !child->priv->fns->is_filtered(child, only_rsc, false)) { passes = true; break; } if (!container->priv->fns->is_filtered(container, only_rsc, false)) { passes = true; break; } if ((remote != NULL) && !remote->priv->fns->is_filtered(remote, only_rsc, false)) { passes = true; break; } } } return !passes; } /*! * \internal * \brief Get a list of a bundle's containers * * \param[in] bundle Bundle resource * * \return Newly created list of \p bundle's containers * \note It is the caller's responsibility to free the result with * g_list_free(). */ GList * pe__bundle_containers(const pcmk_resource_t *bundle) { /* @TODO It would be more efficient to do this once when unpacking the * bundle, creating a new GList* in the variant data */ GList *containers = NULL; const pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, bundle); for (GList *iter = data->replicas; iter != NULL; iter = iter->next) { pcmk__bundle_replica_t *replica = iter->data; containers = g_list_append(containers, replica->container); } return containers; } // Bundle implementation of pcmk__rsc_methods_t:active_node() pcmk_node_t * pe__bundle_active_node(const pcmk_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean) { pcmk_node_t *active = NULL; pcmk_node_t *node = NULL; pcmk_resource_t *container = NULL; GList *containers = NULL; GList *iter = NULL; GHashTable *nodes = NULL; const pe__bundle_variant_data_t *data = NULL; if (count_all != NULL) { *count_all = 0; } if (count_clean != NULL) { *count_clean = 0; } if (rsc == NULL) { return NULL; } /* For the purposes of this method, we only care about where the bundle's * containers are active, so build a list of active containers. */ get_bundle_variant_data(data, rsc); for (iter = data->replicas; iter != NULL; iter = iter->next) { pcmk__bundle_replica_t *replica = iter->data; if (replica->container->priv->active_nodes != NULL) { containers = g_list_append(containers, replica->container); } } if (containers == NULL) { return NULL; } /* If the bundle has only a single active container, just use that * container's method. If live migration is ever supported for bundle * containers, this will allow us to prefer the migration source when there * is only one container and it is migrating. For now, this just lets us * avoid creating the nodes table. */ if (pcmk__list_of_1(containers)) { container = containers->data; node = container->priv->fns->active_node(container, count_all, count_clean); g_list_free(containers); return node; } // Add all containers' active nodes to a hash table (for uniqueness) nodes = g_hash_table_new(NULL, NULL); for (iter = containers; iter != NULL; iter = iter->next) { container = iter->data; for (GList *node_iter = container->priv->active_nodes; node_iter != NULL; node_iter = node_iter->next) { node = node_iter->data; // If insert returns true, we haven't counted this node yet if (g_hash_table_insert(nodes, (gpointer) node->details, (gpointer) node) && !pe__count_active_node(rsc, node, &active, count_all, count_clean)) { goto done; } } } done: g_list_free(containers); g_hash_table_destroy(nodes); return active; } /*! * \internal * \brief Get maximum bundle resource instances per node * * \param[in] rsc Bundle resource to check * * \return Maximum number of \p rsc instances that can be active on one node */ unsigned int pe__bundle_max_per_node(const pcmk_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); pcmk__assert(bundle_data->nreplicas_per_host >= 0); return (unsigned int) bundle_data->nreplicas_per_host; } diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index d7b9b97b3c..984da9f6cd 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,1479 +1,1479 @@ /* * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "crm/services_internal.h" #include "services_private.h" static void close_pipe(int fildes[]); /* We have two alternative ways of handling SIGCHLD when synchronously waiting * for spawned processes to complete. Both rely on polling a file descriptor to * discover SIGCHLD events. * * If sys/signalfd.h is available (e.g. on Linux), we call signalfd() to * generate the file descriptor. Otherwise, we use the "self-pipe trick" * (opening a pipe and writing a byte to it when SIGCHLD is received). */ #ifdef HAVE_SYS_SIGNALFD_H // signalfd() implementation #include // Everything needed to manage SIGCHLD handling struct sigchld_data_s { sigset_t mask; // Signals to block now (including SIGCHLD) sigset_t old_mask; // Previous set of blocked signals bool ignored; // If SIGCHLD for another child has been ignored }; // Initialize SIGCHLD data and prepare for use static bool sigchld_setup(struct sigchld_data_s *data) { sigemptyset(&(data->mask)); sigaddset(&(data->mask), SIGCHLD); sigemptyset(&(data->old_mask)); // Block SIGCHLD (saving previous set of blocked signals to restore later) if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) { crm_info("Wait for child process completion failed: %s " QB_XS " source=sigprocmask", pcmk_rc_str(errno)); return false; } data->ignored = false; return true; } // Get a file descriptor suitable for polling for SIGCHLD events static int sigchld_open(struct sigchld_data_s *data) { int fd; CRM_CHECK(data != NULL, return -1); fd = signalfd(-1, &(data->mask), SFD_NONBLOCK); if (fd < 0) { crm_info("Wait for child process completion failed: %s " QB_XS " source=signalfd", pcmk_rc_str(errno)); } return fd; } // Close a file descriptor returned by sigchld_open() static void sigchld_close(int fd) { if (fd > 0) { close(fd); } } // Return true if SIGCHLD was received from polled fd static bool sigchld_received(int fd, int pid, struct sigchld_data_s *data) { struct signalfd_siginfo fdsi; ssize_t s; if (fd < 0) { return false; } s = read(fd, &fdsi, sizeof(struct signalfd_siginfo)); if (s != sizeof(struct signalfd_siginfo)) { crm_info("Wait for child process completion failed: %s " QB_XS " source=read", pcmk_rc_str(errno)); } else if (fdsi.ssi_signo == SIGCHLD) { if (fdsi.ssi_pid == pid) { return true; } else { /* This SIGCHLD is for another child. We have to ignore it here but * will still need to resend it after this synchronous action has * completed and SIGCHLD has been restored to be handled by the * previous SIGCHLD handler, so that it will be handled. */ data->ignored = true; return false; } } return false; } // Do anything needed after done waiting for SIGCHLD static void sigchld_cleanup(struct sigchld_data_s *data) { // Restore the original set of blocked signals if ((sigismember(&(data->old_mask), SIGCHLD) == 0) && (sigprocmask(SIG_UNBLOCK, &(data->mask), NULL) < 0)) { crm_warn("Could not clean up after child process completion: %s", pcmk_rc_str(errno)); } // Resend any ignored SIGCHLD for other children so that they'll be handled. if (data->ignored && kill(getpid(), SIGCHLD) != 0) { crm_warn("Could not resend ignored SIGCHLD to ourselves: %s", pcmk_rc_str(errno)); } } #else // HAVE_SYS_SIGNALFD_H not defined // Self-pipe implementation (see above for function descriptions) struct sigchld_data_s { int pipe_fd[2]; // Pipe file descriptors struct sigaction sa; // Signal handling info (with SIGCHLD) struct sigaction old_sa; // Previous signal handling info bool ignored; // If SIGCHLD for another child has been ignored }; // We need a global to use in the signal handler volatile struct sigchld_data_s *last_sigchld_data = NULL; static void sigchld_handler(void) { // We received a SIGCHLD, so trigger pipe polling if ((last_sigchld_data != NULL) && (last_sigchld_data->pipe_fd[1] >= 0) && (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) { crm_info("Wait for child process completion failed: %s " QB_XS " source=write", pcmk_rc_str(errno)); } } static bool sigchld_setup(struct sigchld_data_s *data) { int rc; data->pipe_fd[0] = data->pipe_fd[1] = -1; if (pipe(data->pipe_fd) == -1) { crm_info("Wait for child process completion failed: %s " QB_XS " source=pipe", pcmk_rc_str(errno)); return false; } rc = pcmk__set_nonblocking(data->pipe_fd[0]); if (rc != pcmk_rc_ok) { crm_info("Could not set pipe input non-blocking: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); } rc = pcmk__set_nonblocking(data->pipe_fd[1]); if (rc != pcmk_rc_ok) { crm_info("Could not set pipe output non-blocking: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); } // Set SIGCHLD handler data->sa.sa_handler = (sighandler_t) sigchld_handler; data->sa.sa_flags = 0; sigemptyset(&(data->sa.sa_mask)); if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) { crm_info("Wait for child process completion failed: %s " QB_XS " source=sigaction", pcmk_rc_str(errno)); } data->ignored = false; // Remember data for use in signal handler last_sigchld_data = data; return true; } static int sigchld_open(struct sigchld_data_s *data) { CRM_CHECK(data != NULL, return -1); return data->pipe_fd[0]; } static void sigchld_close(int fd) { // Pipe will be closed in sigchld_cleanup() return; } static bool sigchld_received(int fd, int pid, struct sigchld_data_s *data) { char ch; if (fd < 0) { return false; } // Clear out the self-pipe while (read(fd, &ch, 1) == 1) /*omit*/; return true; } static void sigchld_cleanup(struct sigchld_data_s *data) { // Restore the previous SIGCHLD handler if (sigaction(SIGCHLD, &(data->old_sa), NULL) < 0) { crm_warn("Could not clean up after child process completion: %s", pcmk_rc_str(errno)); } close_pipe(data->pipe_fd); // Resend any ignored SIGCHLD for other children so that they'll be handled. if (data->ignored && kill(getpid(), SIGCHLD) != 0) { crm_warn("Could not resend ignored SIGCHLD to ourselves: %s", pcmk_rc_str(errno)); } } #endif /*! * \internal * \brief Close the two file descriptors of a pipe * * \param[in,out] fildes Array of file descriptors opened by pipe() */ static void close_pipe(int fildes[]) { if (fildes[0] >= 0) { close(fildes[0]); fildes[0] = -1; } if (fildes[1] >= 0) { close(fildes[1]); fildes[1] = -1; } } #define out_type(is_stderr) ((is_stderr)? "stderr" : "stdout") // Maximum number of bytes of stdout or stderr we'll accept #define MAX_OUTPUT (10 * 1024 * 1024) static gboolean svc_read_output(int fd, svc_action_t * op, bool is_stderr) { char *data = NULL; ssize_t rc = 0; size_t len = 0; size_t discarded = 0; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; if (fd < 0) { crm_trace("No fd for %s", op->id); return FALSE; } if (is_stderr && op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; crm_trace("Reading %s stderr into offset %zu", op->id, len); } else if (is_stderr == FALSE && op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; crm_trace("Reading %s stdout into offset %zu", op->id, len); } else { crm_trace("Reading %s %s", op->id, out_type(is_stderr)); } do { errno = 0; rc = read(fd, buf, buf_read_len); if (rc > 0) { if (len < MAX_OUTPUT) { buf[rc] = 0; crm_trace("Received %zd bytes of %s %s: %.80s", rc, op->id, out_type(is_stderr), buf); data = pcmk__realloc(data, len + rc + 1); strcpy(data + len, buf); len += rc; } else { discarded += rc; } } else if (errno != EINTR) { // Fatal error or EOF rc = 0; break; } } while ((rc == buf_read_len) || (rc < 0)); if (discarded > 0) { crm_warn("Truncated %s %s to %zu bytes (discarded %zu)", op->id, out_type(is_stderr), len, discarded); } if (is_stderr) { op->stderr_data = data; } else { op->stdout_data = data; } return rc != 0; } static int dispatch_stdout(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stdout_fd, op, FALSE); } static int dispatch_stderr(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stderr_fd, op, TRUE); } static void pipe_out_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), strcmp(key, "OCF_CHECK_LEVEL") != 0 ? "OCF_RESKEY_%s" : "%s", (char *)key); set_ocf_env(buffer, value, user_data); } static void set_alert_env(gpointer key, gpointer value, gpointer user_data) { int rc; if (value != NULL) { rc = setenv(key, value, 1); } else { rc = unsetenv(key); } if (rc < 0) { crm_perror(LOG_ERR, "setenv %s=%s", (char*)key, (value? (char*)value : "")); } else { crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : "")); } } /*! * \internal * \brief Add environment variables suitable for an action * * \param[in] op Action to use */ static void add_action_env_vars(const svc_action_t *op) { void (*env_setter)(gpointer, gpointer, gpointer) = NULL; if (op->agent == NULL) { env_setter = set_alert_env; /* we deal with alert handler */ } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) { env_setter = set_ocf_env_with_prefix; } if (env_setter != NULL && op->params != NULL) { g_hash_table_foreach(op->params, env_setter, NULL); } if (env_setter == NULL || env_setter == set_alert_env) { return; } set_ocf_env("OCF_RA_VERSION_MAJOR", PCMK_OCF_MAJOR_VERSION, NULL); set_ocf_env("OCF_RA_VERSION_MINOR", PCMK_OCF_MINOR_VERSION, NULL); set_ocf_env("OCF_ROOT", PCMK_OCF_ROOT, NULL); set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } static void pipe_in_single_parameter(gpointer key, gpointer value, gpointer user_data) { svc_action_t *op = user_data; char *buffer = crm_strdup_printf("%s=%s\n", (char *)key, (char *) value); size_t len = strlen(buffer); size_t total = 0; ssize_t ret = 0; do { errno = 0; ret = write(op->opaque->stdin_fd, buffer + total, len - total); if (ret > 0) { total += ret; } } while ((errno == EINTR) && (total < len)); free(buffer); } /*! * \internal * \brief Pipe parameters in via stdin for action * * \param[in] op Action to use */ static void pipe_in_action_stdin_parameters(const svc_action_t *op) { if (op->params) { g_hash_table_foreach(op->params, pipe_in_single_parameter, (gpointer) op); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invocation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; op->opaque->repeat_timer = 0; services_action_async(op, NULL); return FALSE; } /*! * \internal * \brief Finalize handling of an asynchronous operation * * Given a completed asynchronous operation, cancel or reschedule it as * appropriate if recurring, call its callback if registered, stop tracking it, * and clean it up. * * \param[in,out] op Operation to finalize * * \return Standard Pacemaker return code * \retval EINVAL Caller supplied NULL or invalid \p op * \retval EBUSY Uncanceled recurring action has only been cleaned up * \retval pcmk_rc_ok Action has been freed * * \note If the return value is not pcmk_rc_ok, the caller is responsible for * freeing the action. */ int services__finalize_async_op(svc_action_t *op) { CRM_CHECK((op != NULL) && !(op->synchronous), return EINVAL); if (op->interval_ms != 0) { // Recurring operations must be either cancelled or rescheduled if (op->cancel) { services__set_cancelled(op); cancel_recurring_action(op); } else { op->opaque->repeat_timer = pcmk__create_timer(op->interval_ms, recurring_action_timer, op); } } if (op->opaque->callback != NULL) { op->opaque->callback(op); } // Stop tracking the operation (as in-flight or blocked) op->pid = 0; services_untrack_op(op); if ((op->interval_ms != 0) && !(op->cancel)) { // Do not free recurring actions (they will get freed when cancelled) services_action_cleanup(op); return EBUSY; } services_action_free(op); return pcmk_rc_ok; } static void close_op_input(svc_action_t *op) { if (op->opaque->stdin_fd >= 0) { close(op->opaque->stdin_fd); } } static void finish_op_output(svc_action_t *op, bool is_stderr) { mainloop_io_t **source; int fd; if (is_stderr) { source = &(op->opaque->stderr_gsource); fd = op->opaque->stderr_fd; } else { source = &(op->opaque->stdout_gsource); fd = op->opaque->stdout_fd; } if (op->synchronous || *source) { crm_trace("Finish reading %s[%d] %s", op->id, op->pid, (is_stderr? "stderr" : "stdout")); svc_read_output(fd, op, is_stderr); if (op->synchronous) { close(fd); } else { mainloop_del_fd(*source); *source = NULL; } } } // Log an operation's stdout and stderr static void log_op_output(svc_action_t *op) { char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid); /* The library caller has better context to know how important the output * is, so log it at info and debug severity here. They can log it again at * higher severity if appropriate. */ crm_log_output(LOG_INFO, prefix, op->stderr_data); strcpy(prefix + strlen(prefix) - strlen("error output"), "output"); crm_log_output(LOG_DEBUG, prefix, op->stdout_data); free(prefix); } // Truncate exit reasons at this many characters #define EXIT_REASON_MAX_LEN 128 static void parse_exit_reason_from_stderr(svc_action_t *op) { const char *reason_start = NULL; const char *reason_end = NULL; const int prefix_len = strlen(PCMK_OCF_REASON_PREFIX); if ((op->stderr_data == NULL) || // Only OCF agents have exit reasons in stderr !pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_none)) { return; } // Find the last occurrence of the magic string indicating an exit reason for (const char *cur = strstr(op->stderr_data, PCMK_OCF_REASON_PREFIX); cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { cur += prefix_len; // Skip over magic string reason_start = cur; } if ((reason_start == NULL) || (reason_start[0] == '\n') || (reason_start[0] == '\0')) { return; // No or empty exit reason } // Exit reason goes to end of line (or end of output) reason_end = strchr(reason_start, '\n'); if (reason_end == NULL) { reason_end = reason_start + strlen(reason_start); } // Limit size of exit reason to something reasonable if (reason_end > (reason_start + EXIT_REASON_MAX_LEN)) { reason_end = reason_start + EXIT_REASON_MAX_LEN; } free(op->opaque->exit_reason); op->opaque->exit_reason = strndup(reason_start, reason_end - reason_start); } /*! * \internal * \brief Process the completion of an asynchronous child process * * \param[in,out] p Child process that completed * \param[in] pid Process ID of child * \param[in] core (Unused) * \param[in] signo Signal that interrupted child, if any * \param[in] exitcode Exit status of child process */ static void async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo, int exitcode) { svc_action_t *op = mainloop_child_userdata(p); mainloop_clear_child_userdata(p); CRM_CHECK(op->pid == pid, services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, "Bug in mainloop handling"); return); /* Depending on the priority the mainloop gives the stdout and stderr * file descriptors, this function could be called before everything has * been read from them, so force a final read now. */ finish_op_output(op, true); finish_op_output(op, false); close_op_input(op); if (signo == 0) { crm_debug("%s[%d] exited with status %d", op->id, op->pid, exitcode); services__set_result(op, exitcode, PCMK_EXEC_DONE, NULL); log_op_output(op); parse_exit_reason_from_stderr(op); } else if (mainloop_child_timeout(p)) { const char *kind = services__action_kind(op); crm_info("%s %s[%d] timed out after %s", kind, op->id, op->pid, pcmk__readable_interval(op->timeout)); services__format_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT, "%s did not complete within %s", kind, pcmk__readable_interval(op->timeout)); } else if (op->cancel) { /* If an in-flight recurring operation was killed because it was * cancelled, don't treat that as a failure. */ crm_info("%s[%d] terminated with signal %d (%s)", op->id, op->pid, signo, strsignal(signo)); services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_CANCELLED, NULL); } else { crm_info("%s[%d] terminated with signal %d (%s)", op->id, op->pid, signo, strsignal(signo)); services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "%s interrupted by %s signal", services__action_kind(op), strsignal(signo)); } services__finalize_async_op(op); } /*! * \internal * \brief Return agent standard's exit status for "generic error" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for errors in general. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__generic_error(const svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } #if PCMK__ENABLE_LSB if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, PCMK_ACTION_STATUS, pcmk__str_casei)) { return PCMK_LSB_STATUS_UNKNOWN; } #endif return PCMK_OCF_UNKNOWN_ERROR; } /*! * \internal * \brief Return agent standard's exit status for "not installed" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for "not installed" errors. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__not_installed_error(const svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } #if PCMK__ENABLE_LSB if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, PCMK_ACTION_STATUS, pcmk__str_casei)) { return PCMK_LSB_STATUS_NOT_INSTALLED; } #endif return PCMK_OCF_NOT_INSTALLED; } /*! * \internal * \brief Return agent standard's exit status for "insufficient privileges" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for "insufficient privileges" errors. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__authorization_error(const svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } #if PCMK__ENABLE_LSB if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, PCMK_ACTION_STATUS, pcmk__str_casei)) { return PCMK_LSB_STATUS_INSUFFICIENT_PRIV; } #endif return PCMK_OCF_INSUFFICIENT_PRIV; } /*! * \internal * \brief Return agent standard's exit status for "not configured" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for "not configured" errors. * * \param[in] op Action that error is for * \param[in] is_fatal Whether problem is cluster-wide instead of only local * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__configuration_error(const svc_action_t *op, bool is_fatal) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } #if PCMK__ENABLE_LSB if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, PCMK_ACTION_STATUS, pcmk__str_casei)) { return PCMK_LSB_NOT_CONFIGURED; } #endif return is_fatal? PCMK_OCF_NOT_CONFIGURED : PCMK_OCF_INVALID_PARAM; } /*! * \internal * \brief Set operation rc and status per errno from stat(), fork() or execvp() * * \param[in,out] op Operation to set rc and status for * \param[in] error Value of errno after system call * * \return void */ void services__handle_exec_error(svc_action_t * op, int error) { const char *name = op->opaque->exec; if (name == NULL) { name = op->agent; if (name == NULL) { name = op->id; } } switch (error) { /* see execve(2), stat(2) and fork(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ case ENOTDIR: /* Path component is not a directory */ case EINVAL: /* Invalid executable format */ case ENOEXEC: /* Invalid executable format */ services__format_result(op, services__not_installed_error(op), PCMK_EXEC_NOT_INSTALLED, "%s: %s", name, pcmk_rc_str(error)); break; case EACCES: /* permission denied (various errors) */ case EPERM: /* permission denied (various errors) */ services__format_result(op, services__authorization_error(op), PCMK_EXEC_ERROR, "%s: %s", name, pcmk_rc_str(error)); break; default: services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, pcmk_rc_str(error)); } } /*! * \internal * \brief Exit a child process that failed before executing agent * * \param[in] op Action that failed * \param[in] exit_status Exit status code to use * \param[in] exit_reason Exit reason to output if for OCF agent */ static void exit_child(const svc_action_t *op, int exit_status, const char *exit_reason) { if ((op != NULL) && (exit_reason != NULL) && pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_none)) { fprintf(stderr, PCMK_OCF_REASON_PREFIX "%s\n", exit_reason); } pcmk_common_cleanup(); _exit(exit_status); } static void action_launch_child(svc_action_t *op) { int rc; /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library. * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well. * We do not want this to be inherited by the child process. By resetting this the signal * to the default behavior, we avoid some potential odd problems that occur during OCF * scripts when SIGPIPE is ignored by the environment. */ signal(SIGPIPE, SIG_DFL); if (sched_getscheduler(0) != SCHED_OTHER) { struct sched_param sp; memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { crm_info("Could not reset scheduling policy for %s", op->id); } } if (setpriority(PRIO_PROCESS, 0, 0) == -1) { crm_info("Could not reset process priority for %s", op->id); } /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); pcmk__close_fds_in_child(false); /* It would be nice if errors in this function could be reported as * execution status (for example, PCMK_EXEC_NO_SECRETS for the secrets error * below) instead of exit status. However, we've already forked, so * exit status is all we have. At least for OCF actions, we can output an * exit reason for the parent to parse. * * @TODO It might be better to substitute secrets in the parent before * forking, so that if it fails, we can give a better message and result, * and avoid the fork. */ #if PCMK__ENABLE_CIBSECRETS rc = pcmk__substitute_secrets(op->rsc, op->params); if (rc != pcmk_rc_ok) { if (pcmk__str_eq(op->action, PCMK_ACTION_STOP, pcmk__str_casei)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", op->rsc, pcmk_rc_str(rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", op->rsc, pcmk_rc_str(rc)); exit_child(op, services__configuration_error(op, false), "Unable to load CIB secrets"); } } #endif add_action_env_vars(op); /* Become the desired user */ if (op->opaque->uid && (geteuid() == 0)) { // If requested, set effective group if (op->opaque->gid && (setgid(op->opaque->gid) < 0)) { crm_err("Considering %s unauthorized because could not set " "child group to %d: %s", op->id, op->opaque->gid, strerror(errno)); exit_child(op, services__authorization_error(op), "Could not set group for child process"); } // Erase supplementary group list // (We could do initgroups() if we kept a copy of the username) if (setgroups(0, NULL) < 0) { crm_err("Considering %s unauthorized because could not " "clear supplementary groups: %s", op->id, strerror(errno)); exit_child(op, services__authorization_error(op), "Could not clear supplementary groups for child process"); } // Set effective user if (setuid(op->opaque->uid) < 0) { crm_err("Considering %s unauthorized because could not set user " "to %d: %s", op->id, op->opaque->uid, strerror(errno)); exit_child(op, services__authorization_error(op), "Could not set user for child process"); } } // Execute the agent (doesn't return if successful) execvp(op->opaque->exec, op->opaque->args); // An earlier stat() should have avoided most possible errors rc = errno; services__handle_exec_error(op, rc); crm_err("Unable to execute %s: %s", op->id, strerror(rc)); exit_child(op, op->rc, "Child process was unable to execute file"); } /*! * \internal * \brief Wait for synchronous action to complete, and set its result * * \param[in,out] op Action to wait for * \param[in,out] data Child signal data */ static void wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data) { int status = 0; int timeout = op->timeout; time_t start = time(NULL); struct pollfd fds[3]; int wait_rc = 0; const char *wait_reason = NULL; fds[0].fd = op->opaque->stdout_fd; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = op->opaque->stderr_fd; fds[1].events = POLLIN; fds[1].revents = 0; fds[2].fd = sigchld_open(data); fds[2].events = POLLIN; fds[2].revents = 0; crm_trace("Waiting for %s[%d]", op->id, op->pid); do { int poll_rc = poll(fds, 3, timeout); wait_reason = NULL; if (poll_rc > 0) { if (fds[0].revents & POLLIN) { svc_read_output(op->opaque->stdout_fd, op, FALSE); } if (fds[1].revents & POLLIN) { svc_read_output(op->opaque->stderr_fd, op, TRUE); } if ((fds[2].revents & POLLIN) && sigchld_received(fds[2].fd, op->pid, data)) { wait_rc = waitpid(op->pid, &status, WNOHANG); if ((wait_rc > 0) || ((wait_rc < 0) && (errno == ECHILD))) { // Child process exited or doesn't exist break; } else if (wait_rc < 0) { wait_reason = pcmk_rc_str(errno); crm_info("Wait for completion of %s[%d] failed: %s " QB_XS " source=waitpid", op->id, op->pid, wait_reason); wait_rc = 0; // Act as if process is still running #ifndef HAVE_SYS_SIGNALFD_H } else { /* The child hasn't exited, so this SIGCHLD could be for * another child. We have to ignore it here but will still * need to resend it after this synchronous action has * completed and SIGCHLD has been restored to be handled by * the previous handler, so that it will be handled. */ data->ignored = true; #endif } } } else if (poll_rc == 0) { // Poll timed out with no descriptors ready timeout = 0; break; } else if ((poll_rc < 0) && (errno != EINTR)) { wait_reason = pcmk_rc_str(errno); crm_info("Wait for completion of %s[%d] failed: %s " QB_XS " source=poll", op->id, op->pid, wait_reason); break; } timeout = op->timeout - (time(NULL) - start) * 1000; } while ((op->timeout < 0 || timeout > 0)); crm_trace("Stopped waiting for %s[%d]", op->id, op->pid); finish_op_output(op, true); finish_op_output(op, false); close_op_input(op); sigchld_close(fds[2].fd); if (wait_rc <= 0) { if ((op->timeout > 0) && (timeout <= 0)) { services__format_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT, "%s did not exit within specified timeout", services__action_kind(op)); crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout); } else { services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, wait_reason); } /* If only child hasn't been successfully waited for, yet. This is to limit killing wrong target a bit more. */ if ((wait_rc == 0) && (waitpid(op->pid, &status, WNOHANG) == 0)) { if (kill(op->pid, SIGKILL)) { crm_warn("Could not kill rogue child %s[%d]: %s", op->id, op->pid, pcmk_rc_str(errno)); } /* Safe to skip WNOHANG here as we sent non-ignorable signal. */ while ((waitpid(op->pid, &status, 0) == (pid_t) -1) && (errno == EINTR)) { /* keep waiting */; } } } else if (WIFEXITED(status)) { services__set_result(op, WEXITSTATUS(status), PCMK_EXEC_DONE, NULL); parse_exit_reason_from_stderr(op); crm_info("%s[%d] exited with status %d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); services__format_result(op, services__generic_error(op), PCMK_EXEC_ERROR, "%s interrupted by %s signal", services__action_kind(op), strsignal(signo)); crm_info("%s[%d] terminated with signal %d (%s)", op->id, op->pid, signo, strsignal(signo)); #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_warn("%s[%d] dumped core", op->id, op->pid); } #endif } else { // Shouldn't be possible to get here services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, "Unable to wait for child to complete"); } } /*! * \internal * \brief Execute an action whose standard uses executable files * * \param[in,out] op Action to execute * * \return Standard Pacemaker return value * \retval EBUSY Recurring operation could not be initiated * \retval pcmk_rc_error Synchronous action failed * \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action * should not be freed (because it's pending or because * it failed to execute and was already freed) * * \note If the return value for an asynchronous action is not pcmk_rc_ok, the * caller is responsible for freeing the action. */ int services__execute_file(svc_action_t *op) { int stdout_fd[2]; int stderr_fd[2]; int stdin_fd[2] = {-1, -1}; int rc; struct stat st; struct sigchld_data_s data = { .ignored = false }; // Catch common failure conditions early if (stat(op->opaque->exec, &st) != 0) { rc = errno; crm_info("Cannot execute '%s': %s " QB_XS " stat rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pipe(stdout_fd) < 0) { rc = errno; crm_info("Cannot execute '%s': %s " QB_XS " pipe(stdout) rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pipe(stderr_fd) < 0) { rc = errno; close_pipe(stdout_fd); crm_info("Cannot execute '%s': %s " QB_XS " pipe(stderr) rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pcmk_is_set(pcmk_get_ra_caps(op->standard), pcmk_ra_cap_stdin)) { if (pipe(stdin_fd) < 0) { rc = errno; close_pipe(stdout_fd); close_pipe(stderr_fd); crm_info("Cannot execute '%s': %s " QB_XS " pipe(stdin) rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); services__handle_exec_error(op, rc); goto done; } } if (op->synchronous && !sigchld_setup(&data)) { close_pipe(stdin_fd); close_pipe(stdout_fd); close_pipe(stderr_fd); sigchld_cleanup(&data); services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, "Could not manage signals for child process"); goto done; } op->pid = fork(); switch (op->pid) { case -1: rc = errno; close_pipe(stdin_fd); close_pipe(stdout_fd); close_pipe(stderr_fd); crm_info("Cannot execute '%s': %s " QB_XS " fork rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); services__handle_exec_error(op, rc); if (op->synchronous) { sigchld_cleanup(&data); } goto done; break; case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); if (stdin_fd[1] >= 0) { close(stdin_fd[1]); } if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_warn("Can't redirect output from '%s': %s " QB_XS " errno=%d", op->opaque->exec, pcmk_rc_str(errno), errno); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_warn("Can't redirect error output from '%s': %s " QB_XS " errno=%d", op->opaque->exec, pcmk_rc_str(errno), errno); } close(stderr_fd[1]); } if ((stdin_fd[0] >= 0) && (STDIN_FILENO != stdin_fd[0])) { if (dup2(stdin_fd[0], STDIN_FILENO) != STDIN_FILENO) { crm_warn("Can't redirect input to '%s': %s " QB_XS " errno=%d", op->opaque->exec, pcmk_rc_str(errno), errno); } close(stdin_fd[0]); } if (op->synchronous) { sigchld_cleanup(&data); } action_launch_child(op); pcmk__assert(false); // action_launch_child() should not return } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); if (stdin_fd[0] >= 0) { close(stdin_fd[0]); } op->opaque->stdout_fd = stdout_fd[0]; rc = pcmk__set_nonblocking(op->opaque->stdout_fd); if (rc != pcmk_rc_ok) { crm_info("Could not set '%s' output non-blocking: %s " QB_XS " rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); } op->opaque->stderr_fd = stderr_fd[0]; rc = pcmk__set_nonblocking(op->opaque->stderr_fd); if (rc != pcmk_rc_ok) { crm_info("Could not set '%s' error output non-blocking: %s " QB_XS " rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); } op->opaque->stdin_fd = stdin_fd[1]; if (op->opaque->stdin_fd >= 0) { // using buffer behind non-blocking-fd here - that could be improved // as long as no other standard uses stdin_fd assume stonith rc = pcmk__set_nonblocking(op->opaque->stdin_fd); if (rc != pcmk_rc_ok) { crm_info("Could not set '%s' input non-blocking: %s " QB_XS " fd=%d,rc=%d", op->opaque->exec, pcmk_rc_str(rc), op->opaque->stdin_fd, rc); } pipe_in_action_stdin_parameters(op); // as long as we are handling parameters directly in here just close close(op->opaque->stdin_fd); op->opaque->stdin_fd = -1; } // after fds are setup properly and before we plug anything into mainloop if (op->opaque->fork_callback) { op->opaque->fork_callback(op); } if (op->synchronous) { wait_for_sync_result(op, &data); sigchld_cleanup(&data); goto done; } crm_trace("Waiting async for '%s'[%d]", op->opaque->exec, op->pid); mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op, pcmk_is_set(op->flags, SVC_ACTION_LEAVE_GROUP)? mainloop_leave_pid_group : 0, async_action_complete); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); services_add_inflight_op(op); return pcmk_rc_ok; done: if (op->synchronous) { return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error; } else { return services__finalize_async_op(op); } } GList * services_os_get_single_directory_list(const char *root, gboolean files, gboolean executable) { GList *list = NULL; - struct dirent **namelist; + struct dirent **namelist = NULL; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if (executable && (sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable) { GList *result = NULL; char *dirs = strdup(root); char *dir = NULL; if (pcmk__str_empty(dirs)) { free(dirs); return result; } for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) { GList *tmp = services_os_get_single_directory_list(dir, files, executable); if (tmp) { result = g_list_concat(result, tmp); } } free(dirs); return result; }