diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index f280065c01..bbe7d5d220 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,1434 +1,1448 @@ /* * Copyright 2010-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "crm/services_internal.h" #include "services_private.h" static void close_pipe(int fildes[]); /* We have two alternative ways of handling SIGCHLD when synchronously waiting * for spawned processes to complete. Both rely on polling a file descriptor to * discover SIGCHLD events. * * If sys/signalfd.h is available (e.g. on Linux), we call signalfd() to * generate the file descriptor. Otherwise, we use the "self-pipe trick" * (opening a pipe and writing a byte to it when SIGCHLD is received). */ #ifdef HAVE_SYS_SIGNALFD_H // signalfd() implementation #include // Everything needed to manage SIGCHLD handling struct sigchld_data_s { sigset_t mask; // Signals to block now (including SIGCHLD) sigset_t old_mask; // Previous set of blocked signals }; // Initialize SIGCHLD data and prepare for use static bool sigchld_setup(struct sigchld_data_s *data) { sigemptyset(&(data->mask)); sigaddset(&(data->mask), SIGCHLD); sigemptyset(&(data->old_mask)); // Block SIGCHLD (saving previous set of blocked signals to restore later) if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) { crm_info("Wait for child process completion failed: %s " CRM_XS " source=sigprocmask", pcmk_rc_str(errno)); return false; } return true; } // Get a file descriptor suitable for polling for SIGCHLD events static int sigchld_open(struct sigchld_data_s *data) { int fd; CRM_CHECK(data != NULL, return -1); fd = signalfd(-1, &(data->mask), SFD_NONBLOCK); if (fd < 0) { crm_info("Wait for child process completion failed: %s " CRM_XS " source=signalfd", pcmk_rc_str(errno)); } return fd; } // Close a file descriptor returned by sigchld_open() static void sigchld_close(int fd) { if (fd > 0) { close(fd); } } // Return true if SIGCHLD was received from polled fd static bool sigchld_received(int fd) { struct signalfd_siginfo fdsi; ssize_t s; if (fd < 0) { return false; } s = read(fd, &fdsi, sizeof(struct signalfd_siginfo)); if (s != sizeof(struct signalfd_siginfo)) { crm_info("Wait for child process completion failed: %s " CRM_XS " source=read", pcmk_rc_str(errno)); } else if (fdsi.ssi_signo == SIGCHLD) { return true; } return false; } // Do anything needed after done waiting for SIGCHLD static void sigchld_cleanup(struct sigchld_data_s *data) { // Restore the original set of blocked signals if ((sigismember(&(data->old_mask), SIGCHLD) == 0) && (sigprocmask(SIG_UNBLOCK, &(data->mask), NULL) < 0)) { crm_warn("Could not clean up after child process completion: %s", pcmk_rc_str(errno)); } } #else // HAVE_SYS_SIGNALFD_H not defined // Self-pipe implementation (see above for function descriptions) struct sigchld_data_s { int pipe_fd[2]; // Pipe file descriptors struct sigaction sa; // Signal handling info (with SIGCHLD) struct sigaction old_sa; // Previous signal handling info }; // We need a global to use in the signal handler volatile struct sigchld_data_s *last_sigchld_data = NULL; static void sigchld_handler() { // We received a SIGCHLD, so trigger pipe polling if ((last_sigchld_data != NULL) && (last_sigchld_data->pipe_fd[1] >= 0) && (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) { crm_info("Wait for child process completion failed: %s " CRM_XS " source=write", pcmk_rc_str(errno)); } } static bool sigchld_setup(struct sigchld_data_s *data) { int rc; data->pipe_fd[0] = data->pipe_fd[1] = -1; if (pipe(data->pipe_fd) == -1) { crm_info("Wait for child process completion failed: %s " CRM_XS " source=pipe", pcmk_rc_str(errno)); return false; } rc = pcmk__set_nonblocking(data->pipe_fd[0]); if (rc != pcmk_rc_ok) { crm_info("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc); } rc = pcmk__set_nonblocking(data->pipe_fd[1]); if (rc != pcmk_rc_ok) { crm_info("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc); } // Set SIGCHLD handler data->sa.sa_handler = sigchld_handler; data->sa.sa_flags = 0; sigemptyset(&(data->sa.sa_mask)); if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) { crm_info("Wait for child process completion failed: %s " CRM_XS " source=sigaction", pcmk_rc_str(errno)); } // Remember data for use in signal handler last_sigchld_data = data; return true; } static int sigchld_open(struct sigchld_data_s *data) { CRM_CHECK(data != NULL, return -1); return data->pipe_fd[0]; } static void sigchld_close(int fd) { // Pipe will be closed in sigchld_cleanup() return; } static bool sigchld_received(int fd) { char ch; if (fd < 0) { return false; } // Clear out the self-pipe while (read(fd, &ch, 1) == 1) /*omit*/; return true; } static void sigchld_cleanup(struct sigchld_data_s *data) { // Restore the previous SIGCHLD handler if (sigaction(SIGCHLD, &(data->old_sa), NULL) < 0) { crm_warn("Could not clean up after child process completion: %s", pcmk_rc_str(errno)); } close_pipe(data->pipe_fd); } #endif /*! * \internal * \brief Close the two file descriptors of a pipe * * \param[in] fildes Array of file descriptors opened by pipe() */ static void close_pipe(int fildes[]) { if (fildes[0] >= 0) { close(fildes[0]); fildes[0] = -1; } if (fildes[1] >= 0) { close(fildes[1]); fildes[1] = -1; } } static gboolean svc_read_output(int fd, svc_action_t * op, bool is_stderr) { char *data = NULL; int rc = 0, len = 0; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; if (fd < 0) { crm_trace("No fd for %s", op->id); return FALSE; } if (is_stderr && op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; crm_trace("Reading %s stderr into offset %d", op->id, len); } else if (is_stderr == FALSE && op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; crm_trace("Reading %s stdout into offset %d", op->id, len); } else { crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len); } do { rc = read(fd, buf, buf_read_len); if (rc > 0) { buf[rc] = 0; crm_trace("Got %d chars: %.80s", rc, buf); data = pcmk__realloc(data, len + rc + 1); len += sprintf(data + len, "%s", buf); } else if (errno != EINTR) { /* error or EOF * Cleanup happens in pipe_done() */ rc = FALSE; break; } } while (rc == buf_read_len || rc < 0); if (is_stderr) { op->stderr_data = data; } else { op->stdout_data = data; } return rc; } static int dispatch_stdout(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stdout_fd, op, FALSE); } static int dispatch_stderr(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stderr_fd, op, TRUE); } static void pipe_out_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), strcmp(key, "OCF_CHECK_LEVEL") != 0 ? "OCF_RESKEY_%s" : "%s", (char *)key); set_ocf_env(buffer, value, user_data); } static void set_alert_env(gpointer key, gpointer value, gpointer user_data) { int rc; if (value != NULL) { rc = setenv(key, value, 1); } else { rc = unsetenv(key); } if (rc < 0) { crm_perror(LOG_ERR, "setenv %s=%s", (char*)key, (value? (char*)value : "")); } else { crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : "")); } } /*! * \internal * \brief Add environment variables suitable for an action * * \param[in] op Action to use */ static void add_action_env_vars(const svc_action_t *op) { void (*env_setter)(gpointer, gpointer, gpointer) = NULL; if (op->agent == NULL) { env_setter = set_alert_env; /* we deal with alert handler */ } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) { env_setter = set_ocf_env_with_prefix; } if (env_setter != NULL && op->params != NULL) { g_hash_table_foreach(op->params, env_setter, NULL); } if (env_setter == NULL || env_setter == set_alert_env) { return; } set_ocf_env("OCF_RA_VERSION_MAJOR", PCMK_OCF_MAJOR_VERSION, NULL); set_ocf_env("OCF_RA_VERSION_MINOR", PCMK_OCF_MINOR_VERSION, NULL); set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL); set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } static void pipe_in_single_parameter(gpointer key, gpointer value, gpointer user_data) { svc_action_t *op = user_data; char *buffer = crm_strdup_printf("%s=%s\n", (char *)key, (char *) value); int ret, total = 0, len = strlen(buffer); do { errno = 0; ret = write(op->opaque->stdin_fd, buffer + total, len - total); if (ret > 0) { total += ret; } } while ((errno == EINTR) && (total < len)); free(buffer); } /*! * \internal * \brief Pipe parameters in via stdin for action * * \param[in] op Action to use */ static void pipe_in_action_stdin_parameters(const svc_action_t *op) { crm_debug("sending args"); if (op->params) { g_hash_table_foreach(op->params, pipe_in_single_parameter, (gpointer) op); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invocation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; op->opaque->repeat_timer = 0; services_action_async(op, NULL); return FALSE; } /*! * \internal * \brief Finalize handling of an asynchronous operation * * Given a completed asynchronous operation, cancel or reschedule it as * appropriate if recurring, call its callback if registered, stop tracking it, * and clean it up. * * \param[in,out] op Operation to finalize * * \return Standard Pacemaker return code * \retval EINVAL Caller supplied NULL or invalid \p op * \retval EBUSY Uncanceled recurring action has only been cleaned up * \retval pcmk_rc_ok Action has been freed * * \note If the return value is not pcmk_rc_ok, the caller is responsible for * freeing the action. */ int services__finalize_async_op(svc_action_t *op) { CRM_CHECK((op != NULL) && !(op->synchronous), return EINVAL); if (op->interval_ms != 0) { // Recurring operations must be either cancelled or rescheduled if (op->cancel) { services__set_cancelled(op); cancel_recurring_action(op); } else { op->opaque->repeat_timer = g_timeout_add(op->interval_ms, recurring_action_timer, (void *) op); } } if (op->opaque->callback != NULL) { op->opaque->callback(op); } // Stop tracking the operation (as in-flight or blocked) op->pid = 0; services_untrack_op(op); if ((op->interval_ms != 0) && !(op->cancel)) { // Do not free recurring actions (they will get freed when cancelled) services_action_cleanup(op); return EBUSY; } services_action_free(op); return pcmk_rc_ok; } static void close_op_input(svc_action_t *op) { if (op->opaque->stdin_fd >= 0) { close(op->opaque->stdin_fd); } } static void finish_op_output(svc_action_t *op, bool is_stderr) { mainloop_io_t **source; int fd; if (is_stderr) { source = &(op->opaque->stderr_gsource); fd = op->opaque->stderr_fd; } else { source = &(op->opaque->stdout_gsource); fd = op->opaque->stdout_fd; } if (op->synchronous || *source) { crm_trace("Finish reading %s[%d] %s", op->id, op->pid, (is_stderr? "stdout" : "stderr")); svc_read_output(fd, op, is_stderr); if (op->synchronous) { close(fd); } else { mainloop_del_fd(*source); *source = NULL; } } } // Log an operation's stdout and stderr static void log_op_output(svc_action_t *op) { char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid); /* The library caller has better context to know how important the output * is, so log it at info and debug severity here. They can log it again at * higher severity if appropriate. */ crm_log_output(LOG_INFO, prefix, op->stderr_data); strcpy(prefix + strlen(prefix) - strlen("error output"), "output"); crm_log_output(LOG_DEBUG, prefix, op->stdout_data); free(prefix); } // Truncate exit reasons at this many characters #define EXIT_REASON_MAX_LEN 128 static void parse_exit_reason_from_stderr(svc_action_t *op) { const char *reason_start = NULL; const char *reason_end = NULL; const int prefix_len = strlen(PCMK_OCF_REASON_PREFIX); if ((op->stderr_data == NULL) || // Only OCF agents have exit reasons in stderr !pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_none)) { return; } // Find the last occurrence of the magic string indicating an exit reason for (const char *cur = strstr(op->stderr_data, PCMK_OCF_REASON_PREFIX); cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { cur += prefix_len; // Skip over magic string reason_start = cur; } if ((reason_start == NULL) || (reason_start[0] == '\n') || (reason_start[0] == '\0')) { return; // No or empty exit reason } // Exit reason goes to end of line (or end of output) reason_end = strchr(reason_start, '\n'); if (reason_end == NULL) { reason_end = reason_start + strlen(reason_start); } // Limit size of exit reason to something reasonable if (reason_end > (reason_start + EXIT_REASON_MAX_LEN)) { reason_end = reason_start + EXIT_REASON_MAX_LEN; } free(op->opaque->exit_reason); op->opaque->exit_reason = strndup(reason_start, reason_end - reason_start); } /*! * \internal * \brief Process the completion of an asynchronous child process * * \param[in] p Child process that completed * \param[in] pid Process ID of child * \param[in] core (unused) * \param[in] signo Signal that interrupted child, if any * \param[in] exitcode Exit status of child process */ static void async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo, int exitcode) { svc_action_t *op = mainloop_child_userdata(p); mainloop_clear_child_userdata(p); CRM_CHECK(op->pid == pid, services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, "Bug in mainloop handling"); return); /* Depending on the priority the mainloop gives the stdout and stderr * file descriptors, this function could be called before everything has * been read from them, so force a final read now. */ finish_op_output(op, true); finish_op_output(op, false); close_op_input(op); if (signo == 0) { crm_debug("%s[%d] exited with status %d", op->id, op->pid, exitcode); services__set_result(op, exitcode, PCMK_EXEC_DONE, NULL); log_op_output(op); parse_exit_reason_from_stderr(op); } else if (mainloop_child_timeout(p)) { const char *what = NULL; if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) { what = "Fence agent"; } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_ALERT, pcmk__str_none)) { what = "Alert agent"; } else if (op->standard != NULL) { what = "Resource agent"; } else { what = "Process"; } crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout); services__format_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT, "%s did not complete in time", what); } else if (op->cancel) { /* If an in-flight recurring operation was killed because it was * cancelled, don't treat that as a failure. */ crm_info("%s[%d] terminated with signal %d (%s)", op->id, op->pid, signo, strsignal(signo)); services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_CANCELLED, NULL); } else { crm_info("%s[%d] terminated with signal %d (%s)", op->id, op->pid, signo, strsignal(signo)); - services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, - "Process interrupted by signal"); + services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Process interrupted by %s signal", + strsignal(signo)); } services__finalize_async_op(op); } /*! * \internal * \brief Return agent standard's exit status for "generic error" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for errors in general. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__generic_error(svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, "status", pcmk__str_casei)) { return PCMK_LSB_STATUS_UNKNOWN; } #if SUPPORT_NAGIOS if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return NAGIOS_STATE_UNKNOWN; } #endif return PCMK_OCF_UNKNOWN_ERROR; } /*! * \internal * \brief Return agent standard's exit status for "not installed" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for "not installed" errors. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__not_installed_error(svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, "status", pcmk__str_casei)) { return PCMK_LSB_STATUS_NOT_INSTALLED; } #if SUPPORT_NAGIOS if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return NAGIOS_STATE_UNKNOWN; } #endif return PCMK_OCF_NOT_INSTALLED; } /*! * \internal * \brief Return agent standard's exit status for "insufficient privileges" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for "insufficient privileges" errors. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__authorization_error(svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, "status", pcmk__str_casei)) { return PCMK_LSB_STATUS_INSUFFICIENT_PRIV; } #if SUPPORT_NAGIOS if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return NAGIOS_INSUFFICIENT_PRIV; } #endif return PCMK_OCF_INSUFFICIENT_PRIV; } /*! * \internal * \brief Return agent standard's exit status for "not configured" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for "not configured" errors. * * \param[in] op Action that error is for * \param[in] is_fatal Whether problem is cluster-wide instead of only local * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__configuration_error(svc_action_t *op, bool is_fatal) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, "status", pcmk__str_casei)) { return PCMK_LSB_NOT_CONFIGURED; } #if SUPPORT_NAGIOS if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return NAGIOS_STATE_UNKNOWN; } #endif return is_fatal? PCMK_OCF_NOT_CONFIGURED : PCMK_OCF_INVALID_PARAM; } /*! * \internal * \brief Set operation rc and status per errno from stat(), fork() or execvp() * * \param[in,out] op Operation to set rc and status for * \param[in] error Value of errno after system call * * \return void */ void services__handle_exec_error(svc_action_t * op, int error) { + const char *name = op->opaque->exec; + + if (name == NULL) { + name = op->agent; + if (name == NULL) { + name = op->id; + } + } + switch (error) { /* see execve(2), stat(2) and fork(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ case ENOTDIR: /* Path component is not a directory */ case EINVAL: /* Invalid executable format */ case ENOEXEC: /* Invalid executable format */ - services__set_result(op, services__not_installed_error(op), - PCMK_EXEC_NOT_INSTALLED, pcmk_rc_str(error)); + services__format_result(op, services__not_installed_error(op), + PCMK_EXEC_NOT_INSTALLED, "%s: %s", + name, pcmk_rc_str(error)); break; case EACCES: /* permission denied (various errors) */ case EPERM: /* permission denied (various errors) */ - services__set_result(op, services__authorization_error(op), - PCMK_EXEC_ERROR, pcmk_rc_str(error)); + services__format_result(op, services__authorization_error(op), + PCMK_EXEC_ERROR, "%s: %s", + name, pcmk_rc_str(error)); break; default: services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, pcmk_rc_str(error)); } } /*! * \internal * \brief Exit a child process that failed before executing agent * * \param[in] op Action that failed * \param[in] exit_status Exit status code to use * \param[in] exit_reason Exit reason to output if for OCF agent */ static void exit_child(svc_action_t *op, int exit_status, const char *exit_reason) { if ((op != NULL) && (exit_reason != NULL) && pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_none)) { fprintf(stderr, PCMK_OCF_REASON_PREFIX "%s\n", exit_reason); } _exit(exit_status); } static void action_launch_child(svc_action_t *op) { int rc; /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library. * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well. * We do not want this to be inherited by the child process. By resetting this the signal * to the default behavior, we avoid some potential odd problems that occur during OCF * scripts when SIGPIPE is ignored by the environment. */ signal(SIGPIPE, SIG_DFL); #if defined(HAVE_SCHED_SETSCHEDULER) if (sched_getscheduler(0) != SCHED_OTHER) { struct sched_param sp; memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { crm_info("Could not reset scheduling policy for %s", op->id); } } #endif if (setpriority(PRIO_PROCESS, 0, 0) == -1) { crm_info("Could not reset process priority for %s", op->id); } /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); pcmk__close_fds_in_child(false); /* It would be nice if errors in this function could be reported as * execution status (for example, PCMK_EXEC_NO_SECRETS for the secrets error * below) instead of exit status. However, we've already forked, so * exit status is all we have. At least for OCF actions, we can output an * exit reason for the parent to parse. */ #if SUPPORT_CIBSECRETS rc = pcmk__substitute_secrets(op->rsc, op->params); if (rc != pcmk_rc_ok) { if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", op->rsc, pcmk_rc_str(rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", op->rsc, pcmk_rc_str(rc)); exit_child(op, services__configuration_error(op, false), "Unable to load CIB secrets"); } } #endif add_action_env_vars(op); /* Become the desired user */ if (op->opaque->uid && (geteuid() == 0)) { // If requested, set effective group if (op->opaque->gid && (setgid(op->opaque->gid) < 0)) { crm_err("Considering %s unauthorized because could not set " "child group to %d: %s", op->id, op->opaque->gid, strerror(errno)); exit_child(op, services__authorization_error(op), "Could not set group for child process"); } // Erase supplementary group list // (We could do initgroups() if we kept a copy of the username) if (setgroups(0, NULL) < 0) { crm_err("Considering %s unauthorized because could not " "clear supplementary groups: %s", op->id, strerror(errno)); exit_child(op, services__authorization_error(op), "Could not clear supplementary groups for child process"); } // Set effective user if (setuid(op->opaque->uid) < 0) { crm_err("Considering %s unauthorized because could not set user " "to %d: %s", op->id, op->opaque->uid, strerror(errno)); exit_child(op, services__authorization_error(op), "Could not set user for child process"); } } // Execute the agent (doesn't return if successful) execvp(op->opaque->exec, op->opaque->args); // An earlier stat() should have avoided most possible errors rc = errno; services__handle_exec_error(op, rc); crm_err("Unable to execute %s: %s", op->id, strerror(rc)); exit_child(op, op->rc, "Child process was unable to execute file"); } /*! * \internal * \brief Wait for synchronous action to complete, and set its result * * \param[in] op Action to wait for * \param[in] data Child signal data */ static void wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data) { int status = 0; int timeout = op->timeout; time_t start = time(NULL); struct pollfd fds[3]; int wait_rc = 0; const char *wait_reason = NULL; fds[0].fd = op->opaque->stdout_fd; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = op->opaque->stderr_fd; fds[1].events = POLLIN; fds[1].revents = 0; fds[2].fd = sigchld_open(data); fds[2].events = POLLIN; fds[2].revents = 0; crm_trace("Waiting for %s[%d]", op->id, op->pid); do { int poll_rc = poll(fds, 3, timeout); wait_reason = NULL; if (poll_rc > 0) { if (fds[0].revents & POLLIN) { svc_read_output(op->opaque->stdout_fd, op, FALSE); } if (fds[1].revents & POLLIN) { svc_read_output(op->opaque->stderr_fd, op, TRUE); } if ((fds[2].revents & POLLIN) && sigchld_received(fds[2].fd)) { wait_rc = waitpid(op->pid, &status, WNOHANG); if ((wait_rc > 0) || ((wait_rc < 0) && (errno == ECHILD))) { // Child process exited or doesn't exist break; } else if (wait_rc < 0) { wait_reason = pcmk_rc_str(errno); crm_info("Wait for completion of %s[%d] failed: %s " CRM_XS " source=waitpid", op->id, op->pid, wait_reason); wait_rc = 0; // Act as if process is still running } } } else if (poll_rc == 0) { // Poll timed out with no descriptors ready timeout = 0; break; } else if ((poll_rc < 0) && (errno != EINTR)) { wait_reason = pcmk_rc_str(errno); crm_info("Wait for completion of %s[%d] failed: %s " CRM_XS " source=poll", op->id, op->pid, wait_reason); break; } timeout = op->timeout - (time(NULL) - start) * 1000; } while ((op->timeout < 0 || timeout > 0)); crm_trace("Stopped waiting for %s[%d]", op->id, op->pid); finish_op_output(op, true); finish_op_output(op, false); close_op_input(op); sigchld_close(fds[2].fd); if (wait_rc <= 0) { if ((op->timeout > 0) && (timeout <= 0)) { services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT, "Process did not exit within specified timeout"); crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout); } else { services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, wait_reason); } /* If only child hasn't been successfully waited for, yet. This is to limit killing wrong target a bit more. */ if ((wait_rc == 0) && (waitpid(op->pid, &status, WNOHANG) == 0)) { if (kill(op->pid, SIGKILL)) { crm_warn("Could not kill rogue child %s[%d]: %s", op->id, op->pid, pcmk_rc_str(errno)); } /* Safe to skip WNOHANG here as we sent non-ignorable signal. */ while ((waitpid(op->pid, &status, 0) == (pid_t) -1) && (errno == EINTR)) { /* keep waiting */; } } } else if (WIFEXITED(status)) { services__set_result(op, WEXITSTATUS(status), PCMK_EXEC_DONE, NULL); parse_exit_reason_from_stderr(op); crm_info("%s[%d] exited with status %d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); - services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, - "Process interrupted by signal"); + services__format_result(op, services__generic_error(op), + PCMK_EXEC_ERROR, + "Process interrupted by %s signal", + strsignal(signo)); crm_info("%s[%d] terminated with signal %d (%s)", op->id, op->pid, signo, strsignal(signo)); #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_warn("%s[%d] dumped core", op->id, op->pid); } #endif } else { // Shouldn't be possible to get here services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, "Unable to wait for child to complete"); } } /*! * \internal * \brief Execute an action whose standard uses executable files * * \param[in] op Action to execute * * \return Standard Pacemaker return value * \retval EBUSY Recurring operation could not be initiated * \retval pcmk_rc_error Synchronous action failed * \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action * should not be freed (because it's pending or because * it failed to execute and was already freed) * * \note If the return value for an asynchronous action is not pcmk_rc_ok, the * caller is responsible for freeing the action. */ int services__execute_file(svc_action_t *op) { int stdout_fd[2]; int stderr_fd[2]; int stdin_fd[2] = {-1, -1}; int rc; struct stat st; struct sigchld_data_s data; // Catch common failure conditions early if (stat(op->opaque->exec, &st) != 0) { rc = errno; crm_info("Cannot execute '%s': %s " CRM_XS " stat rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pipe(stdout_fd) < 0) { rc = errno; crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pipe(stderr_fd) < 0) { rc = errno; close_pipe(stdout_fd); crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pcmk_is_set(pcmk_get_ra_caps(op->standard), pcmk_ra_cap_stdin)) { if (pipe(stdin_fd) < 0) { rc = errno; close_pipe(stdout_fd); close_pipe(stderr_fd); crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); goto done; } } if (op->synchronous && !sigchld_setup(&data)) { close_pipe(stdin_fd); close_pipe(stdout_fd); close_pipe(stderr_fd); sigchld_cleanup(&data); services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, "Could not manage signals for child process"); goto done; } op->pid = fork(); switch (op->pid) { case -1: rc = errno; close_pipe(stdin_fd); close_pipe(stdout_fd); close_pipe(stderr_fd); crm_info("Cannot execute '%s': %s " CRM_XS " fork rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); if (op->synchronous) { sigchld_cleanup(&data); } goto done; break; case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); if (stdin_fd[1] >= 0) { close(stdin_fd[1]); } if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_warn("Can't redirect output from '%s': %s " CRM_XS " errno=%d", op->opaque->exec, pcmk_rc_str(errno), errno); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_warn("Can't redirect error output from '%s': %s " CRM_XS " errno=%d", op->opaque->exec, pcmk_rc_str(errno), errno); } close(stderr_fd[1]); } if ((stdin_fd[0] >= 0) && (STDIN_FILENO != stdin_fd[0])) { if (dup2(stdin_fd[0], STDIN_FILENO) != STDIN_FILENO) { crm_warn("Can't redirect input to '%s': %s " CRM_XS " errno=%d", op->opaque->exec, pcmk_rc_str(errno), errno); } close(stdin_fd[0]); } if (op->synchronous) { sigchld_cleanup(&data); } action_launch_child(op); CRM_ASSERT(0); /* action_launch_child is effectively noreturn */ } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); if (stdin_fd[0] >= 0) { close(stdin_fd[0]); } op->opaque->stdout_fd = stdout_fd[0]; rc = pcmk__set_nonblocking(op->opaque->stdout_fd); if (rc != pcmk_rc_ok) { crm_info("Could not set '%s' output non-blocking: %s " CRM_XS " rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); } op->opaque->stderr_fd = stderr_fd[0]; rc = pcmk__set_nonblocking(op->opaque->stderr_fd); if (rc != pcmk_rc_ok) { crm_info("Could not set '%s' error output non-blocking: %s " CRM_XS " rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); } op->opaque->stdin_fd = stdin_fd[1]; if (op->opaque->stdin_fd >= 0) { // using buffer behind non-blocking-fd here - that could be improved // as long as no other standard uses stdin_fd assume stonith rc = pcmk__set_nonblocking(op->opaque->stdin_fd); if (rc != pcmk_rc_ok) { crm_info("Could not set '%s' input non-blocking: %s " CRM_XS " fd=%d,rc=%d", op->opaque->exec, pcmk_rc_str(rc), op->opaque->stdin_fd, rc); } pipe_in_action_stdin_parameters(op); // as long as we are handling parameters directly in here just close close(op->opaque->stdin_fd); op->opaque->stdin_fd = -1; } // after fds are setup properly and before we plug anything into mainloop if (op->opaque->fork_callback) { op->opaque->fork_callback(op); } if (op->synchronous) { wait_for_sync_result(op, &data); sigchld_cleanup(&data); goto done; } crm_trace("Waiting async for '%s'[%d]", op->opaque->exec, op->pid); mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op, pcmk_is_set(op->flags, SVC_ACTION_LEAVE_GROUP)? mainloop_leave_pid_group : 0, async_action_complete); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); services_add_inflight_op(op); return pcmk_rc_ok; done: if (op->synchronous) { return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error; } else { return services__finalize_async_op(op); } } GList * services_os_get_single_directory_list(const char *root, gboolean files, gboolean executable) { GList *list = NULL; struct dirent **namelist; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if (executable && (sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable) { GList *result = NULL; char *dirs = strdup(root); char *dir = NULL; if (pcmk__str_empty(dirs)) { free(dirs); return result; } for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) { GList *tmp = services_os_get_single_directory_list(dir, files, executable); if (tmp) { result = g_list_concat(result, tmp); } } free(dirs); return result; } diff --git a/lib/services/systemd.c b/lib/services/systemd.c index bed1861077..9883e5b2ae 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -1,1088 +1,1094 @@ /* * Copyright 2012-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include static void invoke_unit_by_path(svc_action_t *op, const char *unit); #define BUS_NAME "org.freedesktop.systemd1" #define BUS_NAME_MANAGER BUS_NAME ".Manager" #define BUS_NAME_UNIT BUS_NAME ".Unit" #define BUS_PATH "/org/freedesktop/systemd1" /*! * \internal * \brief Prepare a systemd action * * \param[in] op Action to prepare * * \return Standard Pacemaker return code */ int services__systemd_prepare(svc_action_t *op) { op->opaque->exec = strdup("systemd-dbus"); if (op->opaque->exec == NULL) { return ENOMEM; } return pcmk_rc_ok; } /*! * \internal * \brief Map a systemd result to a standard OCF result * * \param[in] exit_status Systemd result * * \return Standard OCF result */ enum ocf_exitcode services__systemd2ocf(int exit_status) { // This library uses OCF codes for systemd actions return (enum ocf_exitcode) exit_status; } static inline DBusMessage * systemd_new_method(const char *method) { crm_trace("Calling: %s on " BUS_NAME_MANAGER, method); return dbus_message_new_method_call(BUS_NAME, BUS_PATH, BUS_NAME_MANAGER, method); } /* * Functions to manage a static DBus connection */ static DBusConnection* systemd_proxy = NULL; static inline DBusPendingCall * systemd_send(DBusMessage *msg, void(*done)(DBusPendingCall *pending, void *user_data), void *user_data, int timeout) { return pcmk_dbus_send(msg, systemd_proxy, done, user_data, timeout); } static inline DBusMessage * systemd_send_recv(DBusMessage *msg, DBusError *error, int timeout) { return pcmk_dbus_send_recv(msg, systemd_proxy, error, timeout); } /*! * \internal * \brief Send a method to systemd without arguments, and wait for reply * * \param[in] method Method to send * * \return Systemd reply on success, NULL (and error will be logged) otherwise * * \note The caller must call dbus_message_unref() on the reply after * handling it. */ static DBusMessage * systemd_call_simple_method(const char *method) { DBusMessage *msg = systemd_new_method(method); DBusMessage *reply = NULL; DBusError error; /* Don't call systemd_init() here, because that calls this */ CRM_CHECK(systemd_proxy, return NULL); if (msg == NULL) { crm_err("Could not create message to send %s to systemd", method); return NULL; } dbus_error_init(&error); reply = systemd_send_recv(msg, &error, DBUS_TIMEOUT_USE_DEFAULT); dbus_message_unref(msg); if (dbus_error_is_set(&error)) { crm_err("Could not send %s to systemd: %s (%s)", method, error.message, error.name); dbus_error_free(&error); return NULL; } else if (reply == NULL) { crm_err("Could not send %s to systemd: no reply received", method); return NULL; } return reply; } static gboolean systemd_init(void) { static int need_init = 1; // https://dbus.freedesktop.org/doc/api/html/group__DBusConnection.html if (systemd_proxy && dbus_connection_get_is_connected(systemd_proxy) == FALSE) { crm_warn("Connection to System DBus is closed. Reconnecting..."); pcmk_dbus_disconnect(systemd_proxy); systemd_proxy = NULL; need_init = 1; } if (need_init) { need_init = 0; systemd_proxy = pcmk_dbus_connect(); } if (systemd_proxy == NULL) { return FALSE; } return TRUE; } static inline char * systemd_get_property(const char *unit, const char *name, void (*callback)(const char *name, const char *value, void *userdata), void *userdata, DBusPendingCall **pending, int timeout) { return systemd_proxy? pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME_UNIT, name, callback, userdata, pending, timeout) : NULL; } void systemd_cleanup(void) { if (systemd_proxy) { pcmk_dbus_disconnect(systemd_proxy); systemd_proxy = NULL; } } /* * end of systemd_proxy functions */ /*! * \internal * \brief Check whether a file name represents a manageable systemd unit * * \param[in] name File name to check * * \return Pointer to "dot" before filename extension if so, NULL otherwise */ static const char * systemd_unit_extension(const char *name) { if (name) { const char *dot = strrchr(name, '.'); if (dot && (!strcmp(dot, ".service") || !strcmp(dot, ".socket") || !strcmp(dot, ".mount") || !strcmp(dot, ".timer") || !strcmp(dot, ".path"))) { return dot; } } return NULL; } static char * systemd_service_name(const char *name, bool add_instance_name) { const char *dot = NULL; if (pcmk__str_empty(name)) { return NULL; } /* Services that end with an @ sign are systemd templates. They expect an * instance name to follow the service name. If no instance name was * provided, just add "pacemaker" to the string as the instance name. It * doesn't seem to matter for purposes of looking up whether a service * exists or not. * * A template can be specified either with or without the unit extension, * so this block handles both cases. */ dot = systemd_unit_extension(name); if (dot) { if (dot != name && *(dot-1) == '@') { char *s = NULL; if (asprintf(&s, "%.*spacemaker%s", (int) (dot-name), name, dot) == -1) { /* If asprintf fails, just return name. */ return strdup(name); } return s; } else { return strdup(name); } } else if (add_instance_name && *(name+strlen(name)-1) == '@') { return crm_strdup_printf("%spacemaker.service", name); } else { return crm_strdup_printf("%s.service", name); } } static void systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data) { DBusError error; DBusMessage *reply = NULL; unsigned int reload_count = GPOINTER_TO_UINT(user_data); dbus_error_init(&error); if(pending) { reply = dbus_pending_call_steal_reply(pending); } if (pcmk_dbus_find_error(pending, reply, &error)) { crm_warn("Could not issue systemd reload %d: %s", reload_count, error.message); dbus_error_free(&error); } else { crm_trace("Reload %d complete", reload_count); } if(pending) { dbus_pending_call_unref(pending); } if(reply) { dbus_message_unref(reply); } } static bool systemd_daemon_reload(int timeout) { static unsigned int reload_count = 0; DBusMessage *msg = systemd_new_method("Reload"); reload_count++; CRM_ASSERT(msg != NULL); systemd_send(msg, systemd_daemon_reload_complete, GUINT_TO_POINTER(reload_count), timeout); dbus_message_unref(msg); return TRUE; } /*! * \internal * \brief Set an action result based on a method error * * \param[in] op Action to set result for * \param[in] error Method error */ static void set_result_from_method_error(svc_action_t *op, const DBusError *error) { services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Unable to invoke systemd DBus method"); if (strstr(error->name, "org.freedesktop.systemd1.InvalidName") || strstr(error->name, "org.freedesktop.systemd1.LoadFailed") || strstr(error->name, "org.freedesktop.systemd1.NoSuchUnit")) { if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) { crm_trace("Masking systemd stop failure (%s) for %s " "because unknown service can be considered stopped", error->name, crm_str(op->rsc)); services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); return; } - services__set_result(op, PCMK_OCF_NOT_INSTALLED, - PCMK_EXEC_NOT_INSTALLED, "systemd unit not found"); + services__format_result(op, PCMK_OCF_NOT_INSTALLED, + PCMK_EXEC_NOT_INSTALLED, + "systemd unit %s not found", op->agent); } crm_info("DBus request for %s of systemd unit %s for resource %s failed: %s", op->action, op->agent, crm_str(op->rsc), error->message); } /*! * \internal * \brief Extract unit path from LoadUnit reply, and execute action * * \param[in] reply LoadUnit reply * \param[in] op Action to execute (or NULL to just return path) * * \return DBus object path for specified unit if successful (only valid for * lifetime of \p reply), otherwise NULL */ static const char * execute_after_loadunit(DBusMessage *reply, svc_action_t *op) { const char *path = NULL; DBusError error; /* path here is not used other than as a non-NULL flag to indicate that a * request was indeed sent */ if (pcmk_dbus_find_error((void *) &path, reply, &error)) { if (op != NULL) { set_result_from_method_error(op, &error); } dbus_error_free(&error); } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { if (op != NULL) { services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "systemd DBus method had unexpected reply"); crm_info("Could not load systemd unit %s for %s: " "DBus reply has unexpected type", op->agent, op->id); } else { crm_info("Could not load systemd unit: " "DBus reply has unexpected type"); } } else { dbus_message_get_args (reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); } if (op != NULL) { if (path != NULL) { invoke_unit_by_path(op, path); } else if (!(op->synchronous)) { - services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, - "No DBus object found for systemd unit"); + services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "No DBus object found for systemd unit %s", + op->agent); services__finalize_async_op(op); } } return path; } /*! * \internal * \brief Execute a systemd action after its LoadUnit completes * * \param[in] pending If not NULL, DBus call associated with LoadUnit request * \param[in] user_data Action to execute */ static void loadunit_completed(DBusPendingCall *pending, void *user_data) { DBusMessage *reply = NULL; svc_action_t *op = user_data; crm_trace("LoadUnit result for %s arrived", op->id); // Grab the reply if (pending != NULL) { reply = dbus_pending_call_steal_reply(pending); } // The call is no longer pending CRM_LOG_ASSERT(pending == op->opaque->pending); services_set_op_pending(op, NULL); // Execute the desired action based on the reply execute_after_loadunit(reply, user_data); if (reply != NULL) { dbus_message_unref(reply); } } /*! * \internal * \brief Execute a systemd action, given the unit name * * \param[in] arg_name Unit name (possibly shortened, i.e. without ".service") * \param[in] op Action to execute (if NULL, just get the object path) * \param[out] path If non-NULL and \p op is NULL or synchronous, where to * store DBus object path for specified unit * * \return Standard Pacemaker return code (for NULL \p op, pcmk_rc_ok means unit * was found; for synchronous actions, pcmk_rc_ok means unit was * executed, with the actual result stored in \p op; for asynchronous * actions, pcmk_rc_ok means action was initiated) * \note It is the caller's responsibility to free the path. */ static int invoke_unit_by_name(const char *arg_name, svc_action_t *op, char **path) { DBusMessage *msg; DBusMessage *reply = NULL; DBusPendingCall *pending = NULL; char *name = NULL; if (!systemd_init()) { if (op != NULL) { services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "No DBus connection"); } return ENOTCONN; } /* Create a LoadUnit DBus method (equivalent to GetUnit if already loaded), * which makes the unit usable via further DBus methods. * * * * * */ msg = systemd_new_method("LoadUnit"); CRM_ASSERT(msg != NULL); // Add the (expanded) unit name as the argument name = systemd_service_name(arg_name, op == NULL || pcmk__str_eq(op->action, "meta-data", pcmk__str_none)); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); free(name); if ((op == NULL) || op->synchronous) { // For synchronous ops, wait for a reply and extract the result const char *unit = NULL; int rc = pcmk_rc_ok; reply = systemd_send_recv(msg, NULL, (op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT)); dbus_message_unref(msg); unit = execute_after_loadunit(reply, op); if (unit == NULL) { rc = ENOENT; if (path != NULL) { *path = NULL; } } else if (path != NULL) { *path = strdup(unit); if (*path == NULL) { rc = ENOMEM; } } if (reply != NULL) { dbus_message_unref(reply); } return rc; } // For asynchronous ops, initiate the LoadUnit call and return pending = systemd_send(msg, loadunit_completed, op, op->timeout); if (pending == NULL) { services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Unable to send DBus message"); dbus_message_unref(msg); return ECOMM; } // LoadUnit was successfully initiated services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); services_set_op_pending(op, pending); dbus_message_unref(msg); return pcmk_rc_ok; } /*! * \internal * \brief Compare two strings alphabetically (case-insensitive) * * \param[in] a First string to compare * \param[in] b Second string to compare * * \return 0 if strings are equal, -1 if a < b, 1 if a > b * * \note Usable as a GCompareFunc with g_list_sort(). * NULL is considered less than non-NULL. */ static gint sort_str(gconstpointer a, gconstpointer b) { if (!a && !b) { return 0; } else if (!a) { return -1; } else if (!b) { return 1; } return strcasecmp(a, b); } GList * systemd_unit_listall(void) { int nfiles = 0; GList *units = NULL; DBusMessageIter args; DBusMessageIter unit; DBusMessageIter elem; DBusMessage *reply = NULL; if (systemd_init() == FALSE) { return NULL; } /* " \n" \ " \n" \ " \n" \ */ reply = systemd_call_simple_method("ListUnitFiles"); if (reply == NULL) { return NULL; } if (!dbus_message_iter_init(reply, &args)) { crm_err("Could not list systemd unit files: systemd reply has no arguments"); dbus_message_unref(reply); return NULL; } if (!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) { crm_err("Could not list systemd unit files: systemd reply has invalid arguments"); dbus_message_unref(reply); return NULL; } dbus_message_iter_recurse(&args, &unit); for (; dbus_message_iter_get_arg_type(&unit) != DBUS_TYPE_INVALID; dbus_message_iter_next(&unit)) { DBusBasicValue value; const char *match = NULL; char *unit_name = NULL; char *basename = NULL; if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_STRUCT, __func__, __LINE__)) { crm_warn("Skipping systemd reply argument with unexpected type"); continue; } dbus_message_iter_recurse(&unit, &elem); if(!pcmk_dbus_type_check(reply, &elem, DBUS_TYPE_STRING, __func__, __LINE__)) { crm_warn("Skipping systemd reply argument with no string"); continue; } dbus_message_iter_get_basic(&elem, &value); if (value.str == NULL) { crm_debug("ListUnitFiles reply did not provide a string"); continue; } crm_trace("DBus ListUnitFiles listed: %s", value.str); match = systemd_unit_extension(value.str); if (match == NULL) { // This is not a unit file type we know how to manage crm_debug("ListUnitFiles entry '%s' is not supported as resource", value.str); continue; } // ListUnitFiles returns full path names, we just want base name basename = strrchr(value.str, '/'); if (basename) { basename = basename + 1; } else { basename = value.str; } if (!strcmp(match, ".service")) { // Service is the "default" unit type, so strip it unit_name = strndup(basename, match - basename); } else { unit_name = strdup(basename); } nfiles++; units = g_list_prepend(units, unit_name); } dbus_message_unref(reply); crm_trace("Found %d manageable systemd unit files", nfiles); units = g_list_sort(units, sort_str); return units; } gboolean systemd_unit_exists(const char *name) { char *path = NULL; char *state = NULL; /* Note: Makes a blocking dbus calls * Used by resources_find_service_class() when resource class=service */ if ((invoke_unit_by_name(name, NULL, &path) != pcmk_rc_ok) || (path == NULL)) { return FALSE; } /* A successful LoadUnit is not sufficient to determine the unit's * existence; it merely means the LoadUnit request received a reply. * We must make another blocking call to check the LoadState property. */ state = systemd_get_property(path, "LoadState", NULL, NULL, NULL, DBUS_TIMEOUT_USE_DEFAULT); free(path); if (pcmk__str_any_of(state, "loaded", "masked", NULL)) { free(state); return TRUE; } free(state); return FALSE; } #define METADATA_FORMAT \ "\n" \ "\n" \ "\n" \ " 1.1\n" \ " \n" \ " %s\n" \ " \n" \ " systemd unit file for %s\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ "\n" static char * systemd_unit_metadata(const char *name, int timeout) { char *meta = NULL; char *desc = NULL; char *path = NULL; char *escaped = NULL; if (invoke_unit_by_name(name, NULL, &path) == pcmk_rc_ok) { /* TODO: Worth a making blocking call for? Probably not. Possibly if cached. */ desc = systemd_get_property(path, "Description", NULL, NULL, NULL, timeout); } else { desc = crm_strdup_printf("Systemd unit file for %s", name); } escaped = crm_xml_escape(desc); meta = crm_strdup_printf(METADATA_FORMAT, name, escaped, name); free(desc); free(path); free(escaped); return meta; } /*! * \internal * \brief Determine result of method from reply * * \param[in] reply Reply to start, stop, or restart request * \param[in] op Action that was executed */ static void process_unit_method_reply(DBusMessage *reply, svc_action_t *op) { DBusError error; /* The first use of error here is not used other than as a non-NULL flag to * indicate that a request was indeed sent */ if (pcmk_dbus_find_error((void *) &error, reply, &error)) { set_result_from_method_error(op, &error); dbus_error_free(&error); } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { crm_info("DBus request for %s of %s succeeded but " "return type was unexpected", op->action, crm_str(op->rsc)); services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, "systemd DBus method had unexpected reply"); } else { const char *path = NULL; dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); crm_debug("DBus request for %s of %s using %s succeeded", op->action, crm_str(op->rsc), path); services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } } /*! * \internal * \brief Process the completion of an asynchronous unit start, stop, or restart * * \param[in] pending If not NULL, DBus call associated with request * \param[in] user_data Action that was executed */ static void unit_method_complete(DBusPendingCall *pending, void *user_data) { DBusMessage *reply = NULL; svc_action_t *op = user_data; crm_trace("Result for %s arrived", op->id); // Grab the reply if (pending != NULL) { reply = dbus_pending_call_steal_reply(pending); } // The call is no longer pending CRM_LOG_ASSERT(pending == op->opaque->pending); services_set_op_pending(op, NULL); // Determine result and finalize action process_unit_method_reply(reply, op); services__finalize_async_op(op); if (reply != NULL) { dbus_message_unref(reply); } } #define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/" /* When the cluster manages a systemd resource, we create a unit file override * to order the service "before" pacemaker. The "before" relationship won't * actually be used, since systemd won't ever start the resource -- we're * interested in the reverse shutdown ordering it creates, to ensure that * systemd doesn't stop the resource at shutdown while pacemaker is still * running. * * @TODO Add start timeout */ #define SYSTEMD_OVERRIDE_TEMPLATE \ "[Unit]\n" \ "Description=Cluster Controlled %s\n" \ "Before=pacemaker.service pacemaker_remote.service\n" \ "\n" \ "[Service]\n" \ "Restart=no\n" // Temporarily use rwxr-xr-x umask when opening a file for writing static FILE * create_world_readable(const char *filename) { mode_t orig_umask = umask(S_IWGRP | S_IWOTH); FILE *fp = fopen(filename, "w"); umask(orig_umask); return fp; } static void create_override_dir(const char *agent) { char *override_dir = crm_strdup_printf(SYSTEMD_OVERRIDE_ROOT "/%s.service.d", agent); int rc = pcmk__build_path(override_dir, 0755); if (rc != pcmk_rc_ok) { crm_warn("Could not create systemd override directory %s: %s", override_dir, pcmk_rc_str(rc)); } free(override_dir); } static char * get_override_filename(const char *agent) { return crm_strdup_printf(SYSTEMD_OVERRIDE_ROOT "/%s.service.d/50-pacemaker.conf", agent); } static void systemd_create_override(const char *agent, int timeout) { FILE *file_strm = NULL; char *override_file = get_override_filename(agent); create_override_dir(agent); /* Ensure the override file is world-readable. This is not strictly * necessary, but it avoids a systemd warning in the logs. */ file_strm = create_world_readable(override_file); if (file_strm == NULL) { crm_err("Cannot open systemd override file %s for writing", override_file); } else { char *override = crm_strdup_printf(SYSTEMD_OVERRIDE_TEMPLATE, agent); int rc = fprintf(file_strm, "%s\n", override); free(override); if (rc < 0) { crm_perror(LOG_WARNING, "Cannot write to systemd override file %s", override_file); } fflush(file_strm); fclose(file_strm); systemd_daemon_reload(timeout); } free(override_file); } static void systemd_remove_override(const char *agent, int timeout) { char *override_file = get_override_filename(agent); int rc = unlink(override_file); if (rc < 0) { // Stop may be called when already stopped, which is fine crm_perror(LOG_DEBUG, "Cannot remove systemd override file %s", override_file); } else { systemd_daemon_reload(timeout); } free(override_file); } /*! * \internal * \brief Parse result of systemd status check * * Set a status action's exit status and execution status based on a DBus * property check result, and finalize the action if asynchronous. * * \param[in] name DBus interface name for property that was checked * \param[in] state Property value * \param[in] userdata Status action that check was done for */ static void parse_status_result(const char *name, const char *state, void *userdata) { svc_action_t *op = userdata; crm_trace("Resource %s has %s='%s'", crm_str(op->rsc), name, crm_str(state)); if (pcmk__str_eq(state, "active", pcmk__str_none)) { services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else if (pcmk__str_eq(state, "reloading", pcmk__str_none)) { services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else if (pcmk__str_eq(state, "activating", pcmk__str_none)) { services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); } else if (pcmk__str_eq(state, "deactivating", pcmk__str_none)) { services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); } else { services__set_result(op, PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, state); } if (!(op->synchronous)) { services_set_op_pending(op, NULL); services__finalize_async_op(op); } } /*! * \internal * \brief Invoke a systemd unit, given its DBus object path * * \param[in] op Action to execute * \param[in] unit DBus object path of systemd unit to invoke */ static void invoke_unit_by_path(svc_action_t *op, const char *unit) { const char *method = NULL; DBusMessage *msg = NULL; DBusMessage *reply = NULL; if (pcmk__str_any_of(op->action, "monitor", "status", NULL)) { DBusPendingCall *pending = NULL; char *state; state = systemd_get_property(unit, "ActiveState", (op->synchronous? NULL : parse_status_result), op, (op->synchronous? NULL : &pending), op->timeout); if (op->synchronous) { parse_status_result("ActiveState", state, op); free(state); } else if (pending == NULL) { // Could not get ActiveState property - services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, - "Could not get unit state from DBus"); + services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Could not get state for unit %s from DBus", + op->agent); services__finalize_async_op(op); } else { services_set_op_pending(op, pending); } return; } else if (pcmk__str_eq(op->action, "start", pcmk__str_none)) { method = "StartUnit"; systemd_create_override(op->agent, op->timeout); } else if (pcmk__str_eq(op->action, "stop", pcmk__str_none)) { method = "StopUnit"; systemd_remove_override(op->agent, op->timeout); } else if (pcmk__str_eq(op->action, "restart", pcmk__str_none)) { method = "RestartUnit"; } else { - services__set_result(op, PCMK_OCF_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, - "Action not implemented for systemd resources"); + services__format_result(op, PCMK_OCF_UNIMPLEMENT_FEATURE, + PCMK_EXEC_ERROR, + "Action %s not implemented " + "for systemd resources", crm_str(op->action)); if (!(op->synchronous)) { services__finalize_async_op(op); } return; } crm_trace("Calling %s for unit path %s named %s", method, unit, crm_str(op->rsc)); msg = systemd_new_method(method); CRM_ASSERT(msg != NULL); /* (ss) */ { const char *replace_s = "replace"; char *name = systemd_service_name(op->agent, pcmk__str_eq(op->action, "meta-data", pcmk__str_none)); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID)); free(name); } if (op->synchronous) { reply = systemd_send_recv(msg, NULL, op->timeout); dbus_message_unref(msg); process_unit_method_reply(reply, op); if (reply != NULL) { dbus_message_unref(reply); } } else { DBusPendingCall *pending = systemd_send(msg, unit_method_complete, op, op->timeout); dbus_message_unref(msg); if (pending == NULL) { services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Unable to send DBus message"); services__finalize_async_op(op); } else { services_set_op_pending(op, pending); } } } static gboolean systemd_timeout_callback(gpointer p) { svc_action_t * op = p; op->opaque->timerid = 0; crm_info("%s action for systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc); - services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, - "Systemd unit action did not complete in time"); + services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, + "%s action for systemd unit %s " + "did not complete in time", op->action, op->agent); services__finalize_async_op(op); return FALSE; } /*! * \internal * \brief Execute a systemd action * * \param[in] op Action to execute * * \return Standard Pacemaker return code * \retval EBUSY Recurring operation could not be initiated * \retval pcmk_rc_error Synchronous action failed * \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action * should not be freed (because it's pending or because * it failed to execute and was already freed) * * \note If the return value for an asynchronous action is not pcmk_rc_ok, the * caller is responsible for freeing the action. */ int services__execute_systemd(svc_action_t *op) { CRM_ASSERT(op != NULL); if ((op->action == NULL) || (op->agent == NULL)) { services__set_result(op, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL, "Bug in action caller"); goto done; } if (!systemd_init()) { services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "No DBus connection"); goto done; } crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'", (op->synchronous? "" : "a"), op->action, op->agent, crm_str(op->rsc)); if (pcmk__str_eq(op->action, "meta-data", pcmk__str_casei)) { op->stdout_data = systemd_unit_metadata(op->agent, op->timeout); services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); goto done; } /* invoke_unit_by_name() should always override these values, which are here * just as a fail-safe in case there are any code paths that neglect to */ services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Bug in service library"); if (invoke_unit_by_name(op->agent, op, NULL) == pcmk_rc_ok) { op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op); services_add_inflight_op(op); return pcmk_rc_ok; } done: if (op->synchronous) { return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error; } else { return services__finalize_async_op(op); } } diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index b877beea69..8c61d3b1b1 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,2011 +1,2012 @@ /* - * Copyright 2004-2021 the Pacemaker project contributors + * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include resource_checks_t * cli_check_resource(pe_resource_t *rsc, char *role_s, char *managed) { pe_resource_t *parent = uber_parent(rsc); resource_checks_t *rc = calloc(1, sizeof(resource_checks_t)); if (role_s) { enum rsc_role_e role = text2role(role_s); if (role == RSC_ROLE_STOPPED) { rc->flags |= rsc_remain_stopped; } else if (pcmk_is_set(parent->flags, pe_rsc_promotable) && (role == RSC_ROLE_UNPROMOTED)) { rc->flags |= rsc_unpromotable; } } if (managed && !crm_is_true(managed)) { rc->flags |= rsc_unmanaged; } if (rsc->lock_node) { rc->lock_node = rsc->lock_node->details->uname; } rc->rsc = rsc; return rc; } static GList * build_node_info_list(pe_resource_t *rsc) { GList *retval = NULL; for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { pe_resource_t *child = (pe_resource_t *) iter->data; for (GList *iter2 = child->running_on; iter2 != NULL; iter2 = iter2->next) { pe_node_t *node = (pe_node_t *) iter2->data; node_info_t *ni = calloc(1, sizeof(node_info_t)); ni->node_name = node->details->uname; ni->promoted = pcmk_is_set(rsc->flags, pe_rsc_promotable) && child->fns->state(child, TRUE) == RSC_ROLE_PROMOTED; retval = g_list_prepend(retval, ni); } } return retval; } GList * cli_resource_search(pe_resource_t *rsc, const char *requested_name, pe_working_set_t *data_set) { GList *retval = NULL; pe_resource_t *parent = uber_parent(rsc); if (pe_rsc_is_clone(rsc)) { retval = build_node_info_list(rsc); /* The anonymous clone children's common ID is supplied */ } else if (pe_rsc_is_clone(parent) && !pcmk_is_set(rsc->flags, pe_rsc_unique) && rsc->clone_name && pcmk__str_eq(requested_name, rsc->clone_name, pcmk__str_casei) && !pcmk__str_eq(requested_name, rsc->id, pcmk__str_casei)) { retval = build_node_info_list(parent); } else if (rsc->running_on != NULL) { for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; node_info_t *ni = calloc(1, sizeof(node_info_t)); ni->node_name = node->details->uname; ni->promoted = (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED); retval = g_list_prepend(retval, ni); } } return retval; } #define XPATH_MAX 1024 // \return Standard Pacemaker return code static int find_resource_attr(pcmk__output_t *out, cib_t * the_cib, const char *attr, const char *rsc, const char *attr_set_type, const char *set_name, const char *attr_id, const char *attr_name, char **value) { int offset = 0; int rc = pcmk_rc_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; const char *xpath_base = NULL; if(value) { *value = NULL; } if(the_cib == NULL) { return ENOTCONN; } xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_RESOURCES); if (xpath_base == NULL) { crm_err(XML_CIB_TAG_RESOURCES " CIB element not known (bug?)"); return ENOMSG; } xpath_string = calloc(1, XPATH_MAX); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", xpath_base); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//*[@id=\"%s\"]", rsc); if (attr_set_type) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s", attr_set_type); if (set_name) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "[@id=\"%s\"]", set_name); } } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//nvpair["); if (attr_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@id=\"%s\"", attr_id); } if (attr_name) { if (attr_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, " and "); } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@name=\"%s\"", attr_name); } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "]"); CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { goto done; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { xmlNode *child = NULL; rc = ENOTUNIQ; out->info(out, "Multiple attributes match name=%s", attr_name); for (child = pcmk__xml_first_child(xml_search); child != NULL; child = pcmk__xml_next(child)) { out->info(out, " Value: %s \t(id=%s)", crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); } out->spacer(out); } else if(value) { const char *tmp = crm_element_value(xml_search, attr); if (tmp) { *value = strdup(tmp); } } done: free(xpath_string); free_xml(xml_search); return rc; } /* PRIVATE. Use the find_matching_attr_resources instead. */ static void find_matching_attr_resources_recursive(pcmk__output_t *out, GList/* */ ** result, pe_resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_set_type, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd, int depth) { int rc = pcmk_rc_ok; char *lookup_id = clone_strip(rsc->id); char *local_attr_id = NULL; /* visit the children */ for(GList *gIter = rsc->children; gIter; gIter = gIter->next) { find_matching_attr_resources_recursive(out, result, (pe_resource_t*)gIter->data, rsc_id, attr_set, attr_set_type, attr_id, attr_name, cib, cmd, depth+1); /* do it only once for clones */ if(pe_clone == rsc->variant) { break; } } rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); /* Post-order traversal. * The root is always on the list and it is the last item. */ if((0 == depth) || (pcmk_rc_ok == rc)) { /* push the head */ *result = g_list_append(*result, rsc); } free(local_attr_id); free(lookup_id); } /* The result is a linearized pre-ordered tree of resources. */ static GList/**/ * find_matching_attr_resources(pcmk__output_t *out, pe_resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_set_type, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd, gboolean force) { int rc = pcmk_rc_ok; char *lookup_id = NULL; char *local_attr_id = NULL; GList * result = NULL; /* If --force is used, update only the requested resource (clone or primitive). * Otherwise, if the primitive has the attribute, use that. * Otherwise use the clone. */ if(force == TRUE) { return g_list_append(result, rsc); } if(rsc->parent && pe_clone == rsc->parent->variant) { int rc = pcmk_rc_ok; char *local_attr_id = NULL; rc = find_resource_attr(out, cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); free(local_attr_id); if(rc != pcmk_rc_ok) { rsc = rsc->parent; out->info(out, "Performing %s of '%s' on '%s', the parent of '%s'", cmd, attr_name, rsc->id, rsc_id); } return g_list_append(result, rsc); } else if(rsc->parent == NULL && rsc->children && pe_clone == rsc->variant) { pe_resource_t *child = rsc->children->data; if(child->variant == pe_native) { lookup_id = clone_strip(child->id); /* Could be a cloned group! */ rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if(rc == pcmk_rc_ok) { rsc = child; out->info(out, "A value for '%s' already exists in child '%s', performing %s on that instead of '%s'", attr_name, lookup_id, cmd, rsc_id); } free(local_attr_id); free(lookup_id); } return g_list_append(result, rsc); } /* If the resource is a group ==> children inherit the attribute if defined. */ find_matching_attr_resources_recursive(out, &result, rsc, rsc_id, attr_set, attr_set_type, attr_id, attr_name, cib, cmd, 0); return result; } // \return Standard Pacemaker return code int cli_resource_update_attribute(pe_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_set_type, const char *attr_id, const char *attr_name, const char *attr_value, gboolean recursive, cib_t *cib, int cib_options, pe_working_set_t *data_set, gboolean force) { pcmk__output_t *out = data_set->priv; int rc = pcmk_rc_ok; static bool need_init = TRUE; char *local_attr_id = NULL; char *local_attr_set = NULL; GList/**/ *resources = NULL; const char *common_attr_id = attr_id; if (attr_id == NULL && force == FALSE) { find_resource_attr (out, cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL); } if (pcmk__str_eq(attr_set_type, XML_TAG_ATTR_SETS, pcmk__str_casei)) { if (force == FALSE) { rc = find_resource_attr(out, cib, XML_ATTR_ID, uber_parent(rsc)->id, XML_TAG_META_SETS, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_rc_ok && !out->is_quiet(out)) { out->err(out, "WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)", uber_parent(rsc)->id, attr_name, local_attr_id); out->err(out, " Delete '%s' first or use the force option to override", local_attr_id); } free(local_attr_id); if (rc == pcmk_rc_ok) { return ENOTUNIQ; } } resources = g_list_append(resources, rsc); } else { resources = find_matching_attr_resources(out, rsc, requested_name, attr_set, attr_set_type, attr_id, attr_name, cib, "update", force); } /* If either attr_set or attr_id is specified, * one clearly intends to modify a single resource. * It is the last item on the resource list.*/ for(GList *gIter = (attr_set||attr_id) ? g_list_last(resources) : resources ; gIter; gIter = gIter->next) { char *lookup_id = NULL; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; local_attr_id = NULL; local_attr_set = NULL; rsc = (pe_resource_t*)gIter->data; attr_id = common_attr_id; lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_rc_ok) { crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id); attr_id = local_attr_id; } else if (rc != ENXIO) { free(lookup_id); free(local_attr_id); g_list_free(resources); return rc; } else { const char *tag = crm_element_name(rsc->xml); if (attr_set == NULL) { local_attr_set = crm_strdup_printf("%s-%s", lookup_id, attr_set_type); attr_set = local_attr_set; } if (attr_id == NULL) { local_attr_id = crm_strdup_printf("%s-%s", attr_set, attr_name); attr_id = local_attr_id; } xml_top = create_xml_node(NULL, tag); crm_xml_add(xml_top, XML_ATTR_ID, lookup_id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); } xml_obj = crm_create_nvpair_xml(xml_obj, attr_id, attr_name, attr_value); if (xml_top == NULL) { xml_top = xml_obj; } crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Set '%s' option: id=%s%s%s%s%s value=%s", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : "", attr_value); } free_xml(xml_top); free(lookup_id); free(local_attr_id); free(local_attr_set); if(recursive && pcmk__str_eq(attr_set_type, XML_TAG_META_SETS, pcmk__str_casei)) { GList *lpc = NULL; if(need_init) { need_init = FALSE; pcmk__unpack_constraints(data_set); pe__clear_resource_flags_on_all(data_set, pe_rsc_allocating); } crm_debug("Looking for dependencies %p", rsc->rsc_cons_lhs); pe__set_resource_flags(rsc, pe_rsc_allocating); for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; crm_debug("Checking %s %d", cons->id, cons->score); if ((cons->score > 0) && !pcmk_is_set(cons->dependent->flags, pe_rsc_allocating)) { /* Don't get into colocation loops */ crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, cons->dependent->id); cli_resource_update_attribute(cons->dependent, cons->dependent->id, NULL, attr_set_type, NULL, attr_name, attr_value, recursive, cib, cib_options, data_set, force); } } } } g_list_free(resources); return rc; } // \return Standard Pacemaker return code int cli_resource_delete_attribute(pe_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_set_type, const char *attr_id, const char *attr_name, cib_t *cib, int cib_options, pe_working_set_t *data_set, gboolean force) { pcmk__output_t *out = data_set->priv; int rc = pcmk_rc_ok; GList/**/ *resources = NULL; if (attr_id == NULL && force == FALSE) { find_resource_attr(out, cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL); } if(pcmk__str_eq(attr_set_type, XML_TAG_META_SETS, pcmk__str_casei)) { resources = find_matching_attr_resources(out, rsc, requested_name, attr_set, attr_set_type, attr_id, attr_name, cib, "delete", force); } else { resources = g_list_append(resources, rsc); } for(GList *gIter = resources; gIter; gIter = gIter->next) { char *lookup_id = NULL; xmlNode *xml_obj = NULL; char *local_attr_id = NULL; rsc = (pe_resource_t*)gIter->data; lookup_id = clone_strip(rsc->id); rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == ENXIO) { free(lookup_id); rc = pcmk_rc_ok; continue; } else if (rc != pcmk_rc_ok) { free(lookup_id); g_list_free(resources); return rc; } if (attr_id == NULL) { attr_id = local_attr_id; } xml_obj = crm_create_nvpair_xml(NULL, attr_id, attr_name, NULL); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->remove(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Deleted '%s' option: id=%s%s%s%s%s", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free(lookup_id); free_xml(xml_obj); free(local_attr_id); } g_list_free(resources); return rc; } // \return Standard Pacemaker return code static int send_lrm_rsc_op(pcmk_ipc_api_t *controld_api, bool do_fail_resource, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; const char *router_node = host_uname; const char *rsc_api_id = NULL; const char *rsc_long_id = NULL; const char *rsc_class = NULL; const char *rsc_provider = NULL; const char *rsc_type = NULL; bool cib_only = false; pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { out->err(out, "Resource %s not found", rsc_id); return ENXIO; } else if (rsc->variant != pe_native) { out->err(out, "We can only process primitive resources, not %s", rsc_id); return EINVAL; } rsc_class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rsc_provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER), rsc_type = crm_element_value(rsc->xml, XML_ATTR_TYPE); if ((rsc_class == NULL) || (rsc_type == NULL)) { out->err(out, "Resource %s does not have a class and type", rsc_id); return EINVAL; } { pe_node_t *node = pe_find_node(data_set->nodes, host_uname); if (node == NULL) { out->err(out, "Node %s not found", host_uname); return pcmk_rc_node_unknown; } if (!(node->details->online)) { if (do_fail_resource) { out->err(out, "Node %s is not online", host_uname); return ENOTCONN; } else { cib_only = true; } } if (!cib_only && pe__is_guest_or_remote_node(node)) { node = pe__current_node(node->details->remote_rsc); if (node == NULL) { out->err(out, "No cluster connection to Pacemaker Remote node %s detected", host_uname); return ENOTCONN; } router_node = node->details->uname; } } if (rsc->clone_name) { rsc_api_id = rsc->clone_name; rsc_long_id = rsc->id; } else { rsc_api_id = rsc->id; } if (do_fail_resource) { return pcmk_controld_api_fail(controld_api, host_uname, router_node, rsc_api_id, rsc_long_id, rsc_class, rsc_provider, rsc_type); } else { return pcmk_controld_api_refresh(controld_api, host_uname, router_node, rsc_api_id, rsc_long_id, rsc_class, rsc_provider, rsc_type, cib_only); } } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(pe_resource_t *rsc) { const char *name = (rsc->clone_name? rsc->clone_name : rsc->id); return pcmk_is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } // \return Standard Pacemaker return code static int clear_rsc_history(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { int rc = pcmk_rc_ok; /* Erase the resource's entire LRM history in the CIB, even if we're only * clearing a single operation's fail count. If we erased only entries for a * single operation, we might wind up with a wrong idea of the current * resource state, and we might not re-probe the resource. */ rc = send_lrm_rsc_op(controld_api, false, host_uname, rsc_id, data_set); if (rc != pcmk_rc_ok) { return rc; } crm_trace("Processing %d mainloop inputs", pcmk_controld_api_replies_expected(controld_api)); while (g_main_context_iteration(NULL, FALSE)) { crm_trace("Processed mainloop input, %d still remaining", pcmk_controld_api_replies_expected(controld_api)); } return rc; } // \return Standard Pacemaker return code static int clear_rsc_failures(pcmk__output_t *out, pcmk_ipc_api_t *controld_api, const char *node_name, const char *rsc_id, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { int rc = pcmk_rc_ok; const char *failed_value = NULL; const char *failed_id = NULL; const char *interval_ms_s = NULL; GHashTable *rscs = NULL; GHashTableIter iter; /* Create a hash table to use as a set of resources to clean. This lets us * clean each resource only once (per node) regardless of how many failed * operations it has. */ rscs = pcmk__strkey_table(NULL, NULL); // Normalize interval to milliseconds for comparison to history entry if (operation) { interval_ms_s = crm_strdup_printf("%u", crm_parse_interval_spec(interval_spec)); } for (xmlNode *xml_op = pcmk__xml_first_child(data_set->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); if (failed_id == NULL) { // Malformed history entry, should never happen continue; } // No resource specified means all resources match if (rsc_id) { pe_resource_t *fail_rsc = pe_find_resource_with_flags(data_set->resources, failed_id, pe_find_renamed|pe_find_anon); if (!fail_rsc || !pcmk__str_eq(rsc_id, fail_rsc->id, pcmk__str_casei)) { continue; } } // Host name should always have been provided by this point failed_value = crm_element_value(xml_op, XML_ATTR_UNAME); if (!pcmk__str_eq(node_name, failed_value, pcmk__str_casei)) { continue; } // No operation specified means all operations match if (operation) { failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK); if (!pcmk__str_eq(operation, failed_value, pcmk__str_casei)) { continue; } // Interval (if operation was specified) defaults to 0 (not all) failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); if (!pcmk__str_eq(interval_ms_s, failed_value, pcmk__str_casei)) { continue; } } g_hash_table_add(rscs, (gpointer) failed_id); } g_hash_table_iter_init(&iter, rscs); while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { crm_debug("Erasing failures of %s on %s", failed_id, node_name); rc = clear_rsc_history(controld_api, node_name, failed_id, data_set); if (rc != pcmk_rc_ok) { return rc; } } g_hash_table_destroy(rscs); return rc; } // \return Standard Pacemaker return code static int clear_rsc_fail_attrs(pe_resource_t *rsc, const char *operation, const char *interval_spec, pe_node_t *node) { int rc = pcmk_rc_ok; int attr_options = pcmk__node_attr_none; char *rsc_name = rsc_fail_name(rsc); if (pe__is_guest_or_remote_node(node)) { attr_options |= pcmk__node_attr_remote; } rc = pcmk__node_attr_request_clear(NULL, node->details->uname, rsc_name, operation, interval_spec, NULL, attr_options); free(rsc_name); return rc; } // \return Standard Pacemaker return code int cli_resource_delete(pcmk_ipc_api_t *controld_api, const char *host_uname, pe_resource_t *rsc, const char *operation, const char *interval_spec, bool just_failures, pe_working_set_t *data_set, gboolean force) { pcmk__output_t *out = data_set->priv; int rc = pcmk_rc_ok; pe_node_t *node = NULL; if (rsc == NULL) { return ENXIO; } else if (rsc->children) { GList *lpc = NULL; for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { pe_resource_t *child = (pe_resource_t *) lpc->data; rc = cli_resource_delete(controld_api, host_uname, child, operation, interval_spec, just_failures, data_set, force); if (rc != pcmk_rc_ok) { return rc; } } return pcmk_rc_ok; } else if (host_uname == NULL) { GList *lpc = NULL; GList *nodes = g_hash_table_get_values(rsc->known_on); if(nodes == NULL && force) { nodes = pcmk__copy_node_list(data_set->nodes, false); } else if(nodes == NULL && rsc->exclusive_discover) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void**)&node)) { if(node->weight >= 0) { nodes = g_list_prepend(nodes, node); } } } else if(nodes == NULL) { nodes = g_hash_table_get_values(rsc->allowed_nodes); } for (lpc = nodes; lpc != NULL; lpc = lpc->next) { node = (pe_node_t *) lpc->data; if (node->details->online) { rc = cli_resource_delete(controld_api, node->details->uname, rsc, operation, interval_spec, just_failures, data_set, force); } if (rc != pcmk_rc_ok) { g_list_free(nodes); return rc; } } g_list_free(nodes); return pcmk_rc_ok; } node = pe_find_node(data_set->nodes, host_uname); if (node == NULL) { out->err(out, "Unable to clean up %s because node %s not found", rsc->id, host_uname); return ENODEV; } if (!node->details->rsc_discovery_enabled) { out->err(out, "Unable to clean up %s because resource discovery disabled on %s", rsc->id, host_uname); return EOPNOTSUPP; } if (controld_api == NULL) { out->err(out, "Dry run: skipping clean-up of %s on %s due to CIB_file", rsc->id, host_uname); return pcmk_rc_ok; } rc = clear_rsc_fail_attrs(rsc, operation, interval_spec, node); if (rc != pcmk_rc_ok) { out->err(out, "Unable to clean up %s failures on %s: %s", rsc->id, host_uname, pcmk_rc_str(rc)); return rc; } if (just_failures) { rc = clear_rsc_failures(out, controld_api, host_uname, rsc->id, operation, interval_spec, data_set); } else { rc = clear_rsc_history(controld_api, host_uname, rsc->id, data_set); } if (rc != pcmk_rc_ok) { out->err(out, "Cleaned %s failures on %s, but unable to clean history: %s", rsc->id, host_uname, pcmk_strerror(rc)); } else { out->info(out, "Cleaned up %s on %s", rsc->id, host_uname); } return rc; } // \return Standard Pacemaker return code int cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; int rc = pcmk_rc_ok; int attr_options = pcmk__node_attr_none; const char *display_name = node_name? node_name : "all nodes"; if (controld_api == NULL) { out->info(out, "Dry run: skipping clean-up of %s due to CIB_file", display_name); return rc; } if (node_name) { pe_node_t *node = pe_find_node(data_set->nodes, node_name); if (node == NULL) { out->err(out, "Unknown node: %s", node_name); return ENXIO; } if (pe__is_guest_or_remote_node(node)) { attr_options |= pcmk__node_attr_remote; } } rc = pcmk__node_attr_request_clear(NULL, node_name, NULL, operation, interval_spec, NULL, attr_options); if (rc != pcmk_rc_ok) { out->err(out, "Unable to clean up all failures on %s: %s", display_name, pcmk_rc_str(rc)); return rc; } if (node_name) { rc = clear_rsc_failures(out, controld_api, node_name, NULL, operation, interval_spec, data_set); if (rc != pcmk_rc_ok) { out->err(out, "Cleaned all resource failures on %s, but unable to clean history: %s", node_name, pcmk_strerror(rc)); return rc; } } else { for (GList *iter = data_set->nodes; iter; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; rc = clear_rsc_failures(out, controld_api, node->details->uname, NULL, operation, interval_spec, data_set); if (rc != pcmk_rc_ok) { out->err(out, "Cleaned all resource failures on all nodes, but unable to clean history: %s", pcmk_strerror(rc)); return rc; } } } out->info(out, "Cleaned up all resources on %s", display_name); return rc; } int cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) { char *role_s = NULL; char *managed = NULL; pe_resource_t *parent = uber_parent(rsc); int rc = pcmk_rc_no_output; resource_checks_t *checks = NULL; find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); checks = cli_check_resource(rsc, role_s, managed); if (checks->flags != 0 || checks->lock_node != NULL) { rc = out->message(out, "resource-check-list", checks); } free(role_s); free(managed); free(checks); return rc; } // \return Standard Pacemaker return code int cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { crm_notice("Failing %s on %s", rsc_id, host_uname); return send_lrm_rsc_op(controld_api, true, host_uname, rsc_id, data_set); } static GHashTable * generate_resource_params(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; char *key = NULL; char *value = NULL; combined = pcmk__strkey_table(free, free); params = pe_rsc_params(rsc, node, data_set); if (params != NULL) { g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { g_hash_table_insert(combined, strdup(key), strdup(value)); } } meta = pcmk__strkey_table(free, free); get_meta_attributes(meta, rsc, node, data_set); if (meta != NULL) { g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } bool resource_is_running_on(pe_resource_t *rsc, const char *host) { bool found = TRUE; GList *hIter = NULL; GList *hosts = NULL; if(rsc == NULL) { return FALSE; } rsc->fns->location(rsc, &hosts, TRUE); for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) { pe_node_t *node = (pe_node_t *) hIter->data; if(strcmp(host, node->details->uname) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } else if(strcmp(host, node->details->id) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } } if(host != NULL) { crm_trace("Resource %s is not running on: %s\n", rsc->id, host); found = FALSE; } else if(host == NULL && hosts == NULL) { crm_trace("Resource %s is not running\n", rsc->id); found = FALSE; } done: g_list_free(hosts); return found; } /*! * \internal * \brief Create a list of all resources active on host from a given list * * \param[in] host Name of host to check whether resources are active * \param[in] rsc_list List of resources to check * * \return New list of resources from list that are active on host */ static GList * get_active_resources(const char *host, GList *rsc_list) { GList *rIter = NULL; GList *active = NULL; for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) { pe_resource_t *rsc = (pe_resource_t *) rIter->data; /* Expand groups to their members, because if we're restarting a member * other than the first, we can't otherwise tell which resources are * stopping and starting. */ if (rsc->variant == pe_group) { active = g_list_concat(active, get_active_resources(host, rsc->children)); } else if (resource_is_running_on(rsc, host)) { active = g_list_append(active, strdup(rsc->id)); } } return active; } static void dump_list(GList *items, const char *tag) { int lpc = 0; GList *item = NULL; for (item = items; item != NULL; item = item->next) { crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data); lpc++; } } static void display_list(pcmk__output_t *out, GList *items, const char *tag) { GList *item = NULL; for (item = items; item != NULL; item = item->next) { out->info(out, "%s%s", tag, (const char *)item->data); } } /*! * \internal * \brief Upgrade XML to latest schema version and use it as working set input * * This also updates the working set timestamp to the current time. * * \param[in] data_set Working set instance to update * \param[in] xml XML to use as input * * \return Standard Pacemaker return code * \note On success, caller is responsible for freeing memory allocated for * data_set->now. * \todo This follows the example of other callers of cli_config_update() * and returns ENOKEY ("Required key not available") if that fails, * but perhaps pcmk_rc_schema_validation would be better in that case. */ int update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml) { if (cli_config_update(xml, NULL, FALSE) == FALSE) { return ENOKEY; } data_set->input = *xml; data_set->now = crm_time_new(NULL); return pcmk_rc_ok; } /*! * \internal * \brief Update a working set's XML input based on a CIB query * * \param[in] data_set Data set instance to initialize * \param[in] cib Connection to the CIB manager * * \return Standard Pacemaker return code * \note On success, caller is responsible for freeing memory allocated for * data_set->input and data_set->now. */ static int update_working_set_from_cib(pcmk__output_t *out, pe_working_set_t * data_set, cib_t *cib) { xmlNode *cib_xml_copy = NULL; int rc = pcmk_rc_ok; rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { out->err(out, "Could not obtain the current CIB: %s (%d)", pcmk_strerror(rc), rc); return rc; } rc = update_working_set_xml(data_set, &cib_xml_copy); if (rc != pcmk_rc_ok) { out->err(out, "Could not upgrade the current CIB XML"); free_xml(cib_xml_copy); return rc; } return rc; } // \return Standard Pacemaker return code static int update_dataset(cib_t *cib, pe_working_set_t * data_set, bool simulate) { char *pid = NULL; char *shadow_file = NULL; cib_t *shadow_cib = NULL; int rc = pcmk_rc_ok; pcmk__output_t *out = data_set->priv; pe_reset_working_set(data_set); rc = update_working_set_from_cib(out, data_set, cib); if (rc != pcmk_rc_ok) { return rc; } if(simulate) { bool prev_quiet = false; pid = pcmk__getpid_s(); shadow_cib = cib_shadow_new(pid); shadow_file = get_shadow_file(pid); if (shadow_cib == NULL) { out->err(out, "Could not create shadow cib: '%s'", pid); rc = ENXIO; goto done; } rc = write_xml_file(data_set->input, shadow_file, FALSE); if (rc < 0) { out->err(out, "Could not populate shadow cib: %s (%d)", pcmk_strerror(rc), rc); goto done; } rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { out->err(out, "Could not connect to shadow cib: %s (%d)", pcmk_strerror(rc), rc); goto done; } pcmk__schedule_actions(data_set, data_set->input, NULL); prev_quiet = out->is_quiet(out); out->quiet = true; pcmk__simulate_transition(data_set, shadow_cib, NULL); out->quiet = prev_quiet; rc = update_dataset(shadow_cib, data_set, FALSE); } else { cluster_status(data_set); } done: /* Do not free data_set->input here, we need rsc->xml to be valid later on */ cib_delete(shadow_cib); free(pid); if(shadow_file) { unlink(shadow_file); free(shadow_file); } return rc; } static int max_delay_for_resource(pe_working_set_t * data_set, pe_resource_t *rsc) { int delay = 0; int max_delay = 0; if(rsc && rsc->children) { GList *iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { pe_resource_t *child = (pe_resource_t *)iter->data; delay = max_delay_for_resource(data_set, child); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, child->id); max_delay = delay; } } } else if(rsc) { char *key = crm_strdup_printf("%s_%s_0", rsc->id, RSC_STOP); pe_action_t *stop = custom_action(rsc, key, RSC_STOP, NULL, TRUE, FALSE, data_set); const char *value = g_hash_table_lookup(stop->meta, XML_ATTR_TIMEOUT); long long result_ll; if ((pcmk__scan_ll(value, &result_ll, -1LL) == pcmk_rc_ok) && (result_ll >= 0) && (result_ll <= INT_MAX)) { max_delay = (int) result_ll; } else { max_delay = -1; } pe_free_action(stop); } return max_delay; } static int max_delay_in(pe_working_set_t * data_set, GList *resources) { int max_delay = 0; GList *item = NULL; for (item = resources; item != NULL; item = item->next) { int delay = 0; pe_resource_t *rsc = pe_find_resource(data_set->resources, (const char *)item->data); delay = max_delay_for_resource(data_set, rsc); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, rsc->id); max_delay = delay; } } return 5 + (max_delay / 1000); } #define waiting_for_starts(d, r, h) ((d != NULL) || \ (!resource_is_running_on((r), (h)))) /*! * \internal * \brief Restart a resource (on a particular host if requested). * * \param[in] rsc The resource to restart * \param[in] host The host to restart the resource on (or NULL for all) * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but a two-second * granularity is actually used; if 0, a timeout will be * calculated based on the resource timeout) * \param[in] cib Connection to the CIB manager * * \return Standard Pacemaker return code (exits on certain failures) */ int cli_resource_restart(pcmk__output_t *out, pe_resource_t *rsc, const char *host, const char *move_lifetime, int timeout_ms, cib_t *cib, int cib_options, gboolean promoted_role_only, gboolean force) { int rc = pcmk_rc_ok; int lpc = 0; int before = 0; int step_timeout_s = 0; int sleep_interval = 2; int timeout = timeout_ms / 1000; bool stop_via_ban = FALSE; char *rsc_id = NULL; char *orig_target_role = NULL; GList *list_delta = NULL; GList *target_active = NULL; GList *current_active = NULL; GList *restart_target_active = NULL; pe_working_set_t *data_set = NULL; if (!resource_is_running_on(rsc, host)) { const char *id = rsc->clone_name?rsc->clone_name:rsc->id; if(host) { out->err(out, "%s is not running on %s and so cannot be restarted", id, host); } else { out->err(out, "%s is not running anywhere and so cannot be restarted", id); } return ENXIO; } rsc_id = strdup(rsc->id); if ((pe_rsc_is_clone(rsc) || pe_bundle_replicas(rsc)) && host) { stop_via_ban = TRUE; } /* grab full cib determine originally active resources disable or ban poll cib and watch for affected resources to get stopped without --timeout, calculate the stop timeout for each step and wait for that if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down if everything stopped, re-enable or un-ban poll cib and watch for affected resources to get started without --timeout, calculate the start timeout for each step and wait for that if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up report success Optimizations: - use constraints to determine ordered list of affected resources - Allow a --no-deps option (aka. --force-restart) */ data_set = pe_new_working_set(); if (data_set == NULL) { crm_perror(LOG_ERR, "Could not allocate working set"); rc = ENOMEM; goto done; } data_set->priv = out; pe__set_working_set_flags(data_set, pe_flag_no_counts|pe_flag_no_compat); rc = update_dataset(cib, data_set, FALSE); if(rc != pcmk_rc_ok) { out->err(out, "Could not get new resource list: %s (%d)", pcmk_strerror(rc), rc); goto done; } restart_target_active = get_active_resources(host, data_set->resources); current_active = get_active_resources(host, data_set->resources); dump_list(current_active, "Origin"); if (stop_via_ban) { /* Stop the clone or bundle instance by banning it from the host */ out->quiet = true; rc = cli_resource_ban(out, rsc_id, host, move_lifetime, NULL, cib, cib_options, promoted_role_only); } else { /* Stop the resource by setting target-role to Stopped. * Remember any existing target-role so we can restore it later * (though it only makes any difference if it's Unpromoted). */ char *lookup_id = clone_strip(rsc->id); find_resource_attr(out, cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role); free(lookup_id); rc = cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS, NULL, XML_RSC_ATTR_TARGET_ROLE, RSC_STOPPED, FALSE, cib, cib_options, data_set, force); } if(rc != pcmk_rc_ok) { out->err(out, "Could not set target-role for %s: %s (%d)", rsc_id, pcmk_strerror(rc), rc); if (current_active) { g_list_free_full(current_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } goto done; } rc = update_dataset(cib, data_set, TRUE); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources would be stopped"); goto failure; } target_active = get_active_resources(host, data_set->resources); dump_list(target_active, "Target"); list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp); out->info(out, "Waiting for %d resources to stop:", g_list_length(list_delta)); display_list(out, list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (list_delta != NULL) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for(lpc = 0; (lpc < step_timeout_s) && (list_delta != NULL); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, data_set, FALSE); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources were stopped"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } current_active = get_active_resources(host, data_set->resources); g_list_free(list_delta); list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before); if(before == g_list_length(list_delta)) { /* aborted during stop phase, print the contents of list_delta */ out->err(out, "Could not complete shutdown of %s, %d resources remaining", rsc_id, g_list_length(list_delta)); display_list(out, list_delta, " * "); rc = ETIME; goto failure; } } if (stop_via_ban) { rc = cli_resource_clear(rsc_id, host, NULL, cib, cib_options, TRUE, force); } else if (orig_target_role) { rc = cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, cib_options, data_set, force); free(orig_target_role); orig_target_role = NULL; } else { rc = cli_resource_delete_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, cib_options, data_set, force); } if(rc != pcmk_rc_ok) { out->err(out, "Could not unset target-role for %s: %s (%d)", rsc_id, pcmk_strerror(rc), rc); goto done; } if (target_active) { g_list_free_full(target_active, free); } target_active = restart_target_active; list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp); out->info(out, "Waiting for %d resources to start again:", g_list_length(list_delta)); display_list(out, list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (waiting_for_starts(list_delta, rsc, host)) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, data_set, FALSE); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources were started"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } /* It's OK if dependent resources moved to a different node, * so we check active resources on all nodes. */ current_active = get_active_resources(NULL, data_set->resources); g_list_free(list_delta); list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } if(before == g_list_length(list_delta)) { /* aborted during start phase, print the contents of list_delta */ out->err(out, "Could not complete restart of %s, %d resources remaining", rsc_id, g_list_length(list_delta)); display_list(out, list_delta, " * "); rc = ETIME; goto failure; } } rc = pcmk_rc_ok; goto done; failure: if (stop_via_ban) { cli_resource_clear(rsc_id, host, NULL, cib, cib_options, TRUE, force); } else if (orig_target_role) { cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, cib_options, data_set, force); free(orig_target_role); } else { cli_resource_delete_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, cib_options, data_set, force); } done: if (list_delta) { g_list_free(list_delta); } if (current_active) { g_list_free_full(current_active, free); } if (target_active && (target_active != restart_target_active)) { g_list_free_full(target_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } free(rsc_id); pe_free_working_set(data_set); return rc; } static inline bool action_is_pending(pe_action_t *action) { if (pcmk_any_flags_set(action->flags, pe_action_optional|pe_action_pseudo) || !pcmk_is_set(action->flags, pe_action_runnable) || pcmk__str_eq("notify", action->task, pcmk__str_casei)) { return false; } return true; } /*! * \internal * \brief Return TRUE if any actions in a list are pending * * \param[in] actions List of actions to check * * \return TRUE if any actions in the list are pending, FALSE otherwise */ static bool actions_are_pending(GList *actions) { GList *action; for (action = actions; action != NULL; action = action->next) { pe_action_t *a = (pe_action_t *)action->data; if (action_is_pending(a)) { crm_notice("Waiting for %s (flags=%#.8x)", a->uuid, a->flags); return TRUE; } } return FALSE; } static void print_pending_actions(pcmk__output_t *out, GList *actions) { GList *action; out->info(out, "Pending actions:"); for (action = actions; action != NULL; action = action->next) { pe_action_t *a = (pe_action_t *) action->data; if (!action_is_pending(a)) { continue; } if (a->node) { out->info(out, "\tAction %d: %s\ton %s", a->id, a->uuid, a->node->details->uname); } else { out->info(out, "\tAction %d: %s", a->id, a->uuid); } } } /* For --wait, timeout (in seconds) to use if caller doesn't specify one */ #define WAIT_DEFAULT_TIMEOUT_S (60 * 60) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) /*! * \internal * \brief Wait until all pending cluster actions are complete * * This waits until either the CIB's transition graph is idle or a timeout is * reached. * * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but one-second granularity * is actually used; if 0, a default will be used) * \param[in] cib Connection to the CIB manager * * \return Standard Pacemaker return code */ int wait_till_stable(pcmk__output_t *out, int timeout_ms, cib_t * cib) { pe_working_set_t *data_set = NULL; int rc = pcmk_rc_ok; int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S; time_t expire_time = time(NULL) + timeout_s; time_t time_diff; bool printed_version_warning = out->is_quiet(out); // i.e. don't print if quiet data_set = pe_new_working_set(); if (data_set == NULL) { return ENOMEM; } pe__set_working_set_flags(data_set, pe_flag_no_counts|pe_flag_no_compat); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); if (time_diff > 0) { crm_info("Waiting up to %ld seconds for cluster actions to complete", time_diff); } else { print_pending_actions(out, data_set->actions); pe_free_working_set(data_set); return ETIME; } if (rc == pcmk_rc_ok) { /* this avoids sleep on first loop iteration */ sleep(WAIT_SLEEP_S); } /* Get latest transition graph */ pe_reset_working_set(data_set); rc = update_working_set_from_cib(out, data_set, cib); if (rc != pcmk_rc_ok) { pe_free_working_set(data_set); return rc; } pcmk__schedule_actions(data_set, data_set->input, NULL); if (!printed_version_warning) { /* If the DC has a different version than the local node, the two * could come to different conclusions about what actions need to be * done. Warn the user in this case. * * @TODO A possible long-term solution would be to reimplement the * wait as a new controller operation that would be forwarded to the * DC. However, that would have potential problems of its own. */ const char *dc_version = g_hash_table_lookup(data_set->config_hash, "dc-version"); if (!pcmk__str_eq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION, pcmk__str_casei)) { out->info(out, "warning: wait option may not work properly in " "mixed-version cluster"); printed_version_warning = TRUE; } } } while (actions_are_pending(data_set->actions)); pe_free_working_set(data_set); return rc; } static const char * get_action(const char *rsc_action) { const char *action = NULL; if (pcmk__str_eq(rsc_action, "validate", pcmk__str_casei)) { action = "validate-all"; } else if (pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) { action = "monitor"; } else if (pcmk__strcase_any_of(rsc_action, "force-start", "force-stop", "force-demote", "force-promote", NULL)) { action = rsc_action+6; } else { action = rsc_action; } return action; } /*! * \brief Set up environment variables as expected by resource agents * * When the cluster executes resource agents, it adds certain environment * variables (directly or via resource meta-attributes) expected by some * resource agents. Add the essential ones that many resource agents expect, so * the behavior is the same for command-line execution. * * \param[in] params Resource parameters that will be passed to agent * \param[in] timeout_ms Action timeout (in milliseconds) * \param[in] check_level OCF check level * \param[in] verbosity Verbosity level */ static void set_agent_environment(GHashTable *params, int timeout_ms, int check_level, int verbosity) { g_hash_table_insert(params, strdup("CRM_meta_timeout"), crm_strdup_printf("%d", timeout_ms)); g_hash_table_insert(params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); if (check_level >= 0) { char *level = crm_strdup_printf("%d", check_level); setenv("OCF_CHECK_LEVEL", level, 1); free(level); } setenv("HA_debug", (verbosity > 0)? "1" : "0", 1); if (verbosity > 1) { setenv("OCF_TRACE_RA", "1", 1); } /* A resource agent using the standard ocf-shellfuncs library will not print * messages to stderr if it doesn't have a controlling terminal (e.g. if * crm_resource is called via script or ssh). This forces it to do so. */ setenv("OCF_TRACE_FILE", "/dev/stderr", 0); } /*! * \internal * \brief Apply command-line overrides to resource parameters * * \param[in] params Parameters to be passed to agent * \param[in] overrides Parameters to override (or NULL if none) */ static void apply_overrides(GHashTable *params, GHashTable *overrides) { if (overrides != NULL) { GHashTableIter iter; char *name = NULL; char *value = NULL; g_hash_table_iter_init(&iter, overrides); while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) { g_hash_table_replace(params, strdup(name), strdup(value)); } } } crm_exit_t cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, const char *rsc_class, const char *rsc_prov, const char *rsc_type, const char *rsc_action, GHashTable *params, GHashTable *override_hash, int timeout_ms, int resource_verbose, gboolean force, int check_level) { const char *class = rsc_class; const char *action = get_action(rsc_action); crm_exit_t exit_code = CRM_EX_OK; svc_action_t *op = NULL; // If no timeout was provided, use the same default as the cluster if (timeout_ms == 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } set_agent_environment(params, timeout_ms, check_level, resource_verbose); apply_overrides(params, override_hash); op = services__create_resource_action(rsc_name? rsc_name : "test", rsc_class, rsc_prov, rsc_type, action, 0, timeout_ms, params, 0); if (op == NULL) { out->err(out, "Could not execute %s using %s%s%s:%s: %s", action, rsc_class, (rsc_prov? ":" : ""), (rsc_prov? rsc_prov : ""), rsc_type, strerror(ENOMEM)); g_hash_table_destroy(params); return CRM_EX_OSERR; } if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { class = resources_find_service_class(rsc_type); } if (!pcmk__strcase_any_of(class, PCMK_RESOURCE_CLASS_OCF, PCMK_RESOURCE_CLASS_LSB, NULL)) { - services__set_result(op, CRM_EX_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, - "Manual execution of this standard is unsupported"); + services__format_result(op, CRM_EX_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, + "Manual execution of the %s standard " + "is unsupported", crm_str(class)); } if (op->rc != PCMK_OCF_UNKNOWN) { exit_code = op->rc; goto done; } services_action_sync(op); // Map results to OCF codes for consistent reporting to user { enum ocf_exitcode ocf_code = services_result2ocf(class, action, op->rc); // Cast variable instead of function return to keep compilers happy exit_code = (crm_exit_t) ocf_code; } done: out->message(out, "resource-agent-action", resource_verbose, rsc_class, rsc_prov, rsc_type, rsc_name, rsc_action, override_hash, exit_code, op->status, services__exit_reason(op), op->stdout_data, op->stderr_data); services_action_free(op); return exit_code; } crm_exit_t cli_resource_execute(pe_resource_t *rsc, const char *requested_name, const char *rsc_action, GHashTable *override_hash, int timeout_ms, cib_t * cib, pe_working_set_t *data_set, int resource_verbose, gboolean force, int check_level) { pcmk__output_t *out = data_set->priv; crm_exit_t exit_code = CRM_EX_OK; const char *rid = NULL; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; GHashTable *params = NULL; if (pcmk__strcase_any_of(rsc_action, "force-start", "force-demote", "force-promote", NULL)) { if(pe_rsc_is_clone(rsc)) { GList *nodes = cli_resource_search(rsc, requested_name, data_set); if(nodes != NULL && force == FALSE) { out->err(out, "It is not safe to %s %s here: the cluster claims it is already active", rsc_action, rsc->id); out->err(out, "Try setting target-role=Stopped first or specifying " "the force option"); return CRM_EX_UNSAFE; } g_list_free_full(nodes, free); } } if(pe_rsc_is_clone(rsc)) { /* Grab the first child resource in the hope it's not a group */ rsc = rsc->children->data; } if(rsc->variant == pe_group) { out->err(out, "Sorry, the %s option doesn't support group resources", rsc_action); return CRM_EX_UNIMPLEMENT_FEATURE; } else if (rsc->variant == pe_container || pe_rsc_is_bundled(rsc)) { out->err(out, "Sorry, the %s option doesn't support bundled resources", rsc_action); return CRM_EX_UNIMPLEMENT_FEATURE; } rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); params = generate_resource_params(rsc, NULL /* @TODO use local node */, data_set); if (timeout_ms == 0) { timeout_ms = pe_get_configured_timeout(rsc, get_action(rsc_action), data_set); } rid = pe_rsc_is_anon_clone(rsc->parent)? requested_name : rsc->id; exit_code = cli_resource_execute_from_params(out, rid, rclass, rprov, rtype, rsc_action, params, override_hash, timeout_ms, resource_verbose, force, check_level); return exit_code; } // \return Standard Pacemaker return code int cli_resource_move(pe_resource_t *rsc, const char *rsc_id, const char *host_name, const char *move_lifetime, cib_t *cib, int cib_options, pe_working_set_t *data_set, gboolean promoted_role_only, gboolean force) { pcmk__output_t *out = data_set->priv; int rc = pcmk_rc_ok; unsigned int count = 0; pe_node_t *current = NULL; pe_node_t *dest = pe_find_node(data_set->nodes, host_name); bool cur_is_dest = FALSE; if (dest == NULL) { return pcmk_rc_node_unknown; } if (promoted_role_only && !pcmk_is_set(rsc->flags, pe_rsc_promotable)) { pe_resource_t *p = uber_parent(rsc); if (pcmk_is_set(p->flags, pe_rsc_promotable)) { out->info(out, "Using parent '%s' for move instead of '%s'.", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { out->info(out, "Ignoring master option: %s is not promotable", rsc_id); promoted_role_only = FALSE; } } current = pe__find_active_requires(rsc, &count); if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { GList *iter = NULL; unsigned int promoted_count = 0; pe_node_t *promoted_node = NULL; for(iter = rsc->children; iter; iter = iter->next) { pe_resource_t *child = (pe_resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if (child_role == RSC_ROLE_PROMOTED) { rsc = child; promoted_node = pe__current_node(child); promoted_count++; } } if (promoted_role_only || (promoted_count != 0)) { count = promoted_count; current = promoted_node; } } if (count > 1) { if (pe_rsc_is_clone(rsc)) { current = NULL; } else { return pcmk_rc_multiple; } } if (current && (current->details == dest->details)) { cur_is_dest = TRUE; if (force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", rsc_id, promoted_role_only?"promoted":"active", dest->details->uname); } else { return pcmk_rc_already; } } /* Clear any previous prefer constraints across all nodes. */ cli_resource_clear(rsc_id, NULL, data_set->nodes, cib, cib_options, FALSE, force); /* Clear any previous ban constraints on 'dest'. */ cli_resource_clear(rsc_id, dest->details->uname, data_set->nodes, cib, cib_options, TRUE, force); /* Record an explicit preference for 'dest' */ rc = cli_resource_prefer(out, rsc_id, dest->details->uname, move_lifetime, cib, cib_options, promoted_role_only); crm_trace("%s%s now prefers node %s%s", rsc->id, (promoted_role_only? " (promoted)" : ""), dest->details->uname, force?"(forced)":""); /* only ban the previous location if current location != destination location. * it is possible to use -M to enforce a location without regard of where the * resource is currently located */ if(force && (cur_is_dest == FALSE)) { /* Ban the original location if possible */ if(current) { (void)cli_resource_ban(out, rsc_id, current->details->uname, move_lifetime, NULL, cib, cib_options, promoted_role_only); } else if(count > 1) { out->info(out, "Resource '%s' is currently %s in %d locations. " "One may now move to %s", rsc_id, (promoted_role_only? "promoted" : "active"), count, dest->details->uname); out->info(out, "To prevent '%s' from being %s at a specific location, " "specify a node.", rsc_id, (promoted_role_only? "promoted" : "active")); } else { crm_trace("Not banning %s from its current location: not active", rsc_id); } } return rc; }