Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624617
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
151 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index f280065c01..bbe7d5d220 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -1,1434 +1,1448 @@
/*
* Copyright 2010-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <grp.h>
#include <string.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "crm/crm.h"
#include "crm/common/mainloop.h"
#include "crm/services.h"
#include "crm/services_internal.h"
#include "services_private.h"
static void close_pipe(int fildes[]);
/* We have two alternative ways of handling SIGCHLD when synchronously waiting
* for spawned processes to complete. Both rely on polling a file descriptor to
* discover SIGCHLD events.
*
* If sys/signalfd.h is available (e.g. on Linux), we call signalfd() to
* generate the file descriptor. Otherwise, we use the "self-pipe trick"
* (opening a pipe and writing a byte to it when SIGCHLD is received).
*/
#ifdef HAVE_SYS_SIGNALFD_H
// signalfd() implementation
#include <sys/signalfd.h>
// Everything needed to manage SIGCHLD handling
struct sigchld_data_s {
sigset_t mask; // Signals to block now (including SIGCHLD)
sigset_t old_mask; // Previous set of blocked signals
};
// Initialize SIGCHLD data and prepare for use
static bool
sigchld_setup(struct sigchld_data_s *data)
{
sigemptyset(&(data->mask));
sigaddset(&(data->mask), SIGCHLD);
sigemptyset(&(data->old_mask));
// Block SIGCHLD (saving previous set of blocked signals to restore later)
if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) {
crm_info("Wait for child process completion failed: %s "
CRM_XS " source=sigprocmask", pcmk_rc_str(errno));
return false;
}
return true;
}
// Get a file descriptor suitable for polling for SIGCHLD events
static int
sigchld_open(struct sigchld_data_s *data)
{
int fd;
CRM_CHECK(data != NULL, return -1);
fd = signalfd(-1, &(data->mask), SFD_NONBLOCK);
if (fd < 0) {
crm_info("Wait for child process completion failed: %s "
CRM_XS " source=signalfd", pcmk_rc_str(errno));
}
return fd;
}
// Close a file descriptor returned by sigchld_open()
static void
sigchld_close(int fd)
{
if (fd > 0) {
close(fd);
}
}
// Return true if SIGCHLD was received from polled fd
static bool
sigchld_received(int fd)
{
struct signalfd_siginfo fdsi;
ssize_t s;
if (fd < 0) {
return false;
}
s = read(fd, &fdsi, sizeof(struct signalfd_siginfo));
if (s != sizeof(struct signalfd_siginfo)) {
crm_info("Wait for child process completion failed: %s "
CRM_XS " source=read", pcmk_rc_str(errno));
} else if (fdsi.ssi_signo == SIGCHLD) {
return true;
}
return false;
}
// Do anything needed after done waiting for SIGCHLD
static void
sigchld_cleanup(struct sigchld_data_s *data)
{
// Restore the original set of blocked signals
if ((sigismember(&(data->old_mask), SIGCHLD) == 0)
&& (sigprocmask(SIG_UNBLOCK, &(data->mask), NULL) < 0)) {
crm_warn("Could not clean up after child process completion: %s",
pcmk_rc_str(errno));
}
}
#else // HAVE_SYS_SIGNALFD_H not defined
// Self-pipe implementation (see above for function descriptions)
struct sigchld_data_s {
int pipe_fd[2]; // Pipe file descriptors
struct sigaction sa; // Signal handling info (with SIGCHLD)
struct sigaction old_sa; // Previous signal handling info
};
// We need a global to use in the signal handler
volatile struct sigchld_data_s *last_sigchld_data = NULL;
static void
sigchld_handler()
{
// We received a SIGCHLD, so trigger pipe polling
if ((last_sigchld_data != NULL)
&& (last_sigchld_data->pipe_fd[1] >= 0)
&& (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) {
crm_info("Wait for child process completion failed: %s "
CRM_XS " source=write", pcmk_rc_str(errno));
}
}
static bool
sigchld_setup(struct sigchld_data_s *data)
{
int rc;
data->pipe_fd[0] = data->pipe_fd[1] = -1;
if (pipe(data->pipe_fd) == -1) {
crm_info("Wait for child process completion failed: %s "
CRM_XS " source=pipe", pcmk_rc_str(errno));
return false;
}
rc = pcmk__set_nonblocking(data->pipe_fd[0]);
if (rc != pcmk_rc_ok) {
crm_info("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
pcmk_rc_str(rc), rc);
}
rc = pcmk__set_nonblocking(data->pipe_fd[1]);
if (rc != pcmk_rc_ok) {
crm_info("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
pcmk_rc_str(rc), rc);
}
// Set SIGCHLD handler
data->sa.sa_handler = sigchld_handler;
data->sa.sa_flags = 0;
sigemptyset(&(data->sa.sa_mask));
if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) {
crm_info("Wait for child process completion failed: %s "
CRM_XS " source=sigaction", pcmk_rc_str(errno));
}
// Remember data for use in signal handler
last_sigchld_data = data;
return true;
}
static int
sigchld_open(struct sigchld_data_s *data)
{
CRM_CHECK(data != NULL, return -1);
return data->pipe_fd[0];
}
static void
sigchld_close(int fd)
{
// Pipe will be closed in sigchld_cleanup()
return;
}
static bool
sigchld_received(int fd)
{
char ch;
if (fd < 0) {
return false;
}
// Clear out the self-pipe
while (read(fd, &ch, 1) == 1) /*omit*/;
return true;
}
static void
sigchld_cleanup(struct sigchld_data_s *data)
{
// Restore the previous SIGCHLD handler
if (sigaction(SIGCHLD, &(data->old_sa), NULL) < 0) {
crm_warn("Could not clean up after child process completion: %s",
pcmk_rc_str(errno));
}
close_pipe(data->pipe_fd);
}
#endif
/*!
* \internal
* \brief Close the two file descriptors of a pipe
*
* \param[in] fildes Array of file descriptors opened by pipe()
*/
static void
close_pipe(int fildes[])
{
if (fildes[0] >= 0) {
close(fildes[0]);
fildes[0] = -1;
}
if (fildes[1] >= 0) {
close(fildes[1]);
fildes[1] = -1;
}
}
static gboolean
svc_read_output(int fd, svc_action_t * op, bool is_stderr)
{
char *data = NULL;
int rc = 0, len = 0;
char buf[500];
static const size_t buf_read_len = sizeof(buf) - 1;
if (fd < 0) {
crm_trace("No fd for %s", op->id);
return FALSE;
}
if (is_stderr && op->stderr_data) {
len = strlen(op->stderr_data);
data = op->stderr_data;
crm_trace("Reading %s stderr into offset %d", op->id, len);
} else if (is_stderr == FALSE && op->stdout_data) {
len = strlen(op->stdout_data);
data = op->stdout_data;
crm_trace("Reading %s stdout into offset %d", op->id, len);
} else {
crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len);
}
do {
rc = read(fd, buf, buf_read_len);
if (rc > 0) {
buf[rc] = 0;
crm_trace("Got %d chars: %.80s", rc, buf);
data = pcmk__realloc(data, len + rc + 1);
len += sprintf(data + len, "%s", buf);
} else if (errno != EINTR) {
/* error or EOF
* Cleanup happens in pipe_done()
*/
rc = FALSE;
break;
}
} while (rc == buf_read_len || rc < 0);
if (is_stderr) {
op->stderr_data = data;
} else {
op->stdout_data = data;
}
return rc;
}
static int
dispatch_stdout(gpointer userdata)
{
svc_action_t *op = (svc_action_t *) userdata;
return svc_read_output(op->opaque->stdout_fd, op, FALSE);
}
static int
dispatch_stderr(gpointer userdata)
{
svc_action_t *op = (svc_action_t *) userdata;
return svc_read_output(op->opaque->stderr_fd, op, TRUE);
}
static void
pipe_out_done(gpointer user_data)
{
svc_action_t *op = (svc_action_t *) user_data;
crm_trace("%p", op);
op->opaque->stdout_gsource = NULL;
if (op->opaque->stdout_fd > STDOUT_FILENO) {
close(op->opaque->stdout_fd);
}
op->opaque->stdout_fd = -1;
}
static void
pipe_err_done(gpointer user_data)
{
svc_action_t *op = (svc_action_t *) user_data;
op->opaque->stderr_gsource = NULL;
if (op->opaque->stderr_fd > STDERR_FILENO) {
close(op->opaque->stderr_fd);
}
op->opaque->stderr_fd = -1;
}
static struct mainloop_fd_callbacks stdout_callbacks = {
.dispatch = dispatch_stdout,
.destroy = pipe_out_done,
};
static struct mainloop_fd_callbacks stderr_callbacks = {
.dispatch = dispatch_stderr,
.destroy = pipe_err_done,
};
static void
set_ocf_env(const char *key, const char *value, gpointer user_data)
{
if (setenv(key, value, 1) != 0) {
crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value);
}
}
static void
set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data)
{
char buffer[500];
snprintf(buffer, sizeof(buffer), strcmp(key, "OCF_CHECK_LEVEL") != 0 ? "OCF_RESKEY_%s" : "%s", (char *)key);
set_ocf_env(buffer, value, user_data);
}
static void
set_alert_env(gpointer key, gpointer value, gpointer user_data)
{
int rc;
if (value != NULL) {
rc = setenv(key, value, 1);
} else {
rc = unsetenv(key);
}
if (rc < 0) {
crm_perror(LOG_ERR, "setenv %s=%s",
(char*)key, (value? (char*)value : ""));
} else {
crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : ""));
}
}
/*!
* \internal
* \brief Add environment variables suitable for an action
*
* \param[in] op Action to use
*/
static void
add_action_env_vars(const svc_action_t *op)
{
void (*env_setter)(gpointer, gpointer, gpointer) = NULL;
if (op->agent == NULL) {
env_setter = set_alert_env; /* we deal with alert handler */
} else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) {
env_setter = set_ocf_env_with_prefix;
}
if (env_setter != NULL && op->params != NULL) {
g_hash_table_foreach(op->params, env_setter, NULL);
}
if (env_setter == NULL || env_setter == set_alert_env) {
return;
}
set_ocf_env("OCF_RA_VERSION_MAJOR", PCMK_OCF_MAJOR_VERSION, NULL);
set_ocf_env("OCF_RA_VERSION_MINOR", PCMK_OCF_MINOR_VERSION, NULL);
set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL);
set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL);
if (op->rsc) {
set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL);
}
if (op->agent != NULL) {
set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL);
}
/* Notes: this is not added to specification yet. Sept 10,2004 */
if (op->provider != NULL) {
set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL);
}
}
static void
pipe_in_single_parameter(gpointer key, gpointer value, gpointer user_data)
{
svc_action_t *op = user_data;
char *buffer = crm_strdup_printf("%s=%s\n", (char *)key, (char *) value);
int ret, total = 0, len = strlen(buffer);
do {
errno = 0;
ret = write(op->opaque->stdin_fd, buffer + total, len - total);
if (ret > 0) {
total += ret;
}
} while ((errno == EINTR) && (total < len));
free(buffer);
}
/*!
* \internal
* \brief Pipe parameters in via stdin for action
*
* \param[in] op Action to use
*/
static void
pipe_in_action_stdin_parameters(const svc_action_t *op)
{
crm_debug("sending args");
if (op->params) {
g_hash_table_foreach(op->params, pipe_in_single_parameter, (gpointer) op);
}
}
gboolean
recurring_action_timer(gpointer data)
{
svc_action_t *op = data;
crm_debug("Scheduling another invocation of %s", op->id);
/* Clean out the old result */
free(op->stdout_data);
op->stdout_data = NULL;
free(op->stderr_data);
op->stderr_data = NULL;
op->opaque->repeat_timer = 0;
services_action_async(op, NULL);
return FALSE;
}
/*!
* \internal
* \brief Finalize handling of an asynchronous operation
*
* Given a completed asynchronous operation, cancel or reschedule it as
* appropriate if recurring, call its callback if registered, stop tracking it,
* and clean it up.
*
* \param[in,out] op Operation to finalize
*
* \return Standard Pacemaker return code
* \retval EINVAL Caller supplied NULL or invalid \p op
* \retval EBUSY Uncanceled recurring action has only been cleaned up
* \retval pcmk_rc_ok Action has been freed
*
* \note If the return value is not pcmk_rc_ok, the caller is responsible for
* freeing the action.
*/
int
services__finalize_async_op(svc_action_t *op)
{
CRM_CHECK((op != NULL) && !(op->synchronous), return EINVAL);
if (op->interval_ms != 0) {
// Recurring operations must be either cancelled or rescheduled
if (op->cancel) {
services__set_cancelled(op);
cancel_recurring_action(op);
} else {
op->opaque->repeat_timer = g_timeout_add(op->interval_ms,
recurring_action_timer,
(void *) op);
}
}
if (op->opaque->callback != NULL) {
op->opaque->callback(op);
}
// Stop tracking the operation (as in-flight or blocked)
op->pid = 0;
services_untrack_op(op);
if ((op->interval_ms != 0) && !(op->cancel)) {
// Do not free recurring actions (they will get freed when cancelled)
services_action_cleanup(op);
return EBUSY;
}
services_action_free(op);
return pcmk_rc_ok;
}
static void
close_op_input(svc_action_t *op)
{
if (op->opaque->stdin_fd >= 0) {
close(op->opaque->stdin_fd);
}
}
static void
finish_op_output(svc_action_t *op, bool is_stderr)
{
mainloop_io_t **source;
int fd;
if (is_stderr) {
source = &(op->opaque->stderr_gsource);
fd = op->opaque->stderr_fd;
} else {
source = &(op->opaque->stdout_gsource);
fd = op->opaque->stdout_fd;
}
if (op->synchronous || *source) {
crm_trace("Finish reading %s[%d] %s",
op->id, op->pid, (is_stderr? "stdout" : "stderr"));
svc_read_output(fd, op, is_stderr);
if (op->synchronous) {
close(fd);
} else {
mainloop_del_fd(*source);
*source = NULL;
}
}
}
// Log an operation's stdout and stderr
static void
log_op_output(svc_action_t *op)
{
char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid);
/* The library caller has better context to know how important the output
* is, so log it at info and debug severity here. They can log it again at
* higher severity if appropriate.
*/
crm_log_output(LOG_INFO, prefix, op->stderr_data);
strcpy(prefix + strlen(prefix) - strlen("error output"), "output");
crm_log_output(LOG_DEBUG, prefix, op->stdout_data);
free(prefix);
}
// Truncate exit reasons at this many characters
#define EXIT_REASON_MAX_LEN 128
static void
parse_exit_reason_from_stderr(svc_action_t *op)
{
const char *reason_start = NULL;
const char *reason_end = NULL;
const int prefix_len = strlen(PCMK_OCF_REASON_PREFIX);
if ((op->stderr_data == NULL) ||
// Only OCF agents have exit reasons in stderr
!pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_none)) {
return;
}
// Find the last occurrence of the magic string indicating an exit reason
for (const char *cur = strstr(op->stderr_data, PCMK_OCF_REASON_PREFIX);
cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) {
cur += prefix_len; // Skip over magic string
reason_start = cur;
}
if ((reason_start == NULL) || (reason_start[0] == '\n')
|| (reason_start[0] == '\0')) {
return; // No or empty exit reason
}
// Exit reason goes to end of line (or end of output)
reason_end = strchr(reason_start, '\n');
if (reason_end == NULL) {
reason_end = reason_start + strlen(reason_start);
}
// Limit size of exit reason to something reasonable
if (reason_end > (reason_start + EXIT_REASON_MAX_LEN)) {
reason_end = reason_start + EXIT_REASON_MAX_LEN;
}
free(op->opaque->exit_reason);
op->opaque->exit_reason = strndup(reason_start, reason_end - reason_start);
}
/*!
* \internal
* \brief Process the completion of an asynchronous child process
*
* \param[in] p Child process that completed
* \param[in] pid Process ID of child
* \param[in] core (unused)
* \param[in] signo Signal that interrupted child, if any
* \param[in] exitcode Exit status of child process
*/
static void
async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
int exitcode)
{
svc_action_t *op = mainloop_child_userdata(p);
mainloop_clear_child_userdata(p);
CRM_CHECK(op->pid == pid,
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR, "Bug in mainloop handling");
return);
/* Depending on the priority the mainloop gives the stdout and stderr
* file descriptors, this function could be called before everything has
* been read from them, so force a final read now.
*/
finish_op_output(op, true);
finish_op_output(op, false);
close_op_input(op);
if (signo == 0) {
crm_debug("%s[%d] exited with status %d", op->id, op->pid, exitcode);
services__set_result(op, exitcode, PCMK_EXEC_DONE, NULL);
log_op_output(op);
parse_exit_reason_from_stderr(op);
} else if (mainloop_child_timeout(p)) {
const char *what = NULL;
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_none)) {
what = "Fence agent";
} else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_ALERT,
pcmk__str_none)) {
what = "Alert agent";
} else if (op->standard != NULL) {
what = "Resource agent";
} else {
what = "Process";
}
crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
services__format_result(op, services__generic_error(op),
PCMK_EXEC_TIMEOUT,
"%s did not complete in time", what);
} else if (op->cancel) {
/* If an in-flight recurring operation was killed because it was
* cancelled, don't treat that as a failure.
*/
crm_info("%s[%d] terminated with signal %d (%s)",
op->id, op->pid, signo, strsignal(signo));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_CANCELLED, NULL);
} else {
crm_info("%s[%d] terminated with signal %d (%s)",
op->id, op->pid, signo, strsignal(signo));
- services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
- "Process interrupted by signal");
+ services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
+ "Process interrupted by %s signal",
+ strsignal(signo));
}
services__finalize_async_op(op);
}
/*!
* \internal
* \brief Return agent standard's exit status for "generic error"
*
* When returning an internal error for an action, a value that is appropriate
* to the action's agent standard must be used. This function returns a value
* appropriate for errors in general.
*
* \param[in] op Action that error is for
*
* \return Exit status appropriate to agent standard
* \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR.
*/
int
services__generic_error(svc_action_t *op)
{
if ((op == NULL) || (op->standard == NULL)) {
return PCMK_OCF_UNKNOWN_ERROR;
}
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)
&& pcmk__str_eq(op->action, "status", pcmk__str_casei)) {
return PCMK_LSB_STATUS_UNKNOWN;
}
#if SUPPORT_NAGIOS
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
return NAGIOS_STATE_UNKNOWN;
}
#endif
return PCMK_OCF_UNKNOWN_ERROR;
}
/*!
* \internal
* \brief Return agent standard's exit status for "not installed"
*
* When returning an internal error for an action, a value that is appropriate
* to the action's agent standard must be used. This function returns a value
* appropriate for "not installed" errors.
*
* \param[in] op Action that error is for
*
* \return Exit status appropriate to agent standard
* \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR.
*/
int
services__not_installed_error(svc_action_t *op)
{
if ((op == NULL) || (op->standard == NULL)) {
return PCMK_OCF_UNKNOWN_ERROR;
}
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)
&& pcmk__str_eq(op->action, "status", pcmk__str_casei)) {
return PCMK_LSB_STATUS_NOT_INSTALLED;
}
#if SUPPORT_NAGIOS
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
return NAGIOS_STATE_UNKNOWN;
}
#endif
return PCMK_OCF_NOT_INSTALLED;
}
/*!
* \internal
* \brief Return agent standard's exit status for "insufficient privileges"
*
* When returning an internal error for an action, a value that is appropriate
* to the action's agent standard must be used. This function returns a value
* appropriate for "insufficient privileges" errors.
*
* \param[in] op Action that error is for
*
* \return Exit status appropriate to agent standard
* \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR.
*/
int
services__authorization_error(svc_action_t *op)
{
if ((op == NULL) || (op->standard == NULL)) {
return PCMK_OCF_UNKNOWN_ERROR;
}
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)
&& pcmk__str_eq(op->action, "status", pcmk__str_casei)) {
return PCMK_LSB_STATUS_INSUFFICIENT_PRIV;
}
#if SUPPORT_NAGIOS
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
return NAGIOS_INSUFFICIENT_PRIV;
}
#endif
return PCMK_OCF_INSUFFICIENT_PRIV;
}
/*!
* \internal
* \brief Return agent standard's exit status for "not configured"
*
* When returning an internal error for an action, a value that is appropriate
* to the action's agent standard must be used. This function returns a value
* appropriate for "not configured" errors.
*
* \param[in] op Action that error is for
* \param[in] is_fatal Whether problem is cluster-wide instead of only local
*
* \return Exit status appropriate to agent standard
* \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR.
*/
int
services__configuration_error(svc_action_t *op, bool is_fatal)
{
if ((op == NULL) || (op->standard == NULL)) {
return PCMK_OCF_UNKNOWN_ERROR;
}
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)
&& pcmk__str_eq(op->action, "status", pcmk__str_casei)) {
return PCMK_LSB_NOT_CONFIGURED;
}
#if SUPPORT_NAGIOS
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
return NAGIOS_STATE_UNKNOWN;
}
#endif
return is_fatal? PCMK_OCF_NOT_CONFIGURED : PCMK_OCF_INVALID_PARAM;
}
/*!
* \internal
* \brief Set operation rc and status per errno from stat(), fork() or execvp()
*
* \param[in,out] op Operation to set rc and status for
* \param[in] error Value of errno after system call
*
* \return void
*/
void
services__handle_exec_error(svc_action_t * op, int error)
{
+ const char *name = op->opaque->exec;
+
+ if (name == NULL) {
+ name = op->agent;
+ if (name == NULL) {
+ name = op->id;
+ }
+ }
+
switch (error) { /* see execve(2), stat(2) and fork(2) */
case ENOENT: /* No such file or directory */
case EISDIR: /* Is a directory */
case ENOTDIR: /* Path component is not a directory */
case EINVAL: /* Invalid executable format */
case ENOEXEC: /* Invalid executable format */
- services__set_result(op, services__not_installed_error(op),
- PCMK_EXEC_NOT_INSTALLED, pcmk_rc_str(error));
+ services__format_result(op, services__not_installed_error(op),
+ PCMK_EXEC_NOT_INSTALLED, "%s: %s",
+ name, pcmk_rc_str(error));
break;
case EACCES: /* permission denied (various errors) */
case EPERM: /* permission denied (various errors) */
- services__set_result(op, services__authorization_error(op),
- PCMK_EXEC_ERROR, pcmk_rc_str(error));
+ services__format_result(op, services__authorization_error(op),
+ PCMK_EXEC_ERROR, "%s: %s",
+ name, pcmk_rc_str(error));
break;
default:
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR, pcmk_rc_str(error));
}
}
/*!
* \internal
* \brief Exit a child process that failed before executing agent
*
* \param[in] op Action that failed
* \param[in] exit_status Exit status code to use
* \param[in] exit_reason Exit reason to output if for OCF agent
*/
static void
exit_child(svc_action_t *op, int exit_status, const char *exit_reason)
{
if ((op != NULL) && (exit_reason != NULL)
&& pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF,
pcmk__str_none)) {
fprintf(stderr, PCMK_OCF_REASON_PREFIX "%s\n", exit_reason);
}
_exit(exit_status);
}
static void
action_launch_child(svc_action_t *op)
{
int rc;
/* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library.
* Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well.
* We do not want this to be inherited by the child process. By resetting this the signal
* to the default behavior, we avoid some potential odd problems that occur during OCF
* scripts when SIGPIPE is ignored by the environment. */
signal(SIGPIPE, SIG_DFL);
#if defined(HAVE_SCHED_SETSCHEDULER)
if (sched_getscheduler(0) != SCHED_OTHER) {
struct sched_param sp;
memset(&sp, 0, sizeof(sp));
sp.sched_priority = 0;
if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
crm_info("Could not reset scheduling policy for %s", op->id);
}
}
#endif
if (setpriority(PRIO_PROCESS, 0, 0) == -1) {
crm_info("Could not reset process priority for %s", op->id);
}
/* Man: The call setpgrp() is equivalent to setpgid(0,0)
* _and_ compiles on BSD variants too
* need to investigate if it works the same too.
*/
setpgid(0, 0);
pcmk__close_fds_in_child(false);
/* It would be nice if errors in this function could be reported as
* execution status (for example, PCMK_EXEC_NO_SECRETS for the secrets error
* below) instead of exit status. However, we've already forked, so
* exit status is all we have. At least for OCF actions, we can output an
* exit reason for the parent to parse.
*/
#if SUPPORT_CIBSECRETS
rc = pcmk__substitute_secrets(op->rsc, op->params);
if (rc != pcmk_rc_ok) {
if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) {
crm_info("Proceeding with stop operation for %s "
"despite being unable to load CIB secrets (%s)",
op->rsc, pcmk_rc_str(rc));
} else {
crm_err("Considering %s unconfigured "
"because unable to load CIB secrets: %s",
op->rsc, pcmk_rc_str(rc));
exit_child(op, services__configuration_error(op, false),
"Unable to load CIB secrets");
}
}
#endif
add_action_env_vars(op);
/* Become the desired user */
if (op->opaque->uid && (geteuid() == 0)) {
// If requested, set effective group
if (op->opaque->gid && (setgid(op->opaque->gid) < 0)) {
crm_err("Considering %s unauthorized because could not set "
"child group to %d: %s",
op->id, op->opaque->gid, strerror(errno));
exit_child(op, services__authorization_error(op),
"Could not set group for child process");
}
// Erase supplementary group list
// (We could do initgroups() if we kept a copy of the username)
if (setgroups(0, NULL) < 0) {
crm_err("Considering %s unauthorized because could not "
"clear supplementary groups: %s", op->id, strerror(errno));
exit_child(op, services__authorization_error(op),
"Could not clear supplementary groups for child process");
}
// Set effective user
if (setuid(op->opaque->uid) < 0) {
crm_err("Considering %s unauthorized because could not set user "
"to %d: %s", op->id, op->opaque->uid, strerror(errno));
exit_child(op, services__authorization_error(op),
"Could not set user for child process");
}
}
// Execute the agent (doesn't return if successful)
execvp(op->opaque->exec, op->opaque->args);
// An earlier stat() should have avoided most possible errors
rc = errno;
services__handle_exec_error(op, rc);
crm_err("Unable to execute %s: %s", op->id, strerror(rc));
exit_child(op, op->rc, "Child process was unable to execute file");
}
/*!
* \internal
* \brief Wait for synchronous action to complete, and set its result
*
* \param[in] op Action to wait for
* \param[in] data Child signal data
*/
static void
wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
{
int status = 0;
int timeout = op->timeout;
time_t start = time(NULL);
struct pollfd fds[3];
int wait_rc = 0;
const char *wait_reason = NULL;
fds[0].fd = op->opaque->stdout_fd;
fds[0].events = POLLIN;
fds[0].revents = 0;
fds[1].fd = op->opaque->stderr_fd;
fds[1].events = POLLIN;
fds[1].revents = 0;
fds[2].fd = sigchld_open(data);
fds[2].events = POLLIN;
fds[2].revents = 0;
crm_trace("Waiting for %s[%d]", op->id, op->pid);
do {
int poll_rc = poll(fds, 3, timeout);
wait_reason = NULL;
if (poll_rc > 0) {
if (fds[0].revents & POLLIN) {
svc_read_output(op->opaque->stdout_fd, op, FALSE);
}
if (fds[1].revents & POLLIN) {
svc_read_output(op->opaque->stderr_fd, op, TRUE);
}
if ((fds[2].revents & POLLIN) && sigchld_received(fds[2].fd)) {
wait_rc = waitpid(op->pid, &status, WNOHANG);
if ((wait_rc > 0) || ((wait_rc < 0) && (errno == ECHILD))) {
// Child process exited or doesn't exist
break;
} else if (wait_rc < 0) {
wait_reason = pcmk_rc_str(errno);
crm_info("Wait for completion of %s[%d] failed: %s "
CRM_XS " source=waitpid",
op->id, op->pid, wait_reason);
wait_rc = 0; // Act as if process is still running
}
}
} else if (poll_rc == 0) {
// Poll timed out with no descriptors ready
timeout = 0;
break;
} else if ((poll_rc < 0) && (errno != EINTR)) {
wait_reason = pcmk_rc_str(errno);
crm_info("Wait for completion of %s[%d] failed: %s "
CRM_XS " source=poll", op->id, op->pid, wait_reason);
break;
}
timeout = op->timeout - (time(NULL) - start) * 1000;
} while ((op->timeout < 0 || timeout > 0));
crm_trace("Stopped waiting for %s[%d]", op->id, op->pid);
finish_op_output(op, true);
finish_op_output(op, false);
close_op_input(op);
sigchld_close(fds[2].fd);
if (wait_rc <= 0) {
if ((op->timeout > 0) && (timeout <= 0)) {
services__set_result(op, services__generic_error(op),
PCMK_EXEC_TIMEOUT,
"Process did not exit within specified timeout");
crm_info("%s[%d] timed out after %dms",
op->id, op->pid, op->timeout);
} else {
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR, wait_reason);
}
/* If only child hasn't been successfully waited for, yet.
This is to limit killing wrong target a bit more. */
if ((wait_rc == 0) && (waitpid(op->pid, &status, WNOHANG) == 0)) {
if (kill(op->pid, SIGKILL)) {
crm_warn("Could not kill rogue child %s[%d]: %s",
op->id, op->pid, pcmk_rc_str(errno));
}
/* Safe to skip WNOHANG here as we sent non-ignorable signal. */
while ((waitpid(op->pid, &status, 0) == (pid_t) -1)
&& (errno == EINTR)) {
/* keep waiting */;
}
}
} else if (WIFEXITED(status)) {
services__set_result(op, WEXITSTATUS(status), PCMK_EXEC_DONE, NULL);
parse_exit_reason_from_stderr(op);
crm_info("%s[%d] exited with status %d", op->id, op->pid, op->rc);
} else if (WIFSIGNALED(status)) {
int signo = WTERMSIG(status);
- services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR,
- "Process interrupted by signal");
+ services__format_result(op, services__generic_error(op),
+ PCMK_EXEC_ERROR,
+ "Process interrupted by %s signal",
+ strsignal(signo));
crm_info("%s[%d] terminated with signal %d (%s)",
op->id, op->pid, signo, strsignal(signo));
#ifdef WCOREDUMP
if (WCOREDUMP(status)) {
crm_warn("%s[%d] dumped core", op->id, op->pid);
}
#endif
} else {
// Shouldn't be possible to get here
services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR,
"Unable to wait for child to complete");
}
}
/*!
* \internal
* \brief Execute an action whose standard uses executable files
*
* \param[in] op Action to execute
*
* \return Standard Pacemaker return value
* \retval EBUSY Recurring operation could not be initiated
* \retval pcmk_rc_error Synchronous action failed
* \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action
* should not be freed (because it's pending or because
* it failed to execute and was already freed)
*
* \note If the return value for an asynchronous action is not pcmk_rc_ok, the
* caller is responsible for freeing the action.
*/
int
services__execute_file(svc_action_t *op)
{
int stdout_fd[2];
int stderr_fd[2];
int stdin_fd[2] = {-1, -1};
int rc;
struct stat st;
struct sigchld_data_s data;
// Catch common failure conditions early
if (stat(op->opaque->exec, &st) != 0) {
rc = errno;
crm_info("Cannot execute '%s': %s " CRM_XS " stat rc=%d",
op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
goto done;
}
if (pipe(stdout_fd) < 0) {
rc = errno;
crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d",
op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
goto done;
}
if (pipe(stderr_fd) < 0) {
rc = errno;
close_pipe(stdout_fd);
crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d",
op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
goto done;
}
if (pcmk_is_set(pcmk_get_ra_caps(op->standard), pcmk_ra_cap_stdin)) {
if (pipe(stdin_fd) < 0) {
rc = errno;
close_pipe(stdout_fd);
close_pipe(stderr_fd);
crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d",
op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
goto done;
}
}
if (op->synchronous && !sigchld_setup(&data)) {
close_pipe(stdin_fd);
close_pipe(stdout_fd);
close_pipe(stderr_fd);
sigchld_cleanup(&data);
services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR,
"Could not manage signals for child process");
goto done;
}
op->pid = fork();
switch (op->pid) {
case -1:
rc = errno;
close_pipe(stdin_fd);
close_pipe(stdout_fd);
close_pipe(stderr_fd);
crm_info("Cannot execute '%s': %s " CRM_XS " fork rc=%d",
op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
if (op->synchronous) {
sigchld_cleanup(&data);
}
goto done;
break;
case 0: /* Child */
close(stdout_fd[0]);
close(stderr_fd[0]);
if (stdin_fd[1] >= 0) {
close(stdin_fd[1]);
}
if (STDOUT_FILENO != stdout_fd[1]) {
if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) {
crm_warn("Can't redirect output from '%s': %s "
CRM_XS " errno=%d",
op->opaque->exec, pcmk_rc_str(errno), errno);
}
close(stdout_fd[1]);
}
if (STDERR_FILENO != stderr_fd[1]) {
if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) {
crm_warn("Can't redirect error output from '%s': %s "
CRM_XS " errno=%d",
op->opaque->exec, pcmk_rc_str(errno), errno);
}
close(stderr_fd[1]);
}
if ((stdin_fd[0] >= 0) &&
(STDIN_FILENO != stdin_fd[0])) {
if (dup2(stdin_fd[0], STDIN_FILENO) != STDIN_FILENO) {
crm_warn("Can't redirect input to '%s': %s "
CRM_XS " errno=%d",
op->opaque->exec, pcmk_rc_str(errno), errno);
}
close(stdin_fd[0]);
}
if (op->synchronous) {
sigchld_cleanup(&data);
}
action_launch_child(op);
CRM_ASSERT(0); /* action_launch_child is effectively noreturn */
}
/* Only the parent reaches here */
close(stdout_fd[1]);
close(stderr_fd[1]);
if (stdin_fd[0] >= 0) {
close(stdin_fd[0]);
}
op->opaque->stdout_fd = stdout_fd[0];
rc = pcmk__set_nonblocking(op->opaque->stdout_fd);
if (rc != pcmk_rc_ok) {
crm_info("Could not set '%s' output non-blocking: %s "
CRM_XS " rc=%d",
op->opaque->exec, pcmk_rc_str(rc), rc);
}
op->opaque->stderr_fd = stderr_fd[0];
rc = pcmk__set_nonblocking(op->opaque->stderr_fd);
if (rc != pcmk_rc_ok) {
crm_info("Could not set '%s' error output non-blocking: %s "
CRM_XS " rc=%d",
op->opaque->exec, pcmk_rc_str(rc), rc);
}
op->opaque->stdin_fd = stdin_fd[1];
if (op->opaque->stdin_fd >= 0) {
// using buffer behind non-blocking-fd here - that could be improved
// as long as no other standard uses stdin_fd assume stonith
rc = pcmk__set_nonblocking(op->opaque->stdin_fd);
if (rc != pcmk_rc_ok) {
crm_info("Could not set '%s' input non-blocking: %s "
CRM_XS " fd=%d,rc=%d", op->opaque->exec,
pcmk_rc_str(rc), op->opaque->stdin_fd, rc);
}
pipe_in_action_stdin_parameters(op);
// as long as we are handling parameters directly in here just close
close(op->opaque->stdin_fd);
op->opaque->stdin_fd = -1;
}
// after fds are setup properly and before we plug anything into mainloop
if (op->opaque->fork_callback) {
op->opaque->fork_callback(op);
}
if (op->synchronous) {
wait_for_sync_result(op, &data);
sigchld_cleanup(&data);
goto done;
}
crm_trace("Waiting async for '%s'[%d]", op->opaque->exec, op->pid);
mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op,
pcmk_is_set(op->flags, SVC_ACTION_LEAVE_GROUP)? mainloop_leave_pid_group : 0,
async_action_complete);
op->opaque->stdout_gsource = mainloop_add_fd(op->id,
G_PRIORITY_LOW,
op->opaque->stdout_fd, op,
&stdout_callbacks);
op->opaque->stderr_gsource = mainloop_add_fd(op->id,
G_PRIORITY_LOW,
op->opaque->stderr_fd, op,
&stderr_callbacks);
services_add_inflight_op(op);
return pcmk_rc_ok;
done:
if (op->synchronous) {
return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error;
} else {
return services__finalize_async_op(op);
}
}
GList *
services_os_get_single_directory_list(const char *root, gboolean files, gboolean executable)
{
GList *list = NULL;
struct dirent **namelist;
int entries = 0, lpc = 0;
char buffer[PATH_MAX];
entries = scandir(root, &namelist, NULL, alphasort);
if (entries <= 0) {
return list;
}
for (lpc = 0; lpc < entries; lpc++) {
struct stat sb;
if ('.' == namelist[lpc]->d_name[0]) {
free(namelist[lpc]);
continue;
}
snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name);
if (stat(buffer, &sb)) {
continue;
}
if (S_ISDIR(sb.st_mode)) {
if (files) {
free(namelist[lpc]);
continue;
}
} else if (S_ISREG(sb.st_mode)) {
if (files == FALSE) {
free(namelist[lpc]);
continue;
} else if (executable
&& (sb.st_mode & S_IXUSR) == 0
&& (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) {
free(namelist[lpc]);
continue;
}
}
list = g_list_append(list, strdup(namelist[lpc]->d_name));
free(namelist[lpc]);
}
free(namelist);
return list;
}
GList *
services_os_get_directory_list(const char *root, gboolean files, gboolean executable)
{
GList *result = NULL;
char *dirs = strdup(root);
char *dir = NULL;
if (pcmk__str_empty(dirs)) {
free(dirs);
return result;
}
for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) {
GList *tmp = services_os_get_single_directory_list(dir, files, executable);
if (tmp) {
result = g_list_concat(result, tmp);
}
}
free(dirs);
return result;
}
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index bed1861077..9883e5b2ae 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -1,1088 +1,1094 @@
/*
* Copyright 2012-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include <crm/common/mainloop.h>
#include <sys/stat.h>
#include <gio/gio.h>
#include <services_private.h>
#include <systemd.h>
#include <dbus/dbus.h>
#include <pcmk-dbus.h>
static void invoke_unit_by_path(svc_action_t *op, const char *unit);
#define BUS_NAME "org.freedesktop.systemd1"
#define BUS_NAME_MANAGER BUS_NAME ".Manager"
#define BUS_NAME_UNIT BUS_NAME ".Unit"
#define BUS_PATH "/org/freedesktop/systemd1"
/*!
* \internal
* \brief Prepare a systemd action
*
* \param[in] op Action to prepare
*
* \return Standard Pacemaker return code
*/
int
services__systemd_prepare(svc_action_t *op)
{
op->opaque->exec = strdup("systemd-dbus");
if (op->opaque->exec == NULL) {
return ENOMEM;
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Map a systemd result to a standard OCF result
*
* \param[in] exit_status Systemd result
*
* \return Standard OCF result
*/
enum ocf_exitcode
services__systemd2ocf(int exit_status)
{
// This library uses OCF codes for systemd actions
return (enum ocf_exitcode) exit_status;
}
static inline DBusMessage *
systemd_new_method(const char *method)
{
crm_trace("Calling: %s on " BUS_NAME_MANAGER, method);
return dbus_message_new_method_call(BUS_NAME, BUS_PATH, BUS_NAME_MANAGER,
method);
}
/*
* Functions to manage a static DBus connection
*/
static DBusConnection* systemd_proxy = NULL;
static inline DBusPendingCall *
systemd_send(DBusMessage *msg,
void(*done)(DBusPendingCall *pending, void *user_data),
void *user_data, int timeout)
{
return pcmk_dbus_send(msg, systemd_proxy, done, user_data, timeout);
}
static inline DBusMessage *
systemd_send_recv(DBusMessage *msg, DBusError *error, int timeout)
{
return pcmk_dbus_send_recv(msg, systemd_proxy, error, timeout);
}
/*!
* \internal
* \brief Send a method to systemd without arguments, and wait for reply
*
* \param[in] method Method to send
*
* \return Systemd reply on success, NULL (and error will be logged) otherwise
*
* \note The caller must call dbus_message_unref() on the reply after
* handling it.
*/
static DBusMessage *
systemd_call_simple_method(const char *method)
{
DBusMessage *msg = systemd_new_method(method);
DBusMessage *reply = NULL;
DBusError error;
/* Don't call systemd_init() here, because that calls this */
CRM_CHECK(systemd_proxy, return NULL);
if (msg == NULL) {
crm_err("Could not create message to send %s to systemd", method);
return NULL;
}
dbus_error_init(&error);
reply = systemd_send_recv(msg, &error, DBUS_TIMEOUT_USE_DEFAULT);
dbus_message_unref(msg);
if (dbus_error_is_set(&error)) {
crm_err("Could not send %s to systemd: %s (%s)",
method, error.message, error.name);
dbus_error_free(&error);
return NULL;
} else if (reply == NULL) {
crm_err("Could not send %s to systemd: no reply received", method);
return NULL;
}
return reply;
}
static gboolean
systemd_init(void)
{
static int need_init = 1;
// https://dbus.freedesktop.org/doc/api/html/group__DBusConnection.html
if (systemd_proxy
&& dbus_connection_get_is_connected(systemd_proxy) == FALSE) {
crm_warn("Connection to System DBus is closed. Reconnecting...");
pcmk_dbus_disconnect(systemd_proxy);
systemd_proxy = NULL;
need_init = 1;
}
if (need_init) {
need_init = 0;
systemd_proxy = pcmk_dbus_connect();
}
if (systemd_proxy == NULL) {
return FALSE;
}
return TRUE;
}
static inline char *
systemd_get_property(const char *unit, const char *name,
void (*callback)(const char *name, const char *value, void *userdata),
void *userdata, DBusPendingCall **pending, int timeout)
{
return systemd_proxy?
pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME_UNIT,
name, callback, userdata, pending, timeout)
: NULL;
}
void
systemd_cleanup(void)
{
if (systemd_proxy) {
pcmk_dbus_disconnect(systemd_proxy);
systemd_proxy = NULL;
}
}
/*
* end of systemd_proxy functions
*/
/*!
* \internal
* \brief Check whether a file name represents a manageable systemd unit
*
* \param[in] name File name to check
*
* \return Pointer to "dot" before filename extension if so, NULL otherwise
*/
static const char *
systemd_unit_extension(const char *name)
{
if (name) {
const char *dot = strrchr(name, '.');
if (dot && (!strcmp(dot, ".service")
|| !strcmp(dot, ".socket")
|| !strcmp(dot, ".mount")
|| !strcmp(dot, ".timer")
|| !strcmp(dot, ".path"))) {
return dot;
}
}
return NULL;
}
static char *
systemd_service_name(const char *name, bool add_instance_name)
{
const char *dot = NULL;
if (pcmk__str_empty(name)) {
return NULL;
}
/* Services that end with an @ sign are systemd templates. They expect an
* instance name to follow the service name. If no instance name was
* provided, just add "pacemaker" to the string as the instance name. It
* doesn't seem to matter for purposes of looking up whether a service
* exists or not.
*
* A template can be specified either with or without the unit extension,
* so this block handles both cases.
*/
dot = systemd_unit_extension(name);
if (dot) {
if (dot != name && *(dot-1) == '@') {
char *s = NULL;
if (asprintf(&s, "%.*spacemaker%s", (int) (dot-name), name, dot) == -1) {
/* If asprintf fails, just return name. */
return strdup(name);
}
return s;
} else {
return strdup(name);
}
} else if (add_instance_name && *(name+strlen(name)-1) == '@') {
return crm_strdup_printf("%spacemaker.service", name);
} else {
return crm_strdup_printf("%s.service", name);
}
}
static void
systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
{
DBusError error;
DBusMessage *reply = NULL;
unsigned int reload_count = GPOINTER_TO_UINT(user_data);
dbus_error_init(&error);
if(pending) {
reply = dbus_pending_call_steal_reply(pending);
}
if (pcmk_dbus_find_error(pending, reply, &error)) {
crm_warn("Could not issue systemd reload %d: %s",
reload_count, error.message);
dbus_error_free(&error);
} else {
crm_trace("Reload %d complete", reload_count);
}
if(pending) {
dbus_pending_call_unref(pending);
}
if(reply) {
dbus_message_unref(reply);
}
}
static bool
systemd_daemon_reload(int timeout)
{
static unsigned int reload_count = 0;
DBusMessage *msg = systemd_new_method("Reload");
reload_count++;
CRM_ASSERT(msg != NULL);
systemd_send(msg, systemd_daemon_reload_complete,
GUINT_TO_POINTER(reload_count), timeout);
dbus_message_unref(msg);
return TRUE;
}
/*!
* \internal
* \brief Set an action result based on a method error
*
* \param[in] op Action to set result for
* \param[in] error Method error
*/
static void
set_result_from_method_error(svc_action_t *op, const DBusError *error)
{
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Unable to invoke systemd DBus method");
if (strstr(error->name, "org.freedesktop.systemd1.InvalidName")
|| strstr(error->name, "org.freedesktop.systemd1.LoadFailed")
|| strstr(error->name, "org.freedesktop.systemd1.NoSuchUnit")) {
if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) {
crm_trace("Masking systemd stop failure (%s) for %s "
"because unknown service can be considered stopped",
error->name, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
return;
}
- services__set_result(op, PCMK_OCF_NOT_INSTALLED,
- PCMK_EXEC_NOT_INSTALLED, "systemd unit not found");
+ services__format_result(op, PCMK_OCF_NOT_INSTALLED,
+ PCMK_EXEC_NOT_INSTALLED,
+ "systemd unit %s not found", op->agent);
}
crm_info("DBus request for %s of systemd unit %s for resource %s failed: %s",
op->action, op->agent, crm_str(op->rsc), error->message);
}
/*!
* \internal
* \brief Extract unit path from LoadUnit reply, and execute action
*
* \param[in] reply LoadUnit reply
* \param[in] op Action to execute (or NULL to just return path)
*
* \return DBus object path for specified unit if successful (only valid for
* lifetime of \p reply), otherwise NULL
*/
static const char *
execute_after_loadunit(DBusMessage *reply, svc_action_t *op)
{
const char *path = NULL;
DBusError error;
/* path here is not used other than as a non-NULL flag to indicate that a
* request was indeed sent
*/
if (pcmk_dbus_find_error((void *) &path, reply, &error)) {
if (op != NULL) {
set_result_from_method_error(op, &error);
}
dbus_error_free(&error);
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
if (op != NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"systemd DBus method had unexpected reply");
crm_info("Could not load systemd unit %s for %s: "
"DBus reply has unexpected type", op->agent, op->id);
} else {
crm_info("Could not load systemd unit: "
"DBus reply has unexpected type");
}
} else {
dbus_message_get_args (reply, NULL,
DBUS_TYPE_OBJECT_PATH, &path,
DBUS_TYPE_INVALID);
}
if (op != NULL) {
if (path != NULL) {
invoke_unit_by_path(op, path);
} else if (!(op->synchronous)) {
- services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
- "No DBus object found for systemd unit");
+ services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
+ "No DBus object found for systemd unit %s",
+ op->agent);
services__finalize_async_op(op);
}
}
return path;
}
/*!
* \internal
* \brief Execute a systemd action after its LoadUnit completes
*
* \param[in] pending If not NULL, DBus call associated with LoadUnit request
* \param[in] user_data Action to execute
*/
static void
loadunit_completed(DBusPendingCall *pending, void *user_data)
{
DBusMessage *reply = NULL;
svc_action_t *op = user_data;
crm_trace("LoadUnit result for %s arrived", op->id);
// Grab the reply
if (pending != NULL) {
reply = dbus_pending_call_steal_reply(pending);
}
// The call is no longer pending
CRM_LOG_ASSERT(pending == op->opaque->pending);
services_set_op_pending(op, NULL);
// Execute the desired action based on the reply
execute_after_loadunit(reply, user_data);
if (reply != NULL) {
dbus_message_unref(reply);
}
}
/*!
* \internal
* \brief Execute a systemd action, given the unit name
*
* \param[in] arg_name Unit name (possibly shortened, i.e. without ".service")
* \param[in] op Action to execute (if NULL, just get the object path)
* \param[out] path If non-NULL and \p op is NULL or synchronous, where to
* store DBus object path for specified unit
*
* \return Standard Pacemaker return code (for NULL \p op, pcmk_rc_ok means unit
* was found; for synchronous actions, pcmk_rc_ok means unit was
* executed, with the actual result stored in \p op; for asynchronous
* actions, pcmk_rc_ok means action was initiated)
* \note It is the caller's responsibility to free the path.
*/
static int
invoke_unit_by_name(const char *arg_name, svc_action_t *op, char **path)
{
DBusMessage *msg;
DBusMessage *reply = NULL;
DBusPendingCall *pending = NULL;
char *name = NULL;
if (!systemd_init()) {
if (op != NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"No DBus connection");
}
return ENOTCONN;
}
/* Create a LoadUnit DBus method (equivalent to GetUnit if already loaded),
* which makes the unit usable via further DBus methods.
*
* <method name="LoadUnit">
* <arg name="name" type="s" direction="in"/>
* <arg name="unit" type="o" direction="out"/>
* </method>
*/
msg = systemd_new_method("LoadUnit");
CRM_ASSERT(msg != NULL);
// Add the (expanded) unit name as the argument
name = systemd_service_name(arg_name, op == NULL || pcmk__str_eq(op->action, "meta-data", pcmk__str_none));
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name,
DBUS_TYPE_INVALID));
free(name);
if ((op == NULL) || op->synchronous) {
// For synchronous ops, wait for a reply and extract the result
const char *unit = NULL;
int rc = pcmk_rc_ok;
reply = systemd_send_recv(msg, NULL,
(op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT));
dbus_message_unref(msg);
unit = execute_after_loadunit(reply, op);
if (unit == NULL) {
rc = ENOENT;
if (path != NULL) {
*path = NULL;
}
} else if (path != NULL) {
*path = strdup(unit);
if (*path == NULL) {
rc = ENOMEM;
}
}
if (reply != NULL) {
dbus_message_unref(reply);
}
return rc;
}
// For asynchronous ops, initiate the LoadUnit call and return
pending = systemd_send(msg, loadunit_completed, op, op->timeout);
if (pending == NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Unable to send DBus message");
dbus_message_unref(msg);
return ECOMM;
}
// LoadUnit was successfully initiated
services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
services_set_op_pending(op, pending);
dbus_message_unref(msg);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Compare two strings alphabetically (case-insensitive)
*
* \param[in] a First string to compare
* \param[in] b Second string to compare
*
* \return 0 if strings are equal, -1 if a < b, 1 if a > b
*
* \note Usable as a GCompareFunc with g_list_sort().
* NULL is considered less than non-NULL.
*/
static gint
sort_str(gconstpointer a, gconstpointer b)
{
if (!a && !b) {
return 0;
} else if (!a) {
return -1;
} else if (!b) {
return 1;
}
return strcasecmp(a, b);
}
GList *
systemd_unit_listall(void)
{
int nfiles = 0;
GList *units = NULL;
DBusMessageIter args;
DBusMessageIter unit;
DBusMessageIter elem;
DBusMessage *reply = NULL;
if (systemd_init() == FALSE) {
return NULL;
}
/*
" <method name=\"ListUnitFiles\">\n" \
" <arg name=\"files\" type=\"a(ss)\" direction=\"out\"/>\n" \
" </method>\n" \
*/
reply = systemd_call_simple_method("ListUnitFiles");
if (reply == NULL) {
return NULL;
}
if (!dbus_message_iter_init(reply, &args)) {
crm_err("Could not list systemd unit files: systemd reply has no arguments");
dbus_message_unref(reply);
return NULL;
}
if (!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY,
__func__, __LINE__)) {
crm_err("Could not list systemd unit files: systemd reply has invalid arguments");
dbus_message_unref(reply);
return NULL;
}
dbus_message_iter_recurse(&args, &unit);
for (; dbus_message_iter_get_arg_type(&unit) != DBUS_TYPE_INVALID;
dbus_message_iter_next(&unit)) {
DBusBasicValue value;
const char *match = NULL;
char *unit_name = NULL;
char *basename = NULL;
if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_STRUCT, __func__, __LINE__)) {
crm_warn("Skipping systemd reply argument with unexpected type");
continue;
}
dbus_message_iter_recurse(&unit, &elem);
if(!pcmk_dbus_type_check(reply, &elem, DBUS_TYPE_STRING, __func__, __LINE__)) {
crm_warn("Skipping systemd reply argument with no string");
continue;
}
dbus_message_iter_get_basic(&elem, &value);
if (value.str == NULL) {
crm_debug("ListUnitFiles reply did not provide a string");
continue;
}
crm_trace("DBus ListUnitFiles listed: %s", value.str);
match = systemd_unit_extension(value.str);
if (match == NULL) {
// This is not a unit file type we know how to manage
crm_debug("ListUnitFiles entry '%s' is not supported as resource",
value.str);
continue;
}
// ListUnitFiles returns full path names, we just want base name
basename = strrchr(value.str, '/');
if (basename) {
basename = basename + 1;
} else {
basename = value.str;
}
if (!strcmp(match, ".service")) {
// Service is the "default" unit type, so strip it
unit_name = strndup(basename, match - basename);
} else {
unit_name = strdup(basename);
}
nfiles++;
units = g_list_prepend(units, unit_name);
}
dbus_message_unref(reply);
crm_trace("Found %d manageable systemd unit files", nfiles);
units = g_list_sort(units, sort_str);
return units;
}
gboolean
systemd_unit_exists(const char *name)
{
char *path = NULL;
char *state = NULL;
/* Note: Makes a blocking dbus calls
* Used by resources_find_service_class() when resource class=service
*/
if ((invoke_unit_by_name(name, NULL, &path) != pcmk_rc_ok)
|| (path == NULL)) {
return FALSE;
}
/* A successful LoadUnit is not sufficient to determine the unit's
* existence; it merely means the LoadUnit request received a reply.
* We must make another blocking call to check the LoadState property.
*/
state = systemd_get_property(path, "LoadState", NULL, NULL, NULL,
DBUS_TIMEOUT_USE_DEFAULT);
free(path);
if (pcmk__str_any_of(state, "loaded", "masked", NULL)) {
free(state);
return TRUE;
}
free(state);
return FALSE;
}
#define METADATA_FORMAT \
"<?xml version=\"1.0\"?>\n" \
"<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n" \
"<resource-agent name=\"%s\" version=\"" PCMK_DEFAULT_AGENT_VERSION "\">\n" \
" <version>1.1</version>\n" \
" <longdesc lang=\"en\">\n" \
" %s\n" \
" </longdesc>\n" \
" <shortdesc lang=\"en\">systemd unit file for %s</shortdesc>\n" \
" <parameters/>\n" \
" <actions>\n" \
" <action name=\"start\" timeout=\"100\" />\n" \
" <action name=\"stop\" timeout=\"100\" />\n" \
" <action name=\"status\" timeout=\"100\" />\n" \
" <action name=\"monitor\" timeout=\"100\" interval=\"60\"/>\n" \
" <action name=\"meta-data\" timeout=\"5\" />\n" \
" </actions>\n" \
" <special tag=\"systemd\"/>\n" \
"</resource-agent>\n"
static char *
systemd_unit_metadata(const char *name, int timeout)
{
char *meta = NULL;
char *desc = NULL;
char *path = NULL;
char *escaped = NULL;
if (invoke_unit_by_name(name, NULL, &path) == pcmk_rc_ok) {
/* TODO: Worth a making blocking call for? Probably not. Possibly if cached. */
desc = systemd_get_property(path, "Description", NULL, NULL, NULL,
timeout);
} else {
desc = crm_strdup_printf("Systemd unit file for %s", name);
}
escaped = crm_xml_escape(desc);
meta = crm_strdup_printf(METADATA_FORMAT, name, escaped, name);
free(desc);
free(path);
free(escaped);
return meta;
}
/*!
* \internal
* \brief Determine result of method from reply
*
* \param[in] reply Reply to start, stop, or restart request
* \param[in] op Action that was executed
*/
static void
process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
{
DBusError error;
/* The first use of error here is not used other than as a non-NULL flag to
* indicate that a request was indeed sent
*/
if (pcmk_dbus_find_error((void *) &error, reply, &error)) {
set_result_from_method_error(op, &error);
dbus_error_free(&error);
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
crm_info("DBus request for %s of %s succeeded but "
"return type was unexpected", op->action, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE,
"systemd DBus method had unexpected reply");
} else {
const char *path = NULL;
dbus_message_get_args(reply, NULL,
DBUS_TYPE_OBJECT_PATH, &path,
DBUS_TYPE_INVALID);
crm_debug("DBus request for %s of %s using %s succeeded",
op->action, crm_str(op->rsc), path);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
}
/*!
* \internal
* \brief Process the completion of an asynchronous unit start, stop, or restart
*
* \param[in] pending If not NULL, DBus call associated with request
* \param[in] user_data Action that was executed
*/
static void
unit_method_complete(DBusPendingCall *pending, void *user_data)
{
DBusMessage *reply = NULL;
svc_action_t *op = user_data;
crm_trace("Result for %s arrived", op->id);
// Grab the reply
if (pending != NULL) {
reply = dbus_pending_call_steal_reply(pending);
}
// The call is no longer pending
CRM_LOG_ASSERT(pending == op->opaque->pending);
services_set_op_pending(op, NULL);
// Determine result and finalize action
process_unit_method_reply(reply, op);
services__finalize_async_op(op);
if (reply != NULL) {
dbus_message_unref(reply);
}
}
#define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/"
/* When the cluster manages a systemd resource, we create a unit file override
* to order the service "before" pacemaker. The "before" relationship won't
* actually be used, since systemd won't ever start the resource -- we're
* interested in the reverse shutdown ordering it creates, to ensure that
* systemd doesn't stop the resource at shutdown while pacemaker is still
* running.
*
* @TODO Add start timeout
*/
#define SYSTEMD_OVERRIDE_TEMPLATE \
"[Unit]\n" \
"Description=Cluster Controlled %s\n" \
"Before=pacemaker.service pacemaker_remote.service\n" \
"\n" \
"[Service]\n" \
"Restart=no\n"
// Temporarily use rwxr-xr-x umask when opening a file for writing
static FILE *
create_world_readable(const char *filename)
{
mode_t orig_umask = umask(S_IWGRP | S_IWOTH);
FILE *fp = fopen(filename, "w");
umask(orig_umask);
return fp;
}
static void
create_override_dir(const char *agent)
{
char *override_dir = crm_strdup_printf(SYSTEMD_OVERRIDE_ROOT
"/%s.service.d", agent);
int rc = pcmk__build_path(override_dir, 0755);
if (rc != pcmk_rc_ok) {
crm_warn("Could not create systemd override directory %s: %s",
override_dir, pcmk_rc_str(rc));
}
free(override_dir);
}
static char *
get_override_filename(const char *agent)
{
return crm_strdup_printf(SYSTEMD_OVERRIDE_ROOT
"/%s.service.d/50-pacemaker.conf", agent);
}
static void
systemd_create_override(const char *agent, int timeout)
{
FILE *file_strm = NULL;
char *override_file = get_override_filename(agent);
create_override_dir(agent);
/* Ensure the override file is world-readable. This is not strictly
* necessary, but it avoids a systemd warning in the logs.
*/
file_strm = create_world_readable(override_file);
if (file_strm == NULL) {
crm_err("Cannot open systemd override file %s for writing",
override_file);
} else {
char *override = crm_strdup_printf(SYSTEMD_OVERRIDE_TEMPLATE, agent);
int rc = fprintf(file_strm, "%s\n", override);
free(override);
if (rc < 0) {
crm_perror(LOG_WARNING, "Cannot write to systemd override file %s",
override_file);
}
fflush(file_strm);
fclose(file_strm);
systemd_daemon_reload(timeout);
}
free(override_file);
}
static void
systemd_remove_override(const char *agent, int timeout)
{
char *override_file = get_override_filename(agent);
int rc = unlink(override_file);
if (rc < 0) {
// Stop may be called when already stopped, which is fine
crm_perror(LOG_DEBUG, "Cannot remove systemd override file %s",
override_file);
} else {
systemd_daemon_reload(timeout);
}
free(override_file);
}
/*!
* \internal
* \brief Parse result of systemd status check
*
* Set a status action's exit status and execution status based on a DBus
* property check result, and finalize the action if asynchronous.
*
* \param[in] name DBus interface name for property that was checked
* \param[in] state Property value
* \param[in] userdata Status action that check was done for
*/
static void
parse_status_result(const char *name, const char *state, void *userdata)
{
svc_action_t *op = userdata;
crm_trace("Resource %s has %s='%s'",
crm_str(op->rsc), name, crm_str(state));
if (pcmk__str_eq(state, "active", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else if (pcmk__str_eq(state, "reloading", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else if (pcmk__str_eq(state, "activating", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
} else if (pcmk__str_eq(state, "deactivating", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
} else {
services__set_result(op, PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, state);
}
if (!(op->synchronous)) {
services_set_op_pending(op, NULL);
services__finalize_async_op(op);
}
}
/*!
* \internal
* \brief Invoke a systemd unit, given its DBus object path
*
* \param[in] op Action to execute
* \param[in] unit DBus object path of systemd unit to invoke
*/
static void
invoke_unit_by_path(svc_action_t *op, const char *unit)
{
const char *method = NULL;
DBusMessage *msg = NULL;
DBusMessage *reply = NULL;
if (pcmk__str_any_of(op->action, "monitor", "status", NULL)) {
DBusPendingCall *pending = NULL;
char *state;
state = systemd_get_property(unit, "ActiveState",
(op->synchronous? NULL : parse_status_result),
op, (op->synchronous? NULL : &pending),
op->timeout);
if (op->synchronous) {
parse_status_result("ActiveState", state, op);
free(state);
} else if (pending == NULL) { // Could not get ActiveState property
- services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
- "Could not get unit state from DBus");
+ services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
+ "Could not get state for unit %s from DBus",
+ op->agent);
services__finalize_async_op(op);
} else {
services_set_op_pending(op, pending);
}
return;
} else if (pcmk__str_eq(op->action, "start", pcmk__str_none)) {
method = "StartUnit";
systemd_create_override(op->agent, op->timeout);
} else if (pcmk__str_eq(op->action, "stop", pcmk__str_none)) {
method = "StopUnit";
systemd_remove_override(op->agent, op->timeout);
} else if (pcmk__str_eq(op->action, "restart", pcmk__str_none)) {
method = "RestartUnit";
} else {
- services__set_result(op, PCMK_OCF_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR,
- "Action not implemented for systemd resources");
+ services__format_result(op, PCMK_OCF_UNIMPLEMENT_FEATURE,
+ PCMK_EXEC_ERROR,
+ "Action %s not implemented "
+ "for systemd resources", crm_str(op->action));
if (!(op->synchronous)) {
services__finalize_async_op(op);
}
return;
}
crm_trace("Calling %s for unit path %s named %s",
method, unit, crm_str(op->rsc));
msg = systemd_new_method(method);
CRM_ASSERT(msg != NULL);
/* (ss) */
{
const char *replace_s = "replace";
char *name = systemd_service_name(op->agent, pcmk__str_eq(op->action, "meta-data", pcmk__str_none));
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
free(name);
}
if (op->synchronous) {
reply = systemd_send_recv(msg, NULL, op->timeout);
dbus_message_unref(msg);
process_unit_method_reply(reply, op);
if (reply != NULL) {
dbus_message_unref(reply);
}
} else {
DBusPendingCall *pending = systemd_send(msg, unit_method_complete, op,
op->timeout);
dbus_message_unref(msg);
if (pending == NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Unable to send DBus message");
services__finalize_async_op(op);
} else {
services_set_op_pending(op, pending);
}
}
}
static gboolean
systemd_timeout_callback(gpointer p)
{
svc_action_t * op = p;
op->opaque->timerid = 0;
crm_info("%s action for systemd unit %s named '%s' timed out",
op->action, op->agent, op->rsc);
- services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
- "Systemd unit action did not complete in time");
+ services__format_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
+ "%s action for systemd unit %s "
+ "did not complete in time", op->action, op->agent);
services__finalize_async_op(op);
return FALSE;
}
/*!
* \internal
* \brief Execute a systemd action
*
* \param[in] op Action to execute
*
* \return Standard Pacemaker return code
* \retval EBUSY Recurring operation could not be initiated
* \retval pcmk_rc_error Synchronous action failed
* \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action
* should not be freed (because it's pending or because
* it failed to execute and was already freed)
*
* \note If the return value for an asynchronous action is not pcmk_rc_ok, the
* caller is responsible for freeing the action.
*/
int
services__execute_systemd(svc_action_t *op)
{
CRM_ASSERT(op != NULL);
if ((op->action == NULL) || (op->agent == NULL)) {
services__set_result(op, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL,
"Bug in action caller");
goto done;
}
if (!systemd_init()) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"No DBus connection");
goto done;
}
crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'",
(op->synchronous? "" : "a"), op->action, op->agent,
crm_str(op->rsc));
if (pcmk__str_eq(op->action, "meta-data", pcmk__str_casei)) {
op->stdout_data = systemd_unit_metadata(op->agent, op->timeout);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
goto done;
}
/* invoke_unit_by_name() should always override these values, which are here
* just as a fail-safe in case there are any code paths that neglect to
*/
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Bug in service library");
if (invoke_unit_by_name(op->agent, op, NULL) == pcmk_rc_ok) {
op->opaque->timerid = g_timeout_add(op->timeout + 5000,
systemd_timeout_callback, op);
services_add_inflight_op(op);
return pcmk_rc_ok;
}
done:
if (op->synchronous) {
return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error;
} else {
return services__finalize_async_op(op);
}
}
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index b877beea69..8c61d3b1b1 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -1,2011 +1,2012 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm_resource.h>
#include <crm/common/ipc_controld.h>
#include <crm/common/lists_internal.h>
#include <crm/services_internal.h>
resource_checks_t *
cli_check_resource(pe_resource_t *rsc, char *role_s, char *managed)
{
pe_resource_t *parent = uber_parent(rsc);
resource_checks_t *rc = calloc(1, sizeof(resource_checks_t));
if (role_s) {
enum rsc_role_e role = text2role(role_s);
if (role == RSC_ROLE_STOPPED) {
rc->flags |= rsc_remain_stopped;
} else if (pcmk_is_set(parent->flags, pe_rsc_promotable) &&
(role == RSC_ROLE_UNPROMOTED)) {
rc->flags |= rsc_unpromotable;
}
}
if (managed && !crm_is_true(managed)) {
rc->flags |= rsc_unmanaged;
}
if (rsc->lock_node) {
rc->lock_node = rsc->lock_node->details->uname;
}
rc->rsc = rsc;
return rc;
}
static GList *
build_node_info_list(pe_resource_t *rsc)
{
GList *retval = NULL;
for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *) iter->data;
for (GList *iter2 = child->running_on; iter2 != NULL; iter2 = iter2->next) {
pe_node_t *node = (pe_node_t *) iter2->data;
node_info_t *ni = calloc(1, sizeof(node_info_t));
ni->node_name = node->details->uname;
ni->promoted = pcmk_is_set(rsc->flags, pe_rsc_promotable) &&
child->fns->state(child, TRUE) == RSC_ROLE_PROMOTED;
retval = g_list_prepend(retval, ni);
}
}
return retval;
}
GList *
cli_resource_search(pe_resource_t *rsc, const char *requested_name,
pe_working_set_t *data_set)
{
GList *retval = NULL;
pe_resource_t *parent = uber_parent(rsc);
if (pe_rsc_is_clone(rsc)) {
retval = build_node_info_list(rsc);
/* The anonymous clone children's common ID is supplied */
} else if (pe_rsc_is_clone(parent)
&& !pcmk_is_set(rsc->flags, pe_rsc_unique)
&& rsc->clone_name
&& pcmk__str_eq(requested_name, rsc->clone_name, pcmk__str_casei)
&& !pcmk__str_eq(requested_name, rsc->id, pcmk__str_casei)) {
retval = build_node_info_list(parent);
} else if (rsc->running_on != NULL) {
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
node_info_t *ni = calloc(1, sizeof(node_info_t));
ni->node_name = node->details->uname;
ni->promoted = (rsc->fns->state(rsc, TRUE) == RSC_ROLE_PROMOTED);
retval = g_list_prepend(retval, ni);
}
}
return retval;
}
#define XPATH_MAX 1024
// \return Standard Pacemaker return code
static int
find_resource_attr(pcmk__output_t *out, cib_t * the_cib, const char *attr,
const char *rsc, const char *attr_set_type, const char *set_name,
const char *attr_id, const char *attr_name, char **value)
{
int offset = 0;
int rc = pcmk_rc_ok;
xmlNode *xml_search = NULL;
char *xpath_string = NULL;
const char *xpath_base = NULL;
if(value) {
*value = NULL;
}
if(the_cib == NULL) {
return ENOTCONN;
}
xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_RESOURCES);
if (xpath_base == NULL) {
crm_err(XML_CIB_TAG_RESOURCES " CIB element not known (bug?)");
return ENOMSG;
}
xpath_string = calloc(1, XPATH_MAX);
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s",
xpath_base);
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//*[@id=\"%s\"]", rsc);
if (attr_set_type) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s", attr_set_type);
if (set_name) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "[@id=\"%s\"]", set_name);
}
}
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//nvpair[");
if (attr_id) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@id=\"%s\"", attr_id);
}
if (attr_name) {
if (attr_id) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, " and ");
}
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@name=\"%s\"", attr_name);
}
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "]");
CRM_LOG_ASSERT(offset > 0);
rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search,
cib_sync_call | cib_scope_local | cib_xpath);
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
goto done;
}
crm_log_xml_debug(xml_search, "Match");
if (xml_has_children(xml_search)) {
xmlNode *child = NULL;
rc = ENOTUNIQ;
out->info(out, "Multiple attributes match name=%s", attr_name);
for (child = pcmk__xml_first_child(xml_search); child != NULL;
child = pcmk__xml_next(child)) {
out->info(out, " Value: %s \t(id=%s)",
crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child));
}
out->spacer(out);
} else if(value) {
const char *tmp = crm_element_value(xml_search, attr);
if (tmp) {
*value = strdup(tmp);
}
}
done:
free(xpath_string);
free_xml(xml_search);
return rc;
}
/* PRIVATE. Use the find_matching_attr_resources instead. */
static void
find_matching_attr_resources_recursive(pcmk__output_t *out, GList/* <pe_resource_t*> */ ** result,
pe_resource_t * rsc, const char * rsc_id,
const char * attr_set, const char * attr_set_type,
const char * attr_id, const char * attr_name,
cib_t * cib, const char * cmd, int depth)
{
int rc = pcmk_rc_ok;
char *lookup_id = clone_strip(rsc->id);
char *local_attr_id = NULL;
/* visit the children */
for(GList *gIter = rsc->children; gIter; gIter = gIter->next) {
find_matching_attr_resources_recursive(out, result, (pe_resource_t*)gIter->data,
rsc_id, attr_set, attr_set_type,
attr_id, attr_name, cib, cmd, depth+1);
/* do it only once for clones */
if(pe_clone == rsc->variant) {
break;
}
}
rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type,
attr_set, attr_id, attr_name, &local_attr_id);
/* Post-order traversal.
* The root is always on the list and it is the last item. */
if((0 == depth) || (pcmk_rc_ok == rc)) {
/* push the head */
*result = g_list_append(*result, rsc);
}
free(local_attr_id);
free(lookup_id);
}
/* The result is a linearized pre-ordered tree of resources. */
static GList/*<pe_resource_t*>*/ *
find_matching_attr_resources(pcmk__output_t *out, pe_resource_t * rsc,
const char * rsc_id, const char * attr_set,
const char * attr_set_type, const char * attr_id,
const char * attr_name, cib_t * cib, const char * cmd,
gboolean force)
{
int rc = pcmk_rc_ok;
char *lookup_id = NULL;
char *local_attr_id = NULL;
GList * result = NULL;
/* If --force is used, update only the requested resource (clone or primitive).
* Otherwise, if the primitive has the attribute, use that.
* Otherwise use the clone. */
if(force == TRUE) {
return g_list_append(result, rsc);
}
if(rsc->parent && pe_clone == rsc->parent->variant) {
int rc = pcmk_rc_ok;
char *local_attr_id = NULL;
rc = find_resource_attr(out, cib, XML_ATTR_ID, rsc_id, attr_set_type,
attr_set, attr_id, attr_name, &local_attr_id);
free(local_attr_id);
if(rc != pcmk_rc_ok) {
rsc = rsc->parent;
out->info(out, "Performing %s of '%s' on '%s', the parent of '%s'",
cmd, attr_name, rsc->id, rsc_id);
}
return g_list_append(result, rsc);
} else if(rsc->parent == NULL && rsc->children && pe_clone == rsc->variant) {
pe_resource_t *child = rsc->children->data;
if(child->variant == pe_native) {
lookup_id = clone_strip(child->id); /* Could be a cloned group! */
rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type,
attr_set, attr_id, attr_name, &local_attr_id);
if(rc == pcmk_rc_ok) {
rsc = child;
out->info(out, "A value for '%s' already exists in child '%s', performing %s on that instead of '%s'",
attr_name, lookup_id, cmd, rsc_id);
}
free(local_attr_id);
free(lookup_id);
}
return g_list_append(result, rsc);
}
/* If the resource is a group ==> children inherit the attribute if defined. */
find_matching_attr_resources_recursive(out, &result, rsc, rsc_id, attr_set,
attr_set_type, attr_id, attr_name,
cib, cmd, 0);
return result;
}
// \return Standard Pacemaker return code
int
cli_resource_update_attribute(pe_resource_t *rsc, const char *requested_name,
const char *attr_set, const char *attr_set_type,
const char *attr_id, const char *attr_name,
const char *attr_value, gboolean recursive,
cib_t *cib, int cib_options,
pe_working_set_t *data_set, gboolean force)
{
pcmk__output_t *out = data_set->priv;
int rc = pcmk_rc_ok;
static bool need_init = TRUE;
char *local_attr_id = NULL;
char *local_attr_set = NULL;
GList/*<pe_resource_t*>*/ *resources = NULL;
const char *common_attr_id = attr_id;
if (attr_id == NULL && force == FALSE) {
find_resource_attr (out, cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL,
NULL, NULL, attr_name, NULL);
}
if (pcmk__str_eq(attr_set_type, XML_TAG_ATTR_SETS, pcmk__str_casei)) {
if (force == FALSE) {
rc = find_resource_attr(out, cib, XML_ATTR_ID, uber_parent(rsc)->id,
XML_TAG_META_SETS, attr_set, attr_id,
attr_name, &local_attr_id);
if (rc == pcmk_rc_ok && !out->is_quiet(out)) {
out->err(out, "WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)",
uber_parent(rsc)->id, attr_name, local_attr_id);
out->err(out, " Delete '%s' first or use the force option to override",
local_attr_id);
}
free(local_attr_id);
if (rc == pcmk_rc_ok) {
return ENOTUNIQ;
}
}
resources = g_list_append(resources, rsc);
} else {
resources = find_matching_attr_resources(out, rsc, requested_name, attr_set, attr_set_type,
attr_id, attr_name, cib, "update", force);
}
/* If either attr_set or attr_id is specified,
* one clearly intends to modify a single resource.
* It is the last item on the resource list.*/
for(GList *gIter = (attr_set||attr_id) ? g_list_last(resources) : resources
; gIter; gIter = gIter->next) {
char *lookup_id = NULL;
xmlNode *xml_top = NULL;
xmlNode *xml_obj = NULL;
local_attr_id = NULL;
local_attr_set = NULL;
rsc = (pe_resource_t*)gIter->data;
attr_id = common_attr_id;
lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */
rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type,
attr_set, attr_id, attr_name, &local_attr_id);
if (rc == pcmk_rc_ok) {
crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id);
attr_id = local_attr_id;
} else if (rc != ENXIO) {
free(lookup_id);
free(local_attr_id);
g_list_free(resources);
return rc;
} else {
const char *tag = crm_element_name(rsc->xml);
if (attr_set == NULL) {
local_attr_set = crm_strdup_printf("%s-%s", lookup_id,
attr_set_type);
attr_set = local_attr_set;
}
if (attr_id == NULL) {
local_attr_id = crm_strdup_printf("%s-%s", attr_set, attr_name);
attr_id = local_attr_id;
}
xml_top = create_xml_node(NULL, tag);
crm_xml_add(xml_top, XML_ATTR_ID, lookup_id);
xml_obj = create_xml_node(xml_top, attr_set_type);
crm_xml_add(xml_obj, XML_ATTR_ID, attr_set);
}
xml_obj = crm_create_nvpair_xml(xml_obj, attr_id, attr_name, attr_value);
if (xml_top == NULL) {
xml_top = xml_obj;
}
crm_log_xml_debug(xml_top, "Update");
rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options);
rc = pcmk_legacy2rc(rc);
if (rc == pcmk_rc_ok) {
out->info(out, "Set '%s' option: id=%s%s%s%s%s value=%s", lookup_id, local_attr_id,
attr_set ? " set=" : "", attr_set ? attr_set : "",
attr_name ? " name=" : "", attr_name ? attr_name : "", attr_value);
}
free_xml(xml_top);
free(lookup_id);
free(local_attr_id);
free(local_attr_set);
if(recursive && pcmk__str_eq(attr_set_type, XML_TAG_META_SETS, pcmk__str_casei)) {
GList *lpc = NULL;
if(need_init) {
need_init = FALSE;
pcmk__unpack_constraints(data_set);
pe__clear_resource_flags_on_all(data_set, pe_rsc_allocating);
}
crm_debug("Looking for dependencies %p", rsc->rsc_cons_lhs);
pe__set_resource_flags(rsc, pe_rsc_allocating);
for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
crm_debug("Checking %s %d", cons->id, cons->score);
if ((cons->score > 0)
&& !pcmk_is_set(cons->dependent->flags, pe_rsc_allocating)) {
/* Don't get into colocation loops */
crm_debug("Setting %s=%s for dependent resource %s",
attr_name, attr_value, cons->dependent->id);
cli_resource_update_attribute(cons->dependent,
cons->dependent->id, NULL,
attr_set_type, NULL,
attr_name, attr_value,
recursive, cib, cib_options,
data_set, force);
}
}
}
}
g_list_free(resources);
return rc;
}
// \return Standard Pacemaker return code
int
cli_resource_delete_attribute(pe_resource_t *rsc, const char *requested_name,
const char *attr_set, const char *attr_set_type,
const char *attr_id, const char *attr_name,
cib_t *cib, int cib_options,
pe_working_set_t *data_set, gboolean force)
{
pcmk__output_t *out = data_set->priv;
int rc = pcmk_rc_ok;
GList/*<pe_resource_t*>*/ *resources = NULL;
if (attr_id == NULL && force == FALSE) {
find_resource_attr(out, cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL,
NULL, NULL, attr_name, NULL);
}
if(pcmk__str_eq(attr_set_type, XML_TAG_META_SETS, pcmk__str_casei)) {
resources = find_matching_attr_resources(out, rsc, requested_name, attr_set, attr_set_type,
attr_id, attr_name, cib, "delete", force);
} else {
resources = g_list_append(resources, rsc);
}
for(GList *gIter = resources; gIter; gIter = gIter->next) {
char *lookup_id = NULL;
xmlNode *xml_obj = NULL;
char *local_attr_id = NULL;
rsc = (pe_resource_t*)gIter->data;
lookup_id = clone_strip(rsc->id);
rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type,
attr_set, attr_id, attr_name, &local_attr_id);
if (rc == ENXIO) {
free(lookup_id);
rc = pcmk_rc_ok;
continue;
} else if (rc != pcmk_rc_ok) {
free(lookup_id);
g_list_free(resources);
return rc;
}
if (attr_id == NULL) {
attr_id = local_attr_id;
}
xml_obj = crm_create_nvpair_xml(NULL, attr_id, attr_name, NULL);
crm_log_xml_debug(xml_obj, "Delete");
CRM_ASSERT(cib);
rc = cib->cmds->remove(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options);
rc = pcmk_legacy2rc(rc);
if (rc == pcmk_rc_ok) {
out->info(out, "Deleted '%s' option: id=%s%s%s%s%s", lookup_id, local_attr_id,
attr_set ? " set=" : "", attr_set ? attr_set : "",
attr_name ? " name=" : "", attr_name ? attr_name : "");
}
free(lookup_id);
free_xml(xml_obj);
free(local_attr_id);
}
g_list_free(resources);
return rc;
}
// \return Standard Pacemaker return code
static int
send_lrm_rsc_op(pcmk_ipc_api_t *controld_api, bool do_fail_resource,
const char *host_uname, const char *rsc_id, pe_working_set_t *data_set)
{
pcmk__output_t *out = data_set->priv;
const char *router_node = host_uname;
const char *rsc_api_id = NULL;
const char *rsc_long_id = NULL;
const char *rsc_class = NULL;
const char *rsc_provider = NULL;
const char *rsc_type = NULL;
bool cib_only = false;
pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL) {
out->err(out, "Resource %s not found", rsc_id);
return ENXIO;
} else if (rsc->variant != pe_native) {
out->err(out, "We can only process primitive resources, not %s", rsc_id);
return EINVAL;
}
rsc_class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
rsc_provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER),
rsc_type = crm_element_value(rsc->xml, XML_ATTR_TYPE);
if ((rsc_class == NULL) || (rsc_type == NULL)) {
out->err(out, "Resource %s does not have a class and type", rsc_id);
return EINVAL;
}
{
pe_node_t *node = pe_find_node(data_set->nodes, host_uname);
if (node == NULL) {
out->err(out, "Node %s not found", host_uname);
return pcmk_rc_node_unknown;
}
if (!(node->details->online)) {
if (do_fail_resource) {
out->err(out, "Node %s is not online", host_uname);
return ENOTCONN;
} else {
cib_only = true;
}
}
if (!cib_only && pe__is_guest_or_remote_node(node)) {
node = pe__current_node(node->details->remote_rsc);
if (node == NULL) {
out->err(out, "No cluster connection to Pacemaker Remote node %s detected",
host_uname);
return ENOTCONN;
}
router_node = node->details->uname;
}
}
if (rsc->clone_name) {
rsc_api_id = rsc->clone_name;
rsc_long_id = rsc->id;
} else {
rsc_api_id = rsc->id;
}
if (do_fail_resource) {
return pcmk_controld_api_fail(controld_api, host_uname, router_node,
rsc_api_id, rsc_long_id,
rsc_class, rsc_provider, rsc_type);
} else {
return pcmk_controld_api_refresh(controld_api, host_uname, router_node,
rsc_api_id, rsc_long_id, rsc_class,
rsc_provider, rsc_type, cib_only);
}
}
/*!
* \internal
* \brief Get resource name as used in failure-related node attributes
*
* \param[in] rsc Resource to check
*
* \return Newly allocated string containing resource's fail name
* \note The caller is responsible for freeing the result.
*/
static inline char *
rsc_fail_name(pe_resource_t *rsc)
{
const char *name = (rsc->clone_name? rsc->clone_name : rsc->id);
return pcmk_is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name);
}
// \return Standard Pacemaker return code
static int
clear_rsc_history(pcmk_ipc_api_t *controld_api, const char *host_uname,
const char *rsc_id, pe_working_set_t *data_set)
{
int rc = pcmk_rc_ok;
/* Erase the resource's entire LRM history in the CIB, even if we're only
* clearing a single operation's fail count. If we erased only entries for a
* single operation, we might wind up with a wrong idea of the current
* resource state, and we might not re-probe the resource.
*/
rc = send_lrm_rsc_op(controld_api, false, host_uname, rsc_id, data_set);
if (rc != pcmk_rc_ok) {
return rc;
}
crm_trace("Processing %d mainloop inputs",
pcmk_controld_api_replies_expected(controld_api));
while (g_main_context_iteration(NULL, FALSE)) {
crm_trace("Processed mainloop input, %d still remaining",
pcmk_controld_api_replies_expected(controld_api));
}
return rc;
}
// \return Standard Pacemaker return code
static int
clear_rsc_failures(pcmk__output_t *out, pcmk_ipc_api_t *controld_api,
const char *node_name, const char *rsc_id, const char *operation,
const char *interval_spec, pe_working_set_t *data_set)
{
int rc = pcmk_rc_ok;
const char *failed_value = NULL;
const char *failed_id = NULL;
const char *interval_ms_s = NULL;
GHashTable *rscs = NULL;
GHashTableIter iter;
/* Create a hash table to use as a set of resources to clean. This lets us
* clean each resource only once (per node) regardless of how many failed
* operations it has.
*/
rscs = pcmk__strkey_table(NULL, NULL);
// Normalize interval to milliseconds for comparison to history entry
if (operation) {
interval_ms_s = crm_strdup_printf("%u",
crm_parse_interval_spec(interval_spec));
}
for (xmlNode *xml_op = pcmk__xml_first_child(data_set->failed);
xml_op != NULL;
xml_op = pcmk__xml_next(xml_op)) {
failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
if (failed_id == NULL) {
// Malformed history entry, should never happen
continue;
}
// No resource specified means all resources match
if (rsc_id) {
pe_resource_t *fail_rsc = pe_find_resource_with_flags(data_set->resources,
failed_id,
pe_find_renamed|pe_find_anon);
if (!fail_rsc || !pcmk__str_eq(rsc_id, fail_rsc->id, pcmk__str_casei)) {
continue;
}
}
// Host name should always have been provided by this point
failed_value = crm_element_value(xml_op, XML_ATTR_UNAME);
if (!pcmk__str_eq(node_name, failed_value, pcmk__str_casei)) {
continue;
}
// No operation specified means all operations match
if (operation) {
failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
if (!pcmk__str_eq(operation, failed_value, pcmk__str_casei)) {
continue;
}
// Interval (if operation was specified) defaults to 0 (not all)
failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
if (!pcmk__str_eq(interval_ms_s, failed_value, pcmk__str_casei)) {
continue;
}
}
g_hash_table_add(rscs, (gpointer) failed_id);
}
g_hash_table_iter_init(&iter, rscs);
while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) {
crm_debug("Erasing failures of %s on %s", failed_id, node_name);
rc = clear_rsc_history(controld_api, node_name, failed_id, data_set);
if (rc != pcmk_rc_ok) {
return rc;
}
}
g_hash_table_destroy(rscs);
return rc;
}
// \return Standard Pacemaker return code
static int
clear_rsc_fail_attrs(pe_resource_t *rsc, const char *operation,
const char *interval_spec, pe_node_t *node)
{
int rc = pcmk_rc_ok;
int attr_options = pcmk__node_attr_none;
char *rsc_name = rsc_fail_name(rsc);
if (pe__is_guest_or_remote_node(node)) {
attr_options |= pcmk__node_attr_remote;
}
rc = pcmk__node_attr_request_clear(NULL, node->details->uname, rsc_name,
operation, interval_spec, NULL,
attr_options);
free(rsc_name);
return rc;
}
// \return Standard Pacemaker return code
int
cli_resource_delete(pcmk_ipc_api_t *controld_api, const char *host_uname,
pe_resource_t *rsc, const char *operation,
const char *interval_spec, bool just_failures,
pe_working_set_t *data_set, gboolean force)
{
pcmk__output_t *out = data_set->priv;
int rc = pcmk_rc_ok;
pe_node_t *node = NULL;
if (rsc == NULL) {
return ENXIO;
} else if (rsc->children) {
GList *lpc = NULL;
for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) {
pe_resource_t *child = (pe_resource_t *) lpc->data;
rc = cli_resource_delete(controld_api, host_uname, child, operation,
interval_spec, just_failures, data_set,
force);
if (rc != pcmk_rc_ok) {
return rc;
}
}
return pcmk_rc_ok;
} else if (host_uname == NULL) {
GList *lpc = NULL;
GList *nodes = g_hash_table_get_values(rsc->known_on);
if(nodes == NULL && force) {
nodes = pcmk__copy_node_list(data_set->nodes, false);
} else if(nodes == NULL && rsc->exclusive_discover) {
GHashTableIter iter;
pe_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void**)&node)) {
if(node->weight >= 0) {
nodes = g_list_prepend(nodes, node);
}
}
} else if(nodes == NULL) {
nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
for (lpc = nodes; lpc != NULL; lpc = lpc->next) {
node = (pe_node_t *) lpc->data;
if (node->details->online) {
rc = cli_resource_delete(controld_api, node->details->uname,
rsc, operation, interval_spec,
just_failures, data_set, force);
}
if (rc != pcmk_rc_ok) {
g_list_free(nodes);
return rc;
}
}
g_list_free(nodes);
return pcmk_rc_ok;
}
node = pe_find_node(data_set->nodes, host_uname);
if (node == NULL) {
out->err(out, "Unable to clean up %s because node %s not found",
rsc->id, host_uname);
return ENODEV;
}
if (!node->details->rsc_discovery_enabled) {
out->err(out, "Unable to clean up %s because resource discovery disabled on %s",
rsc->id, host_uname);
return EOPNOTSUPP;
}
if (controld_api == NULL) {
out->err(out, "Dry run: skipping clean-up of %s on %s due to CIB_file",
rsc->id, host_uname);
return pcmk_rc_ok;
}
rc = clear_rsc_fail_attrs(rsc, operation, interval_spec, node);
if (rc != pcmk_rc_ok) {
out->err(out, "Unable to clean up %s failures on %s: %s",
rsc->id, host_uname, pcmk_rc_str(rc));
return rc;
}
if (just_failures) {
rc = clear_rsc_failures(out, controld_api, host_uname, rsc->id, operation,
interval_spec, data_set);
} else {
rc = clear_rsc_history(controld_api, host_uname, rsc->id, data_set);
}
if (rc != pcmk_rc_ok) {
out->err(out, "Cleaned %s failures on %s, but unable to clean history: %s",
rsc->id, host_uname, pcmk_strerror(rc));
} else {
out->info(out, "Cleaned up %s on %s", rsc->id, host_uname);
}
return rc;
}
// \return Standard Pacemaker return code
int
cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name,
const char *operation, const char *interval_spec,
pe_working_set_t *data_set)
{
pcmk__output_t *out = data_set->priv;
int rc = pcmk_rc_ok;
int attr_options = pcmk__node_attr_none;
const char *display_name = node_name? node_name : "all nodes";
if (controld_api == NULL) {
out->info(out, "Dry run: skipping clean-up of %s due to CIB_file",
display_name);
return rc;
}
if (node_name) {
pe_node_t *node = pe_find_node(data_set->nodes, node_name);
if (node == NULL) {
out->err(out, "Unknown node: %s", node_name);
return ENXIO;
}
if (pe__is_guest_or_remote_node(node)) {
attr_options |= pcmk__node_attr_remote;
}
}
rc = pcmk__node_attr_request_clear(NULL, node_name, NULL, operation,
interval_spec, NULL, attr_options);
if (rc != pcmk_rc_ok) {
out->err(out, "Unable to clean up all failures on %s: %s",
display_name, pcmk_rc_str(rc));
return rc;
}
if (node_name) {
rc = clear_rsc_failures(out, controld_api, node_name, NULL,
operation, interval_spec, data_set);
if (rc != pcmk_rc_ok) {
out->err(out, "Cleaned all resource failures on %s, but unable to clean history: %s",
node_name, pcmk_strerror(rc));
return rc;
}
} else {
for (GList *iter = data_set->nodes; iter; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
rc = clear_rsc_failures(out, controld_api, node->details->uname, NULL,
operation, interval_spec, data_set);
if (rc != pcmk_rc_ok) {
out->err(out, "Cleaned all resource failures on all nodes, but unable to clean history: %s",
pcmk_strerror(rc));
return rc;
}
}
}
out->info(out, "Cleaned up all resources on %s", display_name);
return rc;
}
int
cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc)
{
char *role_s = NULL;
char *managed = NULL;
pe_resource_t *parent = uber_parent(rsc);
int rc = pcmk_rc_no_output;
resource_checks_t *checks = NULL;
find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id,
NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed);
find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id,
NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s);
checks = cli_check_resource(rsc, role_s, managed);
if (checks->flags != 0 || checks->lock_node != NULL) {
rc = out->message(out, "resource-check-list", checks);
}
free(role_s);
free(managed);
free(checks);
return rc;
}
// \return Standard Pacemaker return code
int
cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname,
const char *rsc_id, pe_working_set_t *data_set)
{
crm_notice("Failing %s on %s", rsc_id, host_uname);
return send_lrm_rsc_op(controld_api, true, host_uname, rsc_id, data_set);
}
static GHashTable *
generate_resource_params(pe_resource_t *rsc, pe_node_t *node,
pe_working_set_t *data_set)
{
GHashTable *params = NULL;
GHashTable *meta = NULL;
GHashTable *combined = NULL;
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
combined = pcmk__strkey_table(free, free);
params = pe_rsc_params(rsc, node, data_set);
if (params != NULL) {
g_hash_table_iter_init(&iter, params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
g_hash_table_insert(combined, strdup(key), strdup(value));
}
}
meta = pcmk__strkey_table(free, free);
get_meta_attributes(meta, rsc, node, data_set);
if (meta != NULL) {
g_hash_table_iter_init(&iter, meta);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
char *crm_name = crm_meta_name(key);
g_hash_table_insert(combined, crm_name, strdup(value));
}
g_hash_table_destroy(meta);
}
return combined;
}
bool resource_is_running_on(pe_resource_t *rsc, const char *host)
{
bool found = TRUE;
GList *hIter = NULL;
GList *hosts = NULL;
if(rsc == NULL) {
return FALSE;
}
rsc->fns->location(rsc, &hosts, TRUE);
for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) {
pe_node_t *node = (pe_node_t *) hIter->data;
if(strcmp(host, node->details->uname) == 0) {
crm_trace("Resource %s is running on %s\n", rsc->id, host);
goto done;
} else if(strcmp(host, node->details->id) == 0) {
crm_trace("Resource %s is running on %s\n", rsc->id, host);
goto done;
}
}
if(host != NULL) {
crm_trace("Resource %s is not running on: %s\n", rsc->id, host);
found = FALSE;
} else if(host == NULL && hosts == NULL) {
crm_trace("Resource %s is not running\n", rsc->id);
found = FALSE;
}
done:
g_list_free(hosts);
return found;
}
/*!
* \internal
* \brief Create a list of all resources active on host from a given list
*
* \param[in] host Name of host to check whether resources are active
* \param[in] rsc_list List of resources to check
*
* \return New list of resources from list that are active on host
*/
static GList *
get_active_resources(const char *host, GList *rsc_list)
{
GList *rIter = NULL;
GList *active = NULL;
for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) {
pe_resource_t *rsc = (pe_resource_t *) rIter->data;
/* Expand groups to their members, because if we're restarting a member
* other than the first, we can't otherwise tell which resources are
* stopping and starting.
*/
if (rsc->variant == pe_group) {
active = g_list_concat(active,
get_active_resources(host, rsc->children));
} else if (resource_is_running_on(rsc, host)) {
active = g_list_append(active, strdup(rsc->id));
}
}
return active;
}
static void dump_list(GList *items, const char *tag)
{
int lpc = 0;
GList *item = NULL;
for (item = items; item != NULL; item = item->next) {
crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data);
lpc++;
}
}
static void display_list(pcmk__output_t *out, GList *items, const char *tag)
{
GList *item = NULL;
for (item = items; item != NULL; item = item->next) {
out->info(out, "%s%s", tag, (const char *)item->data);
}
}
/*!
* \internal
* \brief Upgrade XML to latest schema version and use it as working set input
*
* This also updates the working set timestamp to the current time.
*
* \param[in] data_set Working set instance to update
* \param[in] xml XML to use as input
*
* \return Standard Pacemaker return code
* \note On success, caller is responsible for freeing memory allocated for
* data_set->now.
* \todo This follows the example of other callers of cli_config_update()
* and returns ENOKEY ("Required key not available") if that fails,
* but perhaps pcmk_rc_schema_validation would be better in that case.
*/
int
update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml)
{
if (cli_config_update(xml, NULL, FALSE) == FALSE) {
return ENOKEY;
}
data_set->input = *xml;
data_set->now = crm_time_new(NULL);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Update a working set's XML input based on a CIB query
*
* \param[in] data_set Data set instance to initialize
* \param[in] cib Connection to the CIB manager
*
* \return Standard Pacemaker return code
* \note On success, caller is responsible for freeing memory allocated for
* data_set->input and data_set->now.
*/
static int
update_working_set_from_cib(pcmk__output_t *out, pe_working_set_t * data_set,
cib_t *cib)
{
xmlNode *cib_xml_copy = NULL;
int rc = pcmk_rc_ok;
rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call);
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
out->err(out, "Could not obtain the current CIB: %s (%d)", pcmk_strerror(rc), rc);
return rc;
}
rc = update_working_set_xml(data_set, &cib_xml_copy);
if (rc != pcmk_rc_ok) {
out->err(out, "Could not upgrade the current CIB XML");
free_xml(cib_xml_copy);
return rc;
}
return rc;
}
// \return Standard Pacemaker return code
static int
update_dataset(cib_t *cib, pe_working_set_t * data_set, bool simulate)
{
char *pid = NULL;
char *shadow_file = NULL;
cib_t *shadow_cib = NULL;
int rc = pcmk_rc_ok;
pcmk__output_t *out = data_set->priv;
pe_reset_working_set(data_set);
rc = update_working_set_from_cib(out, data_set, cib);
if (rc != pcmk_rc_ok) {
return rc;
}
if(simulate) {
bool prev_quiet = false;
pid = pcmk__getpid_s();
shadow_cib = cib_shadow_new(pid);
shadow_file = get_shadow_file(pid);
if (shadow_cib == NULL) {
out->err(out, "Could not create shadow cib: '%s'", pid);
rc = ENXIO;
goto done;
}
rc = write_xml_file(data_set->input, shadow_file, FALSE);
if (rc < 0) {
out->err(out, "Could not populate shadow cib: %s (%d)", pcmk_strerror(rc), rc);
goto done;
}
rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command);
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
out->err(out, "Could not connect to shadow cib: %s (%d)", pcmk_strerror(rc), rc);
goto done;
}
pcmk__schedule_actions(data_set, data_set->input, NULL);
prev_quiet = out->is_quiet(out);
out->quiet = true;
pcmk__simulate_transition(data_set, shadow_cib, NULL);
out->quiet = prev_quiet;
rc = update_dataset(shadow_cib, data_set, FALSE);
} else {
cluster_status(data_set);
}
done:
/* Do not free data_set->input here, we need rsc->xml to be valid later on */
cib_delete(shadow_cib);
free(pid);
if(shadow_file) {
unlink(shadow_file);
free(shadow_file);
}
return rc;
}
static int
max_delay_for_resource(pe_working_set_t * data_set, pe_resource_t *rsc)
{
int delay = 0;
int max_delay = 0;
if(rsc && rsc->children) {
GList *iter = NULL;
for(iter = rsc->children; iter; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *)iter->data;
delay = max_delay_for_resource(data_set, child);
if(delay > max_delay) {
double seconds = delay / 1000.0;
crm_trace("Calculated new delay of %.1fs due to %s", seconds, child->id);
max_delay = delay;
}
}
} else if(rsc) {
char *key = crm_strdup_printf("%s_%s_0", rsc->id, RSC_STOP);
pe_action_t *stop = custom_action(rsc, key, RSC_STOP, NULL, TRUE, FALSE, data_set);
const char *value = g_hash_table_lookup(stop->meta, XML_ATTR_TIMEOUT);
long long result_ll;
if ((pcmk__scan_ll(value, &result_ll, -1LL) == pcmk_rc_ok)
&& (result_ll >= 0) && (result_ll <= INT_MAX)) {
max_delay = (int) result_ll;
} else {
max_delay = -1;
}
pe_free_action(stop);
}
return max_delay;
}
static int
max_delay_in(pe_working_set_t * data_set, GList *resources)
{
int max_delay = 0;
GList *item = NULL;
for (item = resources; item != NULL; item = item->next) {
int delay = 0;
pe_resource_t *rsc = pe_find_resource(data_set->resources, (const char *)item->data);
delay = max_delay_for_resource(data_set, rsc);
if(delay > max_delay) {
double seconds = delay / 1000.0;
crm_trace("Calculated new delay of %.1fs due to %s", seconds, rsc->id);
max_delay = delay;
}
}
return 5 + (max_delay / 1000);
}
#define waiting_for_starts(d, r, h) ((d != NULL) || \
(!resource_is_running_on((r), (h))))
/*!
* \internal
* \brief Restart a resource (on a particular host if requested).
*
* \param[in] rsc The resource to restart
* \param[in] host The host to restart the resource on (or NULL for all)
* \param[in] timeout_ms Consider failed if actions do not complete in this time
* (specified in milliseconds, but a two-second
* granularity is actually used; if 0, a timeout will be
* calculated based on the resource timeout)
* \param[in] cib Connection to the CIB manager
*
* \return Standard Pacemaker return code (exits on certain failures)
*/
int
cli_resource_restart(pcmk__output_t *out, pe_resource_t *rsc, const char *host,
const char *move_lifetime, int timeout_ms, cib_t *cib,
int cib_options, gboolean promoted_role_only, gboolean force)
{
int rc = pcmk_rc_ok;
int lpc = 0;
int before = 0;
int step_timeout_s = 0;
int sleep_interval = 2;
int timeout = timeout_ms / 1000;
bool stop_via_ban = FALSE;
char *rsc_id = NULL;
char *orig_target_role = NULL;
GList *list_delta = NULL;
GList *target_active = NULL;
GList *current_active = NULL;
GList *restart_target_active = NULL;
pe_working_set_t *data_set = NULL;
if (!resource_is_running_on(rsc, host)) {
const char *id = rsc->clone_name?rsc->clone_name:rsc->id;
if(host) {
out->err(out, "%s is not running on %s and so cannot be restarted", id, host);
} else {
out->err(out, "%s is not running anywhere and so cannot be restarted", id);
}
return ENXIO;
}
rsc_id = strdup(rsc->id);
if ((pe_rsc_is_clone(rsc) || pe_bundle_replicas(rsc)) && host) {
stop_via_ban = TRUE;
}
/*
grab full cib
determine originally active resources
disable or ban
poll cib and watch for affected resources to get stopped
without --timeout, calculate the stop timeout for each step and wait for that
if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down
if everything stopped, re-enable or un-ban
poll cib and watch for affected resources to get started
without --timeout, calculate the start timeout for each step and wait for that
if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up
report success
Optimizations:
- use constraints to determine ordered list of affected resources
- Allow a --no-deps option (aka. --force-restart)
*/
data_set = pe_new_working_set();
if (data_set == NULL) {
crm_perror(LOG_ERR, "Could not allocate working set");
rc = ENOMEM;
goto done;
}
data_set->priv = out;
pe__set_working_set_flags(data_set, pe_flag_no_counts|pe_flag_no_compat);
rc = update_dataset(cib, data_set, FALSE);
if(rc != pcmk_rc_ok) {
out->err(out, "Could not get new resource list: %s (%d)", pcmk_strerror(rc), rc);
goto done;
}
restart_target_active = get_active_resources(host, data_set->resources);
current_active = get_active_resources(host, data_set->resources);
dump_list(current_active, "Origin");
if (stop_via_ban) {
/* Stop the clone or bundle instance by banning it from the host */
out->quiet = true;
rc = cli_resource_ban(out, rsc_id, host, move_lifetime, NULL, cib,
cib_options, promoted_role_only);
} else {
/* Stop the resource by setting target-role to Stopped.
* Remember any existing target-role so we can restore it later
* (though it only makes any difference if it's Unpromoted).
*/
char *lookup_id = clone_strip(rsc->id);
find_resource_attr(out, cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL,
NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role);
free(lookup_id);
rc = cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS,
NULL, XML_RSC_ATTR_TARGET_ROLE,
RSC_STOPPED, FALSE, cib, cib_options,
data_set, force);
}
if(rc != pcmk_rc_ok) {
out->err(out, "Could not set target-role for %s: %s (%d)", rsc_id, pcmk_strerror(rc), rc);
if (current_active) {
g_list_free_full(current_active, free);
}
if (restart_target_active) {
g_list_free_full(restart_target_active, free);
}
goto done;
}
rc = update_dataset(cib, data_set, TRUE);
if(rc != pcmk_rc_ok) {
out->err(out, "Could not determine which resources would be stopped");
goto failure;
}
target_active = get_active_resources(host, data_set->resources);
dump_list(target_active, "Target");
list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp);
out->info(out, "Waiting for %d resources to stop:", g_list_length(list_delta));
display_list(out, list_delta, " * ");
step_timeout_s = timeout / sleep_interval;
while (list_delta != NULL) {
before = g_list_length(list_delta);
if(timeout_ms == 0) {
step_timeout_s = max_delay_in(data_set, list_delta) / sleep_interval;
}
/* We probably don't need the entire step timeout */
for(lpc = 0; (lpc < step_timeout_s) && (list_delta != NULL); lpc++) {
sleep(sleep_interval);
if(timeout) {
timeout -= sleep_interval;
crm_trace("%ds remaining", timeout);
}
rc = update_dataset(cib, data_set, FALSE);
if(rc != pcmk_rc_ok) {
out->err(out, "Could not determine which resources were stopped");
goto failure;
}
if (current_active) {
g_list_free_full(current_active, free);
}
current_active = get_active_resources(host, data_set->resources);
g_list_free(list_delta);
list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp);
dump_list(current_active, "Current");
dump_list(list_delta, "Delta");
}
crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before);
if(before == g_list_length(list_delta)) {
/* aborted during stop phase, print the contents of list_delta */
out->err(out, "Could not complete shutdown of %s, %d resources remaining", rsc_id, g_list_length(list_delta));
display_list(out, list_delta, " * ");
rc = ETIME;
goto failure;
}
}
if (stop_via_ban) {
rc = cli_resource_clear(rsc_id, host, NULL, cib, cib_options, TRUE, force);
} else if (orig_target_role) {
rc = cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS,
NULL, XML_RSC_ATTR_TARGET_ROLE,
orig_target_role, FALSE, cib,
cib_options, data_set, force);
free(orig_target_role);
orig_target_role = NULL;
} else {
rc = cli_resource_delete_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS,
NULL, XML_RSC_ATTR_TARGET_ROLE, cib,
cib_options, data_set, force);
}
if(rc != pcmk_rc_ok) {
out->err(out, "Could not unset target-role for %s: %s (%d)", rsc_id, pcmk_strerror(rc), rc);
goto done;
}
if (target_active) {
g_list_free_full(target_active, free);
}
target_active = restart_target_active;
list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp);
out->info(out, "Waiting for %d resources to start again:", g_list_length(list_delta));
display_list(out, list_delta, " * ");
step_timeout_s = timeout / sleep_interval;
while (waiting_for_starts(list_delta, rsc, host)) {
before = g_list_length(list_delta);
if(timeout_ms == 0) {
step_timeout_s = max_delay_in(data_set, list_delta) / sleep_interval;
}
/* We probably don't need the entire step timeout */
for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) {
sleep(sleep_interval);
if(timeout) {
timeout -= sleep_interval;
crm_trace("%ds remaining", timeout);
}
rc = update_dataset(cib, data_set, FALSE);
if(rc != pcmk_rc_ok) {
out->err(out, "Could not determine which resources were started");
goto failure;
}
if (current_active) {
g_list_free_full(current_active, free);
}
/* It's OK if dependent resources moved to a different node,
* so we check active resources on all nodes.
*/
current_active = get_active_resources(NULL, data_set->resources);
g_list_free(list_delta);
list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp);
dump_list(current_active, "Current");
dump_list(list_delta, "Delta");
}
if(before == g_list_length(list_delta)) {
/* aborted during start phase, print the contents of list_delta */
out->err(out, "Could not complete restart of %s, %d resources remaining", rsc_id, g_list_length(list_delta));
display_list(out, list_delta, " * ");
rc = ETIME;
goto failure;
}
}
rc = pcmk_rc_ok;
goto done;
failure:
if (stop_via_ban) {
cli_resource_clear(rsc_id, host, NULL, cib, cib_options, TRUE, force);
} else if (orig_target_role) {
cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS, NULL,
XML_RSC_ATTR_TARGET_ROLE, orig_target_role,
FALSE, cib, cib_options, data_set, force);
free(orig_target_role);
} else {
cli_resource_delete_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS, NULL,
XML_RSC_ATTR_TARGET_ROLE, cib, cib_options,
data_set, force);
}
done:
if (list_delta) {
g_list_free(list_delta);
}
if (current_active) {
g_list_free_full(current_active, free);
}
if (target_active && (target_active != restart_target_active)) {
g_list_free_full(target_active, free);
}
if (restart_target_active) {
g_list_free_full(restart_target_active, free);
}
free(rsc_id);
pe_free_working_set(data_set);
return rc;
}
static inline bool action_is_pending(pe_action_t *action)
{
if (pcmk_any_flags_set(action->flags, pe_action_optional|pe_action_pseudo)
|| !pcmk_is_set(action->flags, pe_action_runnable)
|| pcmk__str_eq("notify", action->task, pcmk__str_casei)) {
return false;
}
return true;
}
/*!
* \internal
* \brief Return TRUE if any actions in a list are pending
*
* \param[in] actions List of actions to check
*
* \return TRUE if any actions in the list are pending, FALSE otherwise
*/
static bool
actions_are_pending(GList *actions)
{
GList *action;
for (action = actions; action != NULL; action = action->next) {
pe_action_t *a = (pe_action_t *)action->data;
if (action_is_pending(a)) {
crm_notice("Waiting for %s (flags=%#.8x)", a->uuid, a->flags);
return TRUE;
}
}
return FALSE;
}
static void
print_pending_actions(pcmk__output_t *out, GList *actions)
{
GList *action;
out->info(out, "Pending actions:");
for (action = actions; action != NULL; action = action->next) {
pe_action_t *a = (pe_action_t *) action->data;
if (!action_is_pending(a)) {
continue;
}
if (a->node) {
out->info(out, "\tAction %d: %s\ton %s", a->id, a->uuid, a->node->details->uname);
} else {
out->info(out, "\tAction %d: %s", a->id, a->uuid);
}
}
}
/* For --wait, timeout (in seconds) to use if caller doesn't specify one */
#define WAIT_DEFAULT_TIMEOUT_S (60 * 60)
/* For --wait, how long to sleep between cluster state checks */
#define WAIT_SLEEP_S (2)
/*!
* \internal
* \brief Wait until all pending cluster actions are complete
*
* This waits until either the CIB's transition graph is idle or a timeout is
* reached.
*
* \param[in] timeout_ms Consider failed if actions do not complete in this time
* (specified in milliseconds, but one-second granularity
* is actually used; if 0, a default will be used)
* \param[in] cib Connection to the CIB manager
*
* \return Standard Pacemaker return code
*/
int
wait_till_stable(pcmk__output_t *out, int timeout_ms, cib_t * cib)
{
pe_working_set_t *data_set = NULL;
int rc = pcmk_rc_ok;
int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S;
time_t expire_time = time(NULL) + timeout_s;
time_t time_diff;
bool printed_version_warning = out->is_quiet(out); // i.e. don't print if quiet
data_set = pe_new_working_set();
if (data_set == NULL) {
return ENOMEM;
}
pe__set_working_set_flags(data_set, pe_flag_no_counts|pe_flag_no_compat);
do {
/* Abort if timeout is reached */
time_diff = expire_time - time(NULL);
if (time_diff > 0) {
crm_info("Waiting up to %ld seconds for cluster actions to complete", time_diff);
} else {
print_pending_actions(out, data_set->actions);
pe_free_working_set(data_set);
return ETIME;
}
if (rc == pcmk_rc_ok) { /* this avoids sleep on first loop iteration */
sleep(WAIT_SLEEP_S);
}
/* Get latest transition graph */
pe_reset_working_set(data_set);
rc = update_working_set_from_cib(out, data_set, cib);
if (rc != pcmk_rc_ok) {
pe_free_working_set(data_set);
return rc;
}
pcmk__schedule_actions(data_set, data_set->input, NULL);
if (!printed_version_warning) {
/* If the DC has a different version than the local node, the two
* could come to different conclusions about what actions need to be
* done. Warn the user in this case.
*
* @TODO A possible long-term solution would be to reimplement the
* wait as a new controller operation that would be forwarded to the
* DC. However, that would have potential problems of its own.
*/
const char *dc_version = g_hash_table_lookup(data_set->config_hash,
"dc-version");
if (!pcmk__str_eq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION, pcmk__str_casei)) {
out->info(out, "warning: wait option may not work properly in "
"mixed-version cluster");
printed_version_warning = TRUE;
}
}
} while (actions_are_pending(data_set->actions));
pe_free_working_set(data_set);
return rc;
}
static const char *
get_action(const char *rsc_action) {
const char *action = NULL;
if (pcmk__str_eq(rsc_action, "validate", pcmk__str_casei)) {
action = "validate-all";
} else if (pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) {
action = "monitor";
} else if (pcmk__strcase_any_of(rsc_action, "force-start", "force-stop",
"force-demote", "force-promote", NULL)) {
action = rsc_action+6;
} else {
action = rsc_action;
}
return action;
}
/*!
* \brief Set up environment variables as expected by resource agents
*
* When the cluster executes resource agents, it adds certain environment
* variables (directly or via resource meta-attributes) expected by some
* resource agents. Add the essential ones that many resource agents expect, so
* the behavior is the same for command-line execution.
*
* \param[in] params Resource parameters that will be passed to agent
* \param[in] timeout_ms Action timeout (in milliseconds)
* \param[in] check_level OCF check level
* \param[in] verbosity Verbosity level
*/
static void
set_agent_environment(GHashTable *params, int timeout_ms, int check_level,
int verbosity)
{
g_hash_table_insert(params, strdup("CRM_meta_timeout"),
crm_strdup_printf("%d", timeout_ms));
g_hash_table_insert(params, strdup(XML_ATTR_CRM_VERSION),
strdup(CRM_FEATURE_SET));
if (check_level >= 0) {
char *level = crm_strdup_printf("%d", check_level);
setenv("OCF_CHECK_LEVEL", level, 1);
free(level);
}
setenv("HA_debug", (verbosity > 0)? "1" : "0", 1);
if (verbosity > 1) {
setenv("OCF_TRACE_RA", "1", 1);
}
/* A resource agent using the standard ocf-shellfuncs library will not print
* messages to stderr if it doesn't have a controlling terminal (e.g. if
* crm_resource is called via script or ssh). This forces it to do so.
*/
setenv("OCF_TRACE_FILE", "/dev/stderr", 0);
}
/*!
* \internal
* \brief Apply command-line overrides to resource parameters
*
* \param[in] params Parameters to be passed to agent
* \param[in] overrides Parameters to override (or NULL if none)
*/
static void
apply_overrides(GHashTable *params, GHashTable *overrides)
{
if (overrides != NULL) {
GHashTableIter iter;
char *name = NULL;
char *value = NULL;
g_hash_table_iter_init(&iter, overrides);
while (g_hash_table_iter_next(&iter, (gpointer *) &name,
(gpointer *) &value)) {
g_hash_table_replace(params, strdup(name), strdup(value));
}
}
}
crm_exit_t
cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name,
const char *rsc_class, const char *rsc_prov,
const char *rsc_type, const char *rsc_action,
GHashTable *params, GHashTable *override_hash,
int timeout_ms, int resource_verbose, gboolean force,
int check_level)
{
const char *class = rsc_class;
const char *action = get_action(rsc_action);
crm_exit_t exit_code = CRM_EX_OK;
svc_action_t *op = NULL;
// If no timeout was provided, use the same default as the cluster
if (timeout_ms == 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
set_agent_environment(params, timeout_ms, check_level, resource_verbose);
apply_overrides(params, override_hash);
op = services__create_resource_action(rsc_name? rsc_name : "test",
rsc_class, rsc_prov, rsc_type, action,
0, timeout_ms, params, 0);
if (op == NULL) {
out->err(out, "Could not execute %s using %s%s%s:%s: %s",
action, rsc_class, (rsc_prov? ":" : ""),
(rsc_prov? rsc_prov : ""), rsc_type, strerror(ENOMEM));
g_hash_table_destroy(params);
return CRM_EX_OSERR;
}
if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
class = resources_find_service_class(rsc_type);
}
if (!pcmk__strcase_any_of(class, PCMK_RESOURCE_CLASS_OCF,
PCMK_RESOURCE_CLASS_LSB, NULL)) {
- services__set_result(op, CRM_EX_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR,
- "Manual execution of this standard is unsupported");
+ services__format_result(op, CRM_EX_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR,
+ "Manual execution of the %s standard "
+ "is unsupported", crm_str(class));
}
if (op->rc != PCMK_OCF_UNKNOWN) {
exit_code = op->rc;
goto done;
}
services_action_sync(op);
// Map results to OCF codes for consistent reporting to user
{
enum ocf_exitcode ocf_code = services_result2ocf(class, action, op->rc);
// Cast variable instead of function return to keep compilers happy
exit_code = (crm_exit_t) ocf_code;
}
done:
out->message(out, "resource-agent-action", resource_verbose, rsc_class,
rsc_prov, rsc_type, rsc_name, rsc_action, override_hash,
exit_code, op->status, services__exit_reason(op),
op->stdout_data, op->stderr_data);
services_action_free(op);
return exit_code;
}
crm_exit_t
cli_resource_execute(pe_resource_t *rsc, const char *requested_name,
const char *rsc_action, GHashTable *override_hash,
int timeout_ms, cib_t * cib, pe_working_set_t *data_set,
int resource_verbose, gboolean force, int check_level)
{
pcmk__output_t *out = data_set->priv;
crm_exit_t exit_code = CRM_EX_OK;
const char *rid = NULL;
const char *rtype = NULL;
const char *rprov = NULL;
const char *rclass = NULL;
GHashTable *params = NULL;
if (pcmk__strcase_any_of(rsc_action, "force-start", "force-demote",
"force-promote", NULL)) {
if(pe_rsc_is_clone(rsc)) {
GList *nodes = cli_resource_search(rsc, requested_name, data_set);
if(nodes != NULL && force == FALSE) {
out->err(out, "It is not safe to %s %s here: the cluster claims it is already active",
rsc_action, rsc->id);
out->err(out, "Try setting target-role=Stopped first or specifying "
"the force option");
return CRM_EX_UNSAFE;
}
g_list_free_full(nodes, free);
}
}
if(pe_rsc_is_clone(rsc)) {
/* Grab the first child resource in the hope it's not a group */
rsc = rsc->children->data;
}
if(rsc->variant == pe_group) {
out->err(out, "Sorry, the %s option doesn't support group resources", rsc_action);
return CRM_EX_UNIMPLEMENT_FEATURE;
} else if (rsc->variant == pe_container || pe_rsc_is_bundled(rsc)) {
out->err(out, "Sorry, the %s option doesn't support bundled resources", rsc_action);
return CRM_EX_UNIMPLEMENT_FEATURE;
}
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
params = generate_resource_params(rsc, NULL /* @TODO use local node */,
data_set);
if (timeout_ms == 0) {
timeout_ms = pe_get_configured_timeout(rsc, get_action(rsc_action), data_set);
}
rid = pe_rsc_is_anon_clone(rsc->parent)? requested_name : rsc->id;
exit_code = cli_resource_execute_from_params(out, rid, rclass, rprov, rtype, rsc_action,
params, override_hash, timeout_ms,
resource_verbose, force, check_level);
return exit_code;
}
// \return Standard Pacemaker return code
int
cli_resource_move(pe_resource_t *rsc, const char *rsc_id, const char *host_name,
const char *move_lifetime, cib_t *cib, int cib_options,
pe_working_set_t *data_set, gboolean promoted_role_only,
gboolean force)
{
pcmk__output_t *out = data_set->priv;
int rc = pcmk_rc_ok;
unsigned int count = 0;
pe_node_t *current = NULL;
pe_node_t *dest = pe_find_node(data_set->nodes, host_name);
bool cur_is_dest = FALSE;
if (dest == NULL) {
return pcmk_rc_node_unknown;
}
if (promoted_role_only && !pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
pe_resource_t *p = uber_parent(rsc);
if (pcmk_is_set(p->flags, pe_rsc_promotable)) {
out->info(out, "Using parent '%s' for move instead of '%s'.", rsc->id, rsc_id);
rsc_id = p->id;
rsc = p;
} else {
out->info(out, "Ignoring master option: %s is not promotable", rsc_id);
promoted_role_only = FALSE;
}
}
current = pe__find_active_requires(rsc, &count);
if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) {
GList *iter = NULL;
unsigned int promoted_count = 0;
pe_node_t *promoted_node = NULL;
for(iter = rsc->children; iter; iter = iter->next) {
pe_resource_t *child = (pe_resource_t *)iter->data;
enum rsc_role_e child_role = child->fns->state(child, TRUE);
if (child_role == RSC_ROLE_PROMOTED) {
rsc = child;
promoted_node = pe__current_node(child);
promoted_count++;
}
}
if (promoted_role_only || (promoted_count != 0)) {
count = promoted_count;
current = promoted_node;
}
}
if (count > 1) {
if (pe_rsc_is_clone(rsc)) {
current = NULL;
} else {
return pcmk_rc_multiple;
}
}
if (current && (current->details == dest->details)) {
cur_is_dest = TRUE;
if (force) {
crm_info("%s is already %s on %s, reinforcing placement with location constraint.",
rsc_id, promoted_role_only?"promoted":"active", dest->details->uname);
} else {
return pcmk_rc_already;
}
}
/* Clear any previous prefer constraints across all nodes. */
cli_resource_clear(rsc_id, NULL, data_set->nodes, cib, cib_options, FALSE, force);
/* Clear any previous ban constraints on 'dest'. */
cli_resource_clear(rsc_id, dest->details->uname, data_set->nodes, cib,
cib_options, TRUE, force);
/* Record an explicit preference for 'dest' */
rc = cli_resource_prefer(out, rsc_id, dest->details->uname, move_lifetime,
cib, cib_options, promoted_role_only);
crm_trace("%s%s now prefers node %s%s",
rsc->id, (promoted_role_only? " (promoted)" : ""),
dest->details->uname, force?"(forced)":"");
/* only ban the previous location if current location != destination location.
* it is possible to use -M to enforce a location without regard of where the
* resource is currently located */
if(force && (cur_is_dest == FALSE)) {
/* Ban the original location if possible */
if(current) {
(void)cli_resource_ban(out, rsc_id, current->details->uname, move_lifetime,
NULL, cib, cib_options, promoted_role_only);
} else if(count > 1) {
out->info(out, "Resource '%s' is currently %s in %d locations. "
"One may now move to %s",
rsc_id, (promoted_role_only? "promoted" : "active"),
count, dest->details->uname);
out->info(out, "To prevent '%s' from being %s at a specific location, "
"specify a node.",
rsc_id, (promoted_role_only? "promoted" : "active"));
} else {
crm_trace("Not banning %s from its current location: not active", rsc_id);
}
}
return rc;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:37 PM (6 h, 59 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002721
Default Alt Text
(151 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment