Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4639804
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
126 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/include/crm/services.h b/include/crm/services.h
index 17879991e2..aa3be7e722 100644
--- a/include/crm/services.h
+++ b/include/crm/services.h
@@ -1,416 +1,414 @@
/*
* Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/**
* \file
* \brief Services API
* \ingroup core
*/
#ifndef __PCMK_SERVICES__
# define __PCMK_SERVICES__
# ifdef __cplusplus
extern "C" {
# endif
# include <glib.h>
# include <stdio.h>
# include <string.h>
# include <stdbool.h>
# ifndef OCF_ROOT_DIR
# define OCF_ROOT_DIR "/usr/lib/ocf"
# endif
# ifndef LSB_ROOT_DIR
# define LSB_ROOT_DIR "/etc/init.d"
# endif
/* TODO: Autodetect these two ?*/
# ifndef SYSTEMCTL
# define SYSTEMCTL "/bin/systemctl"
# endif
/* Deprecated and unused by Pacemaker, kept for API backward compatibility */
# ifndef SERVICE_SCRIPT
# define SERVICE_SCRIPT "/sbin/service"
# endif
/* Known resource classes */
#define PCMK_RESOURCE_CLASS_OCF "ocf"
#define PCMK_RESOURCE_CLASS_SERVICE "service"
#define PCMK_RESOURCE_CLASS_LSB "lsb"
#define PCMK_RESOURCE_CLASS_SYSTEMD "systemd"
#define PCMK_RESOURCE_CLASS_UPSTART "upstart"
#define PCMK_RESOURCE_CLASS_HB "heartbeat"
#define PCMK_RESOURCE_CLASS_NAGIOS "nagios"
#define PCMK_RESOURCE_CLASS_STONITH "stonith"
#define PCMK_ALERT_CLASS "alert"
/* This is the string passed in the OCF_EXIT_REASON_PREFIX
* environment variable. The stderr output that occurs
* after this prefix is encountered is considered the exit
* reason for a completed operation */
#define PCMK_OCF_REASON_PREFIX "ocf-exit-reason:"
/* Exit codes for LSB actions other than "status".
 * The numeric values 0-7 are identical to the first eight PCMK_OCF_* codes,
 * which is what lets services_get_ocf_exitcode() cast them directly.
 */
enum lsb_exitcode {
PCMK_LSB_OK = 0,
PCMK_LSB_UNKNOWN_ERROR = 1,
PCMK_LSB_INVALID_PARAM = 2,
PCMK_LSB_UNIMPLEMENT_FEATURE = 3,
PCMK_LSB_INSUFFICIENT_PRIV = 4,
PCMK_LSB_NOT_INSTALLED = 5,
PCMK_LSB_NOT_CONFIGURED = 6,
PCMK_LSB_NOT_RUNNING = 7, /* highest value accepted for non-status actions */
};
/* The return codes for the status operation are not the same for other
* operations - go figure
*/
/* Exit codes of the LSB "status" action.
 * services_get_ocf_exitcode() translates these to PCMK_OCF_* values; any code
 * it has no case for (including PCMK_LSB_STATUS_UNKNOWN) becomes
 * PCMK_OCF_UNKNOWN_ERROR.
 */
enum lsb_status_exitcode {
PCMK_LSB_STATUS_OK = 0, /* -> PCMK_OCF_OK */
PCMK_LSB_STATUS_VAR_PID = 1, /* -> PCMK_OCF_NOT_RUNNING */
PCMK_LSB_STATUS_VAR_LOCK = 2, /* -> PCMK_OCF_NOT_RUNNING */
PCMK_LSB_STATUS_NOT_RUNNING = 3, /* -> PCMK_OCF_NOT_RUNNING */
PCMK_LSB_STATUS_UNKNOWN = 4,
/* custom codes should be in the 150-199 range reserved for application use */
PCMK_LSB_STATUS_NOT_INSTALLED = 150, /* -> PCMK_OCF_NOT_INSTALLED */
PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151, /* -> PCMK_OCF_INSUFFICIENT_PRIV */
};
/* Uniform exit codes
* Everything is mapped to its OCF equivalent so that Pacemaker only deals with one set of codes
*/
/* OCF exit codes.
 * Values 0-7 coincide with enum lsb_exitcode. Note that
 * services_ocf_exitcode_str() has no dedicated string for
 * PCMK_OCF_CONNECTION_DIED, PCMK_OCF_EXEC_ERROR or PCMK_OCF_UNKNOWN; those
 * are reported as "unknown".
 */
enum ocf_exitcode {
PCMK_OCF_OK = 0,
PCMK_OCF_UNKNOWN_ERROR = 1,
PCMK_OCF_INVALID_PARAM = 2,
PCMK_OCF_UNIMPLEMENT_FEATURE = 3,
PCMK_OCF_INSUFFICIENT_PRIV = 4,
PCMK_OCF_NOT_INSTALLED = 5,
PCMK_OCF_NOT_CONFIGURED = 6,
PCMK_OCF_NOT_RUNNING = 7, /* End of overlap with LSB */
PCMK_OCF_RUNNING_MASTER = 8,
PCMK_OCF_FAILED_MASTER = 9,
/* 150-199 reserved for application use */
PCMK_OCF_CONNECTION_DIED = 189, /* Operation failure implied by disconnection of the LRM API to a local or remote node */
PCMK_OCF_DEGRADED = 190, /* Active resource that is no longer 100% functional */
PCMK_OCF_DEGRADED_MASTER = 191, /* Promoted resource that is no longer 100% functional */
PCMK_OCF_EXEC_ERROR = 192, /* Generic problem invoking the agent */
PCMK_OCF_UNKNOWN = 193, /* State of the service is unknown - used for recording in-flight operations */
PCMK_OCF_SIGNAL = 194,
PCMK_OCF_NOT_SUPPORTED = 195,
PCMK_OCF_PENDING = 196,
PCMK_OCF_CANCELLED = 197,
PCMK_OCF_TIMEOUT = 198,
PCMK_OCF_OTHER_ERROR = 199, /* Keep the same codes as PCMK_LSB */
};
/* Execution status of an operation (as opposed to the agent's exit code).
 * Only PCMK_LRM_OP_PENDING has an explicit value; the remaining constants
 * count up from 0 in declaration order, so reordering them changes their
 * numeric values.
 * services_lrm_status_str() has no string for PCMK_LRM_OP_ERROR_HARD or
 * PCMK_LRM_OP_ERROR_FATAL; both are reported as "UNKNOWN!".
 */
enum op_status {
PCMK_LRM_OP_PENDING = -1,
PCMK_LRM_OP_DONE, /* 0 */
PCMK_LRM_OP_CANCELLED, /* 1 */
PCMK_LRM_OP_TIMEOUT, /* 2 */
PCMK_LRM_OP_NOTSUPPORTED, /* 3 */
PCMK_LRM_OP_ERROR, /* 4 */
PCMK_LRM_OP_ERROR_HARD, /* 5 */
PCMK_LRM_OP_ERROR_FATAL, /* 6 */
PCMK_LRM_OP_NOT_INSTALLED, /* 7 */
};
/* Exit codes for the "nagios" resource class.
 * NOTE(review): 0-4 look like the plugin's own state codes while 100/101
 * appear to be Pacemaker-specific additions - confirm against the code that
 * executes nagios agents (not visible here).
 */
enum nagios_exitcode {
NAGIOS_STATE_OK = 0,
NAGIOS_STATE_WARNING = 1,
NAGIOS_STATE_CRITICAL = 2,
NAGIOS_STATE_UNKNOWN = 3,
NAGIOS_STATE_DEPENDENT = 4,
NAGIOS_INSUFFICIENT_PRIV = 100,
NAGIOS_NOT_INSTALLED = 101,
};
/* Flags accepted by resources_action_create() and stored in
 * svc_action_t.flags. The single defined value is a bit mask (0x01).
 */
enum svc_action_flags {
/* On timeout, only kill pid, do not kill entire pid group */
SVC_ACTION_LEAVE_GROUP = 0x01,
};
typedef struct svc_action_private_s svc_action_private_t;
/* Description and result of a single service/resource action.
 * Instances are created by resources_action_create() or
 * services_action_create_generic() and released with services_action_free().
 */
typedef struct svc_action_s {
char *id; /* operation key; resources_action_create() builds it with generate_op_key(name, action, interval) */
char *rsc; /* resource name (copy of the creator's 'name' argument) */
char *action; /* action name; "monitor" is stored as "status" for LSB (and heartbeat) agents */
int interval; /* in ms; services_action_async() treats a value > 0 as recurring */
char *standard; /* resource class after the "service" alias has been expanded */
char *provider; /* only set for OCF actions */
char *agent;
int timeout; /* in ms */
- GHashTable *params; /* used by OCF agents */
+ GHashTable *params; /* used by OCF agents and alert agents */
int rc;
int pid; /* nonzero while the action is an in-flight child process (see services_action_cancel()) */
int cancel; /* set to TRUE by services_action_cancel() */
int status; /* services_action_cancel() stores PCMK_LRM_OP_CANCELLED here; presumably always an enum op_status value - confirm */
int sequence; /* value of a file-level counter incremented for each action built by resources_action_create() */
int expected_rc;
int synchronous; /* true after services_action_sync(), false after services_action_async() */
enum svc_action_flags flags;
char *stderr_data;
char *stdout_data;
/*!
* Data stored by the creator of the action.
*
* This may be used to hold data that is needed later on by a callback,
* for example.
*/
void *cb_data;
svc_action_private_t *opaque; /* private state (exec path, argv, timers, callback) */
-
- GHashTable *alert_params; /* used by alert agents */
} svc_action_t;
/**
* \brief Get a list of files or directories in a given path
*
* \param[in] root full path to a directory to read
* \param[in] files return list of files if TRUE or directories if FALSE
* \param[in] executable if TRUE and files is TRUE, only return executable files
*
* \return a list of what was found. The list items are char *.
* \note It is the caller's responsibility to free the result with g_list_free_full(list, free).
*/
GList *get_directory_list(const char *root, gboolean files, gboolean executable);
/**
* Get a list of services
*
* \return a list of services. The list items are gchar *. This list _must_
* be destroyed using g_list_free_full(list, free).
*/
GList *services_list(void);
/**
* \brief Get a list of providers
*
* \param[in] standard list providers of this standard (e.g. ocf, lsb, etc.)
*
* \return a list of providers as char * list items (or NULL if standard does not support providers)
* \note The caller is responsible for freeing the result using g_list_free_full(list, free).
*/
GList *resources_list_providers(const char *standard);
/**
* \brief Get a list of resource agents
*
* \param[in] standard list agents using this standard (e.g. ocf, lsb, etc.) (or NULL for all)
* \param[in] provider list agents from this provider (or NULL for all)
*
* \return a list of resource agents. The list items are char *.
* \note The caller is responsible for freeing the result using g_list_free_full(list, free).
*/
GList *resources_list_agents(const char *standard, const char *provider);
/**
* Get list of available standards
*
* \return a list of resource standards. The list items are char *. This list _must_
* be destroyed using g_list_free_full(list, free).
*/
GList *resources_list_standards(void);
/**
 * \brief Create a new action for an LSB init script
 *
 * Convenience wrapper around resources_action_create() that uses the LSB
 * resource class, passes \p name as both resource name and agent name, and
 * supplies no provider, parameters or flags.
 *
 * \param[in] name     resource name, also used as the agent name
 * \param[in] action   action (start, stop, monitor, etc.)
 * \param[in] interval how often to repeat this action, in milliseconds
 * \param[in] timeout  timeout of the action, in milliseconds
 *
 * \return whatever resources_action_create() returns (NULL on error)
 */
svc_action_t *services_action_create(const char *name, const char *action,
int interval /* ms */ , int timeout /* ms */ );
/**
* \brief Create a new resource action
*
* \param[in] name name of resource
* \param[in] standard resource agent standard (ocf, lsb, etc.)
* \param[in] provider resource agent provider
* \param[in] agent resource agent name
* \param[in] action action (start, stop, monitor, etc.)
* \param[in] interval how often to repeat this action, in milliseconds (if 0, execute only once)
* \param[in] timeout consider action failed if it does not complete in this many milliseconds
* \param[in] params action parameters
* \param[in] flags flags to store in the new action (see enum svc_action_flags)
*
* \return newly allocated action instance, or NULL if a required argument is
* missing or the resource standard is unknown
*
* \post After the call, 'params' is owned, and later free'd by the svc_action_t result
* \note The implementation keeps 'params' in the result only for OCF actions;
* in every other case (including errors) it destroys the table before
* returning, so the caller must not use 'params' afterwards either way.
* \note The caller is responsible for freeing the return value using
* services_action_free().
*/
svc_action_t *resources_action_create(const char *name, const char *standard,
const char *provider, const char *agent,
const char *action, int interval /* ms */ ,
int timeout /* ms */ , GHashTable * params,
enum svc_action_flags flags);
/**
* Kick a recurring action so it is scheduled immediately for re-execution
*/
gboolean services_action_kick(const char *name, const char *action, int interval /* ms */);
/**
* Find the first class that can provide service::${agent}
*
* \param[in] agent which agent to search for
* \return NULL, or the first class that provides the named agent
*/
const char *resources_find_service_class(const char *agent);
/**
* Utilize services API to execute an arbitrary command.
*
* This API has useful infrastructure in place to be able to run a command
* in the background and get notified via a callback when the command finishes.
*
* \param[in] exec command to execute
* \param[in] args arguments to the command, NULL terminated
*
* \return a svc_action_t object, used to pass to the execute function
* (services_action_sync() or services_action_async()) and is
* provided to the callback.
*/
svc_action_t *services_action_create_generic(const char *exec, const char *args[]);
/**
 * Release the run-time resources attached to an action (timer and pending
 * DBus call when DBus support is built in, stdout/stderr main loop sources)
 * without freeing the action itself.
 */
void services_action_cleanup(svc_action_t * op);
/**
 * Clean up and free an action and everything it owns. Does nothing if \p op
 * is NULL or if the action is still tracked as in-flight, blocked or
 * recurring.
 */
void services_action_free(svc_action_t * op);
/**
 * Run an action synchronously.
 *
 * \param[in] op services action data
 *
 * \return FALSE if \p op is NULL, otherwise the result of executing the action
 */
gboolean services_action_sync(svc_action_t * op);
/**
* Run an action asynchronously.
*
* \param[in] op services action data
* \param[in] action_callback callback for when the action completes
*
* \retval TRUE successfully started execution
* \retval FALSE failed to start execution, no callback will be received
*/
gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *));
/**
 * \brief Cancel a recurring action
 *
 * \param[in] name     name of resource that the operation is for
 * \param[in] action   name of operation to cancel
 * \param[in] interval interval of operation to cancel, in milliseconds
 *
 * \return TRUE if the action was cancelled, FALSE otherwise (including when
 *         no matching recurring action exists, or when an in-flight
 *         systemd/upstart instance must complete first)
 */
gboolean services_action_cancel(const char *name, const char *action, int interval);
/**
 * \brief Get a human-readable string for an operation status
 *
 * \param[in] status  Operation status to describe
 *
 * \return Constant string for \p status; statuses without an entry below
 *         (e.g. PCMK_LRM_OP_ERROR_HARD, PCMK_LRM_OP_ERROR_FATAL) yield
 *         "UNKNOWN!"
 */
static inline const char *services_lrm_status_str(enum op_status status) {
    const char *text = "UNKNOWN!";

    switch (status) {
        case PCMK_LRM_OP_PENDING:
            text = "pending";
            break;
        case PCMK_LRM_OP_DONE:
            text = "complete";
            break;
        case PCMK_LRM_OP_CANCELLED:
            text = "Cancelled";
            break;
        case PCMK_LRM_OP_TIMEOUT:
            text = "Timed Out";
            break;
        case PCMK_LRM_OP_NOTSUPPORTED:
            text = "NOT SUPPORTED";
            break;
        case PCMK_LRM_OP_ERROR:
            text = "Error";
            break;
        case PCMK_LRM_OP_NOT_INSTALLED:
            text = "Not installed";
            break;
        default:
            break;
    }
    return text;
}
static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) {
switch (code) {
case PCMK_OCF_OK:
return "ok";
case PCMK_OCF_UNKNOWN_ERROR:
return "unknown error";
case PCMK_OCF_INVALID_PARAM:
return "invalid parameter";
case PCMK_OCF_UNIMPLEMENT_FEATURE:
return "unimplemented feature";
case PCMK_OCF_INSUFFICIENT_PRIV:
return "insufficient privileges";
case PCMK_OCF_NOT_INSTALLED:
return "not installed";
case PCMK_OCF_NOT_CONFIGURED:
return "not configured";
case PCMK_OCF_NOT_RUNNING:
return "not running";
case PCMK_OCF_RUNNING_MASTER:
return "master";
case PCMK_OCF_FAILED_MASTER:
return "master (failed)";
case PCMK_OCF_SIGNAL:
return "OCF_SIGNAL";
case PCMK_OCF_NOT_SUPPORTED:
return "OCF_NOT_SUPPORTED";
case PCMK_OCF_PENDING:
return "OCF_PENDING";
case PCMK_OCF_CANCELLED:
return "OCF_CANCELLED";
case PCMK_OCF_TIMEOUT:
return "OCF_TIMEOUT";
case PCMK_OCF_OTHER_ERROR:
return "OCF_OTHER_ERROR";
case PCMK_OCF_DEGRADED:
return "OCF_DEGRADED";
case PCMK_OCF_DEGRADED_MASTER:
return "OCF_DEGRADED_MASTER";
default:
return "unknown";
}
}
/**
* \brief Get OCF equivalent of LSB exit code
*
* \param[in] action LSB action that produced exit code
* \param[in] lsb_exitcode Exit code of LSB action
*
* \return PCMK_OCF_* constant that corresponds to LSB exit code
*/
static inline enum ocf_exitcode
services_get_ocf_exitcode(const char *action, int lsb_exitcode)
{
    /* A missing action name is handled like "status"/"monitor" */
    const int status_like = (action == NULL)
                            || (strcmp(action, "status") == 0)
                            || (strcmp(action, "monitor") == 0);

    if (!status_like) {
        /* Outside of status, LSB and OCF agree on the meaning of 0..7 */
        const int shared_code = (lsb_exitcode >= 0)
                                && (lsb_exitcode <= PCMK_LSB_NOT_RUNNING);

        return shared_code ? (enum ocf_exitcode) lsb_exitcode
                           : PCMK_OCF_UNKNOWN_ERROR;
    }

    /* The status action uses its own set of return codes */
    if (lsb_exitcode == PCMK_LSB_STATUS_OK) {
        return PCMK_OCF_OK;
    }
    if (lsb_exitcode == PCMK_LSB_STATUS_NOT_INSTALLED) {
        return PCMK_OCF_NOT_INSTALLED;
    }
    if (lsb_exitcode == PCMK_LSB_STATUS_INSUFFICIENT_PRIV) {
        return PCMK_OCF_INSUFFICIENT_PRIV;
    }
    if ((lsb_exitcode == PCMK_LSB_STATUS_VAR_PID)
        || (lsb_exitcode == PCMK_LSB_STATUS_VAR_LOCK)
        || (lsb_exitcode == PCMK_LSB_STATUS_NOT_RUNNING)) {
        return PCMK_OCF_NOT_RUNNING;
    }
    return PCMK_OCF_UNKNOWN_ERROR;
}
# ifdef __cplusplus
}
# endif
#endif /* __PCMK_SERVICES__ */
diff --git a/lib/services/services.c b/lib/services/services.c
index 1a031e044d..2765e437bd 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -1,951 +1,946 @@
/*
* Copyright (C) 2010-2016 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/common/mainloop.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include "services_private.h"
#if SUPPORT_UPSTART
# include <upstart.h>
#endif
#if SUPPORT_SYSTEMD
# include <systemd.h>
#endif
/* TODO: Develop a rollover strategy */
static int operations = 0;
static GHashTable *recurring_actions = NULL;
/* ops waiting to run async because of conflicting active
* pending ops */
static GList *blocked_ops = NULL;
/* ops currently active (in-flight) */
static GList *inflight_ops = NULL;
static void handle_blocked_ops(void);
svc_action_t *
services_action_create(const char *name, const char *action, int interval, int timeout)
{
    /* Shorthand for an LSB action: the resource name doubles as the agent
     * name, and there is no provider, parameter table or flag. */
    const char *agent = name;

    return resources_action_create(name, PCMK_RESOURCE_CLASS_LSB, NULL, agent,
                                   action, interval, timeout, NULL, 0);
}
/*
 * Find the first resource class that can provide the named "service" agent.
 *
 * The classes are probed in this order (each only if compiled in):
 *   - lsb     (a file named after the agent exists below LSB_ROOT_DIR)
 *   - systemd
 *   - upstart
 *
 * Returns one of the PCMK_RESOURCE_CLASS_* constants, or NULL if no class
 * provides the agent or if agent is NULL.
 */
const char *
resources_find_service_class(const char *agent)
{
    if (agent == NULL) {
        /* Nothing to look for; also keeps NULL away from the "%s" below */
        return NULL;
    }

#ifdef LSB_ROOT_DIR
    {
        struct stat st;
        char *path = NULL;
        int found = 0;

        /* asprintf() leaves 'path' undefined when it fails, so the pointer
         * may only be used (and freed) after a successful call. */
        if (asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent) > 0) {
            found = (stat(path, &st) == 0);
            free(path);
        }
        if (found) {
            return PCMK_RESOURCE_CLASS_LSB;
        }
    }
#endif

#if SUPPORT_SYSTEMD
    if (systemd_unit_exists(agent)) {
        return PCMK_RESOURCE_CLASS_SYSTEMD;
    }
#endif

#if SUPPORT_UPSTART
    if (upstart_job_exists(agent)) {
        return PCMK_RESOURCE_CLASS_UPSTART;
    }
#endif

    return NULL;
}
/* Create the table of recurring actions on first use. Keys and values are
 * not owned by the table (no destroy functions are registered). */
static inline void
init_recurring_actions(void)
{
    if (recurring_actions != NULL) {
        return;
    }
    recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal,
                                              NULL, NULL);
}
/*!
* \internal
* \brief Check whether op is in-flight systemd or upstart op
*
* \param[in] op Operation to check
*
* \return TRUE if op is in-flight systemd or upstart op
*/
static inline gboolean
inflight_systemd_or_upstart(svc_action_t *op)
{
    /* Only systemd and upstart ops qualify ... */
    if (!safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD)
        && !safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_UPSTART)) {
        return FALSE;
    }

    /* ... and only while they are tracked as in-flight */
    return g_list_find(inflight_ops, op) != NULL;
}
/*!
* \internal
* \brief Expand "service" alias to an actual resource class
*
* \param[in] rsc Resource name (for logging only)
* \param[in] standard Resource class as configured
* \param[in] agent Agent name to look for
*
* \return Newly allocated string with actual resource class
*
* \note The caller is responsible for calling free() on the result.
*/
static char *
expand_resource_class(const char *rsc, const char *standard, const char *agent)
{
    /* Any class other than the "service" alias is used as configured */
    const char *resolved = standard;
    char *expanded_class = NULL;

    if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) {
        resolved = resources_find_service_class(agent);

        if (resolved != NULL) {
            crm_debug("Found %s agent %s for %s", resolved, agent, rsc);
        } else {
            /* No class claims the agent: fall back to LSB */
            crm_info("Assuming resource class lsb for agent %s for %s",
                     agent, rsc);
            resolved = PCMK_RESOURCE_CLASS_LSB;
        }
    }

    expanded_class = strdup(resolved);
    CRM_ASSERT(expanded_class);
    return expanded_class;
}
/*
 * Build a svc_action_t for a resource agent action.
 *
 * The required arguments are validated first, then the action is allocated
 * and the executable path and argument vector are filled in according to
 * the (expanded) resource class.
 *
 * Ownership of 'params': the table is stored in the result only for OCF
 * actions; on every other path, including all error paths, it is destroyed
 * before returning.
 *
 * Returns the new action, or NULL on invalid arguments, an unknown resource
 * class, or failure to build the agent path.
 */
svc_action_t *
resources_action_create(const char *name, const char *standard, const char *provider,
const char *agent, const char *action, int interval, int timeout,
GHashTable * params, enum svc_action_flags flags)
{
svc_action_t *op = NULL;
/*
* Do some up front sanity checks before we go off and
* build the svc_action_t instance.
*/
if (crm_strlen_zero(name)) {
crm_err("Cannot create operation without resource name");
goto return_error;
}
if (crm_strlen_zero(standard)) {
crm_err("Cannot create operation for %s without resource class", name);
goto return_error;
}
/* A provider is only mandatory for the OCF class */
if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF)
&& crm_strlen_zero(provider)) {
crm_err("Cannot create OCF operation for %s without provider", name);
goto return_error;
}
if (crm_strlen_zero(agent)) {
crm_err("Cannot create operation for %s without agent name", name);
goto return_error;
}
if (crm_strlen_zero(action)) {
crm_err("Cannot create operation for %s without operation name", name);
goto return_error;
}
/*
* Sanity checks passed, proceed!
*/
/* NOTE(review): neither calloc() result is checked before being
 * dereferenced below - confirm whether allocation failure is handled
 * elsewhere (e.g. by an aborting allocator). */
op = calloc(1, sizeof(svc_action_t));
op->opaque = calloc(1, sizeof(svc_action_private_t));
op->rsc = strdup(name);
op->interval = interval;
op->timeout = timeout;
op->standard = expand_resource_class(name, standard, agent);
op->agent = strdup(agent);
op->sequence = ++operations;
op->flags = flags;
op->id = generate_op_key(name, action, interval);
/* LSB (and heartbeat) agents are given "status" instead of "monitor";
 * note that op->id above was already generated from the original name */
if (safe_str_eq(action, "monitor") && (
#if SUPPORT_HEARTBEAT
safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_HB) ||
#endif
safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB))) {
action = "status";
}
op->action = strdup(action);
if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
op->provider = strdup(provider);
/* The action takes over the parameter table; clearing the local pointer
 * keeps the common exit paths from destroying it */
op->params = params;
params = NULL;
if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
/* args[2] is not set explicitly; the array is NULL-terminated because
 * op->opaque came from calloc() */
op->opaque->args[0] = strdup(op->opaque->exec);
op->opaque->args[1] = strdup(action);
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) {
if (op->agent[0] == '/') {
/* if given an absolute path, use that instead
* of tacking on the LSB_ROOT_DIR path to the front */
op->opaque->exec = strdup(op->agent);
} else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
op->opaque->args[1] = strdup(op->action);
op->opaque->args[2] = NULL;
#if SUPPORT_HEARTBEAT
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_HB) == 0) {
int index;
int param_num;
char buf_tmp[20];
void *value_tmp;
if (op->agent[0] == '/') {
/* if given an absolute path, use that instead
* of tacking on the HB_RA_DIR path to the front */
op->opaque->exec = strdup(op->agent);
} else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
/* The "heartbeat" agent class only has positional arguments,
* which we keyed by their decimal position number. */
param_num = 1;
/* NOTE(review): 'params' is looked up without a NULL check here */
for (index = 1; index <= MAX_ARGC - 3; index++ ) {
snprintf(buf_tmp, sizeof(buf_tmp), "%d", index);
value_tmp = g_hash_table_lookup(params, buf_tmp);
if (value_tmp == NULL) {
/* maybe: strdup("") ??
* But the old lrmd did simply continue as well. */
continue;
}
op->opaque->args[param_num++] = strdup(value_tmp);
}
/* Add operation code as the last argument, */
/* and the terminating NULL pointer */
op->opaque->args[param_num++] = strdup(op->action);
op->opaque->args[param_num] = NULL;
#endif
#if SUPPORT_SYSTEMD
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
op->opaque->exec = strdup("systemd-dbus");
#endif
#if SUPPORT_UPSTART
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) {
op->opaque->exec = strdup("upstart-dbus");
#endif
#if SUPPORT_NAGIOS
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) {
int index = 0;
if (op->agent[0] == '/') {
/* if given an absolute path, use that instead
* of tacking on the NAGIOS_PLUGIN_DIR path to the front */
op->opaque->exec = strdup(op->agent);
} else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
index = 1;
if (safe_str_eq(op->action, "monitor") && op->interval == 0) {
/* Invoke --version for a nagios probe */
op->opaque->args[index] = strdup("--version");
index++;
} else if (params) {
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
static int args_size = sizeof(op->opaque->args) / sizeof(char *);
g_hash_table_iter_init(&iter, params);
/* Each parameter becomes a "--key value" pair; the bound leaves room
 * for the pair plus the terminating NULL */
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) &&
index <= args_size - 3) {
int len = 3;
char *long_opt = NULL;
if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) {
continue;
}
/* len = 2 for "--" + key length + 1 for the terminator */
len += strlen(key);
long_opt = calloc(1, len);
sprintf(long_opt, "--%s", key);
long_opt[len - 1] = 0;
op->opaque->args[index] = long_opt;
op->opaque->args[index + 1] = strdup(value);
index += 2;
}
}
op->opaque->args[index] = NULL;
#endif
} else {
crm_err("Unknown resource standard: %s", op->standard);
services_action_free(op);
op = NULL;
}
/* Still non-NULL here means the table was not handed over to the action */
if(params) {
g_hash_table_destroy(params);
}
return op;
return_error:
if(params) {
g_hash_table_destroy(params);
}
/* op is NULL when a sanity check failed, which services_action_free()
 * accepts */
services_action_free(op);
return NULL;
}
/*
 * Create an action that executes an arbitrary command.
 *
 * exec  Command to execute; also used as argv[0]
 * args  NULL-terminated list of arguments (may be NULL for none)
 *
 * Returns a newly allocated action, or NULL if exec is NULL, memory could
 * not be allocated, or more arguments were given than fit into the action's
 * fixed-size argument array while keeping its terminating NULL entry.
 */
svc_action_t *
services_action_create_generic(const char *exec, const char *args[])
{
    svc_action_t *op = NULL;
    unsigned int cur_arg;

    if (exec == NULL) {
        return NULL;
    }

    op = calloc(1, sizeof(*op));
    if (op == NULL) {
        return NULL;
    }

    op->opaque = calloc(1, sizeof(svc_action_private_t));
    if (op->opaque == NULL) {
        free(op);
        return NULL;
    }

    op->opaque->exec = strdup(exec);
    op->opaque->args[0] = strdup(exec);

    for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) {

        /* The last slot must stay NULL so the array remains a valid,
         * NULL-terminated argument vector. Refuse the request instead of
         * silently filling that slot and dropping the remaining arguments. */
        if (cur_arg >= DIMOF(op->opaque->args) - 1) {
            unsigned int i;

            crm_err("svc_action_t args list not long enough for '%s' execution request.", exec);

            /* Nothing else has been attached to the action yet, so release
             * exactly what was allocated above */
            for (i = 0; i < cur_arg; i++) {
                free(op->opaque->args[i]);
            }
            free(op->opaque->exec);
            free(op->opaque);
            free(op);
            return NULL;
        }
        op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]);
    }
    return op;
}
#if SUPPORT_DBUS
/*!
* \internal
* \brief Update operation's pending DBus call, unreferencing old one if needed
*
* \param[in,out] op Operation to modify
* \param[in] pending Pending call to set
*/
void
services_set_op_pending(svc_action_t *op, DBusPendingCall *pending)
{
    DBusPendingCall *previous = op->opaque->pending;

    /* Drop our reference to a previously stored call that is being replaced
     * or cleared */
    if ((previous != NULL) && (previous != pending)) {
        if (pending != NULL) {
            crm_info("Lost pending %s DBus call (%p)", op->id, previous);
        } else {
            crm_trace("Done with pending %s DBus call (%p)", op->id, previous);
        }
        dbus_pending_call_unref(previous);
    }

    op->opaque->pending = pending;

    if (pending == NULL) {
        crm_trace("Cleared pending %s DBus call", op->id);
    } else {
        crm_trace("Updated pending %s DBus call (%p)", op->id, pending);
    }
}
#endif
/*
 * Release the run-time resources attached to an action without freeing the
 * action itself: the DBus timer and pending call (when DBus support is
 * compiled in) and the main loop sources for stdout/stderr.
 *
 * Does nothing if op is NULL or has no private data.
 */
void
services_action_cleanup(svc_action_t * op)
{
    if ((op == NULL) || (op->opaque == NULL)) {
        return;
    }

#if SUPPORT_DBUS
    if (op->opaque->timerid != 0) {
        crm_trace("Removing timer for call %s to %s", op->action, op->rsc);
        g_source_remove(op->opaque->timerid);
        op->opaque->timerid = 0;
    }

    if (op->opaque->pending) {
        crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc);

        /* dbus_pending_call_get_completed() is TRUE once a reply has been
         * received, so the situation worth warning about is a completed
         * call that is still attached to the action. */
        if (dbus_pending_call_get_completed(op->opaque->pending)) {
            crm_warn("Pending dbus call %s for %s completed but its result was not handled", op->action, op->rsc);
        }
        dbus_pending_call_cancel(op->opaque->pending);
        dbus_pending_call_unref(op->opaque->pending);
        op->opaque->pending = NULL;
    }
#endif

    if (op->opaque->stderr_gsource) {
        mainloop_del_fd(op->opaque->stderr_gsource);
        op->opaque->stderr_gsource = NULL;
    }

    if (op->opaque->stdout_gsource) {
        mainloop_del_fd(op->opaque->stdout_gsource);
        op->opaque->stdout_gsource = NULL;
    }
}
/*
 * Clean up and free an action together with everything it owns (strings,
 * argument vector, private data, parameter table).
 *
 * Does nothing if op is NULL. If the action is still tracked as in-flight,
 * blocked or recurring, the function returns without freeing anything.
 */
void
services_action_free(svc_action_t * op)
{
unsigned int i;
if (op == NULL) {
return;
}
/* The operation should be removed from all tracking lists by this point.
* If it's not, we have a bug somewhere, so bail. That may lead to a
* memory leak, but it's better than a use-after-free segmentation fault.
*/
CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return);
CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return);
/* NOTE(review): op->id is passed to the lookup unchecked; actions built by
 * services_action_create_generic() never get an id assigned there - confirm
 * that a NULL key is safe once recurring_actions exists. */
CRM_CHECK((recurring_actions == NULL)
|| (g_hash_table_lookup(recurring_actions, op->id) == NULL),
return);
services_action_cleanup(op);
/* NOTE(review): services_action_cleanup() tolerates op->opaque == NULL, but
 * op->opaque is dereferenced unconditionally from here on. */
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
free(op->id);
free(op->opaque->exec);
for (i = 0; i < DIMOF(op->opaque->args); i++) {
free(op->opaque->args[i]);
}
free(op->opaque);
free(op->rsc);
free(op->action);
free(op->standard);
free(op->agent);
free(op->provider);
free(op->stdout_data);
free(op->stderr_data);
if (op->params) {
g_hash_table_destroy(op->params);
op->params = NULL;
}
- if (op->alert_params) {
- g_hash_table_destroy(op->alert_params);
- op->alert_params = NULL;
- }
-
free(op);
}
/* Stop tracking op as a recurring action and remove its repeat timer.
 * The action itself is neither stopped nor freed. Always returns TRUE. */
gboolean
cancel_recurring_action(svc_action_t * op)
{
    crm_info("Cancelling %s operation %s", op->standard, op->id);

    if (op->opaque->repeat_timer != 0) {
        g_source_remove(op->opaque->repeat_timer);
        op->opaque->repeat_timer = 0;
    }

    if (recurring_actions != NULL) {
        g_hash_table_remove(recurring_actions, op->id);
    }

    return TRUE;
}
/*!
* \brief Cancel a recurring action
*
* \param[in] name Name of resource that operation is for
* \param[in] action Name of operation to cancel
* \param[in] interval Interval of operation to cancel
*
* \return TRUE if action was successfully cancelled, FALSE otherwise
*/
gboolean
services_action_cancel(const char *name, const char *action, int interval)
{
    gboolean result = FALSE;
    char *key = generate_op_key(name, action, interval);
    svc_action_t *target = NULL;

    /* Only recurring actions can be cancelled */
    init_recurring_actions();
    target = g_hash_table_lookup(recurring_actions, key);

    if (target != NULL) {
        /* Keep operation_finalize() from rescheduling the operation */
        target->cancel = TRUE;

        /* Drop it from the recurring table and stop its timer */
        cancel_recurring_action(target);

        if (target->pid != 0) {
            /* A PID means an in-flight child process: kill it. Whatever the
             * outcome of the kill, the main loop hands the op to
             * operation_finished() (and thus operation_finalize()) once the
             * process is gone.
             */
            crm_info("Terminating in-flight op %s (pid %d) early because it was cancelled",
                     key, target->pid);
            result = mainloop_child_kill(target->pid);
            if (result == FALSE) {
                crm_err("Termination of %s (pid %d) failed", key, target->pid);
            }

        } else if (inflight_systemd_or_upstart(target)) {
            /* In-flight systemd/upstart ops have no PID; their handlers call
             * operation_finalize() on completion.
             * @TODO: Can we request early termination, maybe using
             * dbus_pending_call_cancel()?
             */
            crm_info("Will cancel %s op %s when in-flight instance completes",
                     target->standard, target->id);
            result = FALSE;

        } else {
            /* Not in-flight at all: report the cancellation right away */
            target->status = PCMK_LRM_OP_CANCELLED;
            if (target->opaque->callback) {
                target->opaque->callback(target);
            }
            blocked_ops = g_list_remove(blocked_ops, target);
            services_action_free(target);
            result = TRUE;
        }
    }

    free(key);
    return result;
}
/* Make a recurring action run again right away.
 * Returns FALSE if no such recurring action is known, TRUE otherwise (an
 * action that is currently executing is left alone). */
gboolean
services_action_kick(const char *name, const char *action, int interval /* ms */)
{
    char *key = generate_op_key(name, action, interval);
    svc_action_t *found = NULL;

    init_recurring_actions();
    found = g_hash_table_lookup(recurring_actions, key);
    free(key);

    if (found == NULL) {
        return FALSE;
    }

    if ((found->pid == 0) && !inflight_systemd_or_upstart(found)) {
        /* Idle: replace the pending repeat timer with an immediate run */
        if (found->opaque->repeat_timer) {
            g_source_remove(found->opaque->repeat_timer);
            found->opaque->repeat_timer = 0;
        }
        recurring_action_timer(found);
    }
    return TRUE;
}
/*!
* \internal
* \brief Add a new recurring operation, checking for duplicates
*
* \param[in] op Operation to add
*
* \return TRUE if duplicate found (and reschedule), FALSE otherwise
*/
static gboolean
handle_duplicate_recurring(svc_action_t * op)
{
    svc_action_t *existing = g_hash_table_lookup(recurring_actions, op->id);

    if ((existing == NULL) || (existing == op)) {
        /* No other action is registered under this key */
        return FALSE;
    }

    /* Hand the new callback and its data over to the registered action */
    if (op->opaque->callback) {
        existing->opaque->callback = op->opaque->callback;
        existing->cb_data = op->cb_data;
        op->cb_data = NULL;
    }

    /* Trigger the next run of the registered action.
     * NOTE(review): the timer removed here belongs to 'op' (the duplicate),
     * not to the registered action, and the condition is the opposite of the
     * one services_action_kick() uses - confirm this is intended. */
    if (existing->pid != 0) {
        if (op->opaque->repeat_timer) {
            g_source_remove(op->opaque->repeat_timer);
            op->opaque->repeat_timer = 0;
        }
        recurring_action_timer(existing);
    }

    /* The duplicate is no longer needed */
    services_action_free(op);
    return TRUE;
}
/* Dispatch an action to the executor for its resource class.
 * op->synchronous must have been decided by the caller beforehand.
 *
 * Callers should avoid touching 'op' afterwards: it has probably been freed
 * if an execution function returned TRUE for an asynchronous action. */
inline static gboolean
action_exec_helper(svc_action_t * op)
{
    const char *standard = op->standard;

    if ((standard != NULL)
        && (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0)) {
#if SUPPORT_UPSTART
        return upstart_job_exec(op);
#else
        /* upstart support is not compiled in */
        return FALSE;
#endif
    }

    if ((standard != NULL)
        && (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0)) {
#if SUPPORT_SYSTEMD
        return systemd_unit_exec(op);
#else
        /* systemd support is not compiled in */
        return FALSE;
#endif
    }

    /* Everything else is run as an ordinary child process */
    return services_os_action_execute(op);
}
/* Start tracking an asynchronous action as in-flight. Actions without a
 * resource name are not tracked, since collisions are detected per
 * resource. */
void
services_add_inflight_op(svc_action_t * op)
{
    if (op == NULL) {
        return;
    }

    CRM_ASSERT(op->synchronous == FALSE);

    if (op->rsc == NULL) {
        return;
    }
    inflight_ops = g_list_append(inflight_ops, op);
}
/*!
* \internal
* \brief Stop tracking an operation that completed
*
* \param[in] op Operation to stop tracking
*/
void
services_untrack_op(svc_action_t *op)
{
    /* The op can no longer be waiting or running */
    blocked_ops = g_list_remove(blocked_ops, op);
    inflight_ops = g_list_remove(inflight_ops, op);

    /* It may have been holding back other ops; give them a chance to run */
    handle_blocked_ops();
}
/* Run an action asynchronously, registering it as recurring when it has an
 * interval and queueing it while another action for the same resource is
 * in flight. */
gboolean
services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *))
{
    op->synchronous = false;

    if (action_callback != NULL) {
        op->opaque->callback = action_callback;
    }

    if (op->interval > 0) {
        init_recurring_actions();

        if (handle_duplicate_recurring(op) != FALSE) {
            /* The registered entry was rescheduled and 'op' has been freed */
            return TRUE;
        }
        g_hash_table_replace(recurring_actions, op->id, op);
    }

    if ((op->rsc == NULL) || !is_op_blocked(op->rsc)) {
        return action_exec_helper(op);
    }

    /* Another op for this resource is in flight: run this one later */
    blocked_ops = g_list_append(blocked_ops, op);
    return TRUE;
}
static gboolean processing_blocked_ops = FALSE;
/* Check whether any in-flight action belongs to the given resource */
gboolean
is_op_blocked(const char *rsc)
{
    GList *node = inflight_ops;

    while (node != NULL) {
        svc_action_t *active = node->data;

        if (safe_str_eq(active->rsc, rsc)) {
            return TRUE;
        }
        node = node->next;
    }
    return FALSE;
}
/*
 * Execute every queued (blocked) action whose resource no longer has an
 * in-flight action, and remove the executed actions from the queue.
 *
 * A static guard flag makes nested invocations return immediately.
 */
static void
handle_blocked_ops(void)
{
GList *executed_ops = NULL;
GList *gIter = NULL;
svc_action_t *op = NULL;
gboolean res = FALSE;
if (processing_blocked_ops) {
/* avoid nested calling of this function */
return;
}
processing_blocked_ops = TRUE;
/* n^2 operation here, but blocked ops are incredibly rare. this list
* will be empty 99% of the time. */
for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
if (is_op_blocked(op->rsc)) {
continue;
}
/* Remember the op so it can be taken off blocked_ops after the loop,
 * instead of modifying the list while it is being walked */
executed_ops = g_list_append(executed_ops, op);
res = action_exec_helper(op);
if (res == FALSE) {
op->status = PCMK_LRM_OP_ERROR;
/* this can cause this function to be called recursively
* which is why we have processing_blocked_ops static variable */
/* NOTE(review): if operation_finalize() ends up removing op from
 * blocked_ops (e.g. through services_untrack_op()), the list node under
 * gIter would be freed while the loop still reads gIter->next - confirm. */
operation_finalize(op);
}
}
/* Only the pointer value is used below; op itself may already be gone */
for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
blocked_ops = g_list_remove(blocked_ops, op);
}
g_list_free(executed_ops);
processing_blocked_ops = FALSE;
}
/* Run an action synchronously and trace its result and captured output.
 * Returns FALSE if op is NULL, otherwise the result of the execution. */
gboolean
services_action_sync(svc_action_t * op)
{
    gboolean succeeded = TRUE;

    if (op == NULL) {
        crm_trace("No operation to execute");
        return FALSE;
    }

    op->synchronous = true;
    succeeded = action_exec_helper(op);

    crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc);

    if (op->stdout_data != NULL) {
        crm_trace(" > stdout: %s", op->stdout_data);
    }
    if (op->stderr_data != NULL) {
        crm_trace(" > stderr: %s", op->stderr_data);
    }

    return succeeded;
}
/* Public entry point for listing a directory; the work is delegated to the
 * OS-specific implementation. */
GList *
get_directory_list(const char *root, gboolean files, gboolean executable)
{
    GList *entries = services_os_get_directory_list(root, files, executable);

    return entries;
}
/* List the available services, i.e. the agents of the LSB class */
GList *
services_list(void)
{
    GList *lsb_agents = resources_list_agents(PCMK_RESOURCE_CLASS_LSB, NULL);

    return lsb_agents;
}
#if SUPPORT_HEARTBEAT
/*!
 * \internal
 * \brief List the entries of HB_RA_DIR (both filter flags set to TRUE)
 *
 * \return Result of services_os_get_directory_list()
 */
static GList *
resources_os_list_hb_agents(void)
{
    GList *hb_agents = services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE);

    return hb_agents;
}
#endif
/*!
 * \brief List the names of the supported resource standards
 *
 * OCF, LSB and "service" are always listed. systemd, upstart and nagios are
 * listed only when compiled in and their agent listing is non-empty.
 * Heartbeat is listed whenever it is compiled in.
 *
 * \return Newly allocated list of newly allocated strings
 */
GList *
resources_list_standards(void)
{
    GList *result = NULL;
    GList *found = NULL;

    result = g_list_append(result, strdup(PCMK_RESOURCE_CLASS_OCF));
    result = g_list_append(result, strdup(PCMK_RESOURCE_CLASS_LSB));
    result = g_list_append(result, strdup(PCMK_RESOURCE_CLASS_SERVICE));

#if SUPPORT_SYSTEMD
    found = systemd_unit_listall();
    if (found != NULL) {
        result = g_list_append(result, strdup(PCMK_RESOURCE_CLASS_SYSTEMD));
        g_list_free_full(found, free);
    }
#endif

#if SUPPORT_UPSTART
    found = upstart_job_listall();
    if (found != NULL) {
        result = g_list_append(result, strdup(PCMK_RESOURCE_CLASS_UPSTART));
        g_list_free_full(found, free);
    }
#endif

#if SUPPORT_NAGIOS
    found = resources_os_list_nagios_agents();
    if (found != NULL) {
        result = g_list_append(result, strdup(PCMK_RESOURCE_CLASS_NAGIOS));
        g_list_free_full(found, free);
    }
#endif

#if SUPPORT_HEARTBEAT
    result = g_list_append(result, strdup(PCMK_RESOURCE_CLASS_HB));
#endif

    return result;
}
/*!
 * \brief List the providers available for a resource standard
 *
 * \param[in] standard  Resource standard name (compared case-insensitively);
 *                      may be NULL
 *
 * \return List of OCF providers if standard is the OCF class, otherwise NULL
 *         (including when standard is NULL)
 */
GList *
resources_list_providers(const char *standard)
{
    /* strcasecmp() must not be handed a NULL pointer, so guard it here;
     * resources_list_agents() likewise accepts a NULL standard */
    if ((standard != NULL)
        && (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0)) {
        return resources_os_list_ocf_providers();
    }
    return NULL;
}
/*!
 * \brief List the agents available for a resource standard
 *
 * \param[in] standard  Standard name (case-insensitive), or NULL
 * \param[in] provider  Provider, used only for the OCF standard
 *
 * \return Agent list. For a NULL standard: LSB agents, all OCF agents, plus
 *         systemd/upstart ones where compiled in. For "service": the same
 *         without the OCF agents. NULL for an unrecognised standard.
 */
GList *
resources_list_agents(const char *standard, const char *provider)
{
    if ((standard == NULL)
        || (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0)) {
        GList *all = resources_os_list_lsb_agents();

        /* g_list_concat() returns its first argument unchanged when the
         * second list is empty, so no explicit emptiness checks are needed */
        if (standard == NULL) {
            all = g_list_concat(all, resources_os_list_ocf_agents(NULL));
        }
#if SUPPORT_SYSTEMD
        all = g_list_concat(all, systemd_unit_listall());
#endif
#if SUPPORT_UPSTART
        all = g_list_concat(all, upstart_job_listall());
#endif
        return all;
    }

    if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
        return resources_os_list_ocf_agents(provider);
    }
    if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) {
        return resources_os_list_lsb_agents();
    }
#if SUPPORT_HEARTBEAT
    if (strcasecmp(standard, PCMK_RESOURCE_CLASS_HB) == 0) {
        return resources_os_list_hb_agents();
    }
#endif
#if SUPPORT_SYSTEMD
    if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
        return systemd_unit_listall();
    }
#endif
#if SUPPORT_UPSTART
    if (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) {
        return upstart_job_listall();
    }
#endif
#if SUPPORT_NAGIOS
    if (strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) {
        return resources_os_list_nagios_agents();
    }
#endif

    return NULL;
}
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index 1c60766dd8..74ac03162e 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -1,925 +1,926 @@
/*
* Copyright (C) 2010-2016 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <string.h>
#include <sys/time.h>
#include <sys/resource.h>
#ifdef HAVE_SYS_SIGNALFD_H
#include <sys/signalfd.h>
#endif
#include "crm/crm.h"
#include "crm/common/mainloop.h"
#include "crm/services.h"
#include "services_private.h"
#if SUPPORT_CIBSECRETS
# include "crm/common/cib_secrets.h"
#endif
/*!
 * \internal
 * \brief OR extra status flags into a file descriptor's current flags
 *
 * \param[in] fd    File descriptor to modify
 * \param[in] opts  Flags to add (e.g. O_NONBLOCK)
 *
 * \note Failures are logged, not returned.
 */
static inline void
set_fd_opts(int fd, int opts)
{
    int current = fcntl(fd, F_GETFL);

    if (current < 0) {
        crm_err("fcntl() read failed");
        return;
    }
    if (fcntl(fd, F_SETFL, current | opts) < 0) {
        crm_err("fcntl() write failed");
    }
}
/*!
 * \internal
 * \brief Append whatever is currently readable on an fd to an action's output
 *
 * \param[in]     fd         File descriptor to read from
 * \param[in,out] op         Action whose stdout_data/stderr_data is extended
 * \param[in]     is_stderr  Append to stderr_data if true, else stdout_data
 *
 * \return FALSE if fd is negative or the last read() returned EOF or an
 *         error, otherwise the positive byte count of the last read()
 */
static gboolean
svc_read_output(int fd, svc_action_t * op, bool is_stderr)
{
    char *data = NULL;
    int rc = 0, len = 0;
    char buf[500];
    static const size_t buf_read_len = sizeof(buf) - 1; /* room for the NUL */

    if (fd < 0) {
        crm_trace("No fd for %s", op->id);
        return FALSE;
    }

    /* Continue after any output that was collected by earlier calls */
    if (is_stderr && op->stderr_data) {
        len = strlen(op->stderr_data);
        data = op->stderr_data;
        crm_trace("Reading %s stderr into offset %d", op->id, len);

    } else if (is_stderr == FALSE && op->stdout_data) {
        len = strlen(op->stdout_data);
        data = op->stdout_data;
        crm_trace("Reading %s stdout into offset %d", op->id, len);

    } else {
        crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len);
    }

    do {
        rc = read(fd, buf, buf_read_len);
        if (rc > 0) {
            /* Terminate BEFORE logging: read() does not NUL-terminate, so
             * "%.80s" could otherwise run past the bytes just read into
             * stale or uninitialized stack contents */
            buf[rc] = 0;
            crm_trace("Got %d chars: %.80s", rc, buf);
            data = realloc_safe(data, len + rc + 1);
            len += sprintf(data + len, "%s", buf);

        } else if (errno != EINTR) {
            /* error or EOF
             * Cleanup happens in pipe_done()
             */
            rc = FALSE;
            break;
        }

        /* Keep going while the buffer was filled completely (more may be
         * pending) or the read was interrupted by a signal */
    } while (rc == buf_read_len || rc < 0);

    if (is_stderr) {
        op->stderr_data = data;
    } else {
        op->stdout_data = data;
    }

    return rc;
}
/*!
 * \internal
 * \brief Mainloop dispatch callback: collect pending stdout of an action
 *
 * \param[in,out] userdata  The svc_action_t being read
 *
 * \return Result of svc_read_output() on the action's stdout fd
 */
static int
dispatch_stdout(gpointer userdata)
{
    svc_action_t *action = userdata;
    int out_fd = action->opaque->stdout_fd;

    return svc_read_output(out_fd, action, FALSE);
}
/*!
 * \internal
 * \brief Mainloop dispatch callback: collect pending stderr of an action
 *
 * \param[in,out] userdata  The svc_action_t being read
 *
 * \return Result of svc_read_output() on the action's stderr fd
 */
static int
dispatch_stderr(gpointer userdata)
{
    svc_action_t *action = userdata;
    int err_fd = action->opaque->stderr_fd;

    return svc_read_output(err_fd, action, TRUE);
}
/*!
 * \internal
 * \brief Mainloop destroy callback for an action's stdout source
 *
 * Forgets the source, closes the descriptor unless it is one of the values
 * up to STDOUT_FILENO, and marks it invalid.
 *
 * \param[in,out] user_data  The svc_action_t that owns the pipe
 */
static void
pipe_out_done(gpointer user_data)
{
    svc_action_t *action = user_data;

    crm_trace("%p", action);

    action->opaque->stdout_gsource = NULL;
    if (STDOUT_FILENO < action->opaque->stdout_fd) {
        close(action->opaque->stdout_fd);
    }
    action->opaque->stdout_fd = -1;
}
/*!
 * \internal
 * \brief Mainloop destroy callback for an action's stderr source
 *
 * Forgets the source, closes the descriptor unless it is one of the values
 * up to STDERR_FILENO, and marks it invalid.
 *
 * \param[in,out] user_data  The svc_action_t that owns the pipe
 */
static void
pipe_err_done(gpointer user_data)
{
    svc_action_t *action = user_data;

    action->opaque->stderr_gsource = NULL;
    if (STDERR_FILENO < action->opaque->stderr_fd) {
        close(action->opaque->stderr_fd);
    }
    action->opaque->stderr_fd = -1;
}
/* Mainloop callbacks for an action's stdout pipe: read on dispatch, close on destroy */
static struct mainloop_fd_callbacks stdout_callbacks = {
.dispatch = dispatch_stdout,
.destroy = pipe_out_done,
};
/* Mainloop callbacks for an action's stderr pipe: read on dispatch, close on destroy */
static struct mainloop_fd_callbacks stderr_callbacks = {
.dispatch = dispatch_stderr,
.destroy = pipe_err_done,
};
/*!
 * \internal
 * \brief Set (overwriting) one environment variable, logging any failure
 *
 * \param[in] key        Variable name
 * \param[in] value      Variable value
 * \param[in] user_data  Unused
 */
static void
set_ocf_env(const char *key, const char *value, gpointer user_data)
{
    int rc = setenv(key, value, 1);

    if (rc == 0) {
        return;
    }
    crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value);
}
/*!
 * \internal
 * \brief Hash-table iterator: export a parameter as OCF_RESKEY_<key>
 *
 * \param[in] key        Parameter name (string)
 * \param[in] value      Parameter value (string)
 * \param[in] user_data  Passed through to set_ocf_env()
 */
static void
set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data)
{
    char env_name[500];
    const char *param_name = key;

    snprintf(env_name, sizeof(env_name), "OCF_RESKEY_%s", param_name);
    set_ocf_env(env_name, value, user_data);
}
/*!
 * \internal
 * \brief Hash-table iterator: export an alert parameter under its own name
 *
 * \param[in] key        Variable name (string)
 * \param[in] value      Variable value (string)
 * \param[in] user_data  Passed through to set_ocf_env()
 */
static void
set_alert_env(gpointer key, gpointer value, gpointer user_data)
{
    const char *env_name = key;

    set_ocf_env(env_name, value, user_data);
}
/*!
* \internal
* \brief Add environment variables suitable for an action
*
* \param[in] op Action to use
*
* \note Environment variables are added only for alerts and OCF agents.
*       Alert parameters are exported under their own names. OCF parameters
*       are exported with an "OCF_RESKEY_" prefix, together with the fixed
*       OCF_* variables set at the end of this function.
*/
static void
add_action_env_vars(const svc_action_t *op)
{
- if (op->alert_params) {
- g_hash_table_foreach(op->alert_params, set_alert_env, NULL);
+ if (safe_str_eq(op->standard, PCMK_ALERT_CLASS)) {
+ if (op->params) {
+ g_hash_table_foreach(op->params, set_alert_env, NULL);
+ }
return;
}
- if ((op->standard == NULL)
- || (strcasecmp(PCMK_RESOURCE_CLASS_OCF, op->standard) != 0)) {
+ if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF) == FALSE) {
return;
}
if (op->params) {
g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL);
}
set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL);
set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL);
set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL);
set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL);
if (op->rsc) {
set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL);
}
if (op->agent != NULL) {
set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL);
}
/* Notes: this is not added to specification yet. Sept 10,2004 */
if (op->provider != NULL) {
set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL);
}
}
/*!
 * \brief Timer callback that launches the next run of a recurring action
 *
 * Discards the output captured by the previous run, clears the stored timer
 * ID, and re-submits the action asynchronously.
 *
 * \param[in,out] data  The recurring svc_action_t
 *
 * \return Always FALSE
 */
gboolean
recurring_action_timer(gpointer data)
{
    svc_action_t *action = data;

    crm_debug("Scheduling another invocation of %s", action->id);

    /* This timer has fired, so the stored ID is no longer meaningful */
    action->opaque->repeat_timer = 0;

    /* Drop the results of the previous invocation */
    free(action->stderr_data);
    action->stderr_data = NULL;
    free(action->stdout_data);
    action->stdout_data = NULL;

    services_action_async(action, NULL);
    return FALSE;
}
/*!
 * \brief Finish processing of a completed (or failed) action
 *
 * Re-arms the repeat timer for a recurring action that was not cancelled,
 * invokes the action's callback if one is set, untracks the action, and
 * frees it when it is neither recurring nor synchronous.
 *
 * \param[in,out] op  Action to finalize
 *
 * \return TRUE if op was freed here; FALSE if op is still allocated.
 *         Returns FALSE if 'op' should be free'd by the caller
 */
gboolean
operation_finalize(svc_action_t * op)
{
int recurring = 0;
if (op->interval) {
if (op->cancel) {
/* a cancelled recurring action is reported as cancelled and not re-armed */
op->status = PCMK_LRM_OP_CANCELLED;
cancel_recurring_action(op);
} else {
recurring = 1;
op->opaque->repeat_timer = g_timeout_add(op->interval,
recurring_action_timer, (void *)op);
}
}
if (op->opaque->callback) {
op->opaque->callback(op);
}
op->pid = 0;
services_untrack_op(op);
if (!recurring && op->synchronous == FALSE) {
/*
* If this is a recurring action, do not free explicitly.
* It will get freed whenever the action gets cancelled.
*/
services_action_free(op);
return TRUE;
}
/* op stays allocated (recurring or synchronous) */
services_action_cleanup(op);
return FALSE;
}
/*!
 * \internal
 * \brief Mainloop child callback: record the result of an asynchronous action
 *
 * Drains and removes the stdout/stderr sources, translates the exit
 * information into op->rc and op->status, logs the captured output, and
 * hands the action to operation_finalize().
 *
 * \param[in] p         Mainloop child whose userdata is the svc_action_t
 * \param[in] pid       Process ID that exited (asserted equal to op->pid)
 * \param[in] core      Unused here
 * \param[in] signo     Terminating signal, or 0 if the process exited
 * \param[in] exitcode  Exit status, used when signo is 0
 */
static void
operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
svc_action_t *op = mainloop_child_userdata(p);
char *prefix = crm_strdup_printf("%s:%d", op->id, op->pid);
mainloop_clear_child_userdata(p);
op->status = PCMK_LRM_OP_DONE;
CRM_ASSERT(op->pid == pid);
crm_trace("%s %p %p", prefix, op->opaque->stderr_gsource, op->opaque->stdout_gsource);
if (op->opaque->stderr_gsource) {
/* Make sure we have read everything from the buffer.
* Depending on the priority mainloop gives the fd, operation_finished
* could occur before all the reads are done. Force the read now.*/
crm_trace("%s dispatching stderr", prefix);
dispatch_stderr(op);
crm_trace("%s: %p", op->id, op->stderr_data);
mainloop_del_fd(op->opaque->stderr_gsource);
op->opaque->stderr_gsource = NULL;
}
if (op->opaque->stdout_gsource) {
/* Make sure we have read everything from the buffer.
* Depending on the priority mainloop gives the fd, operation_finished
* could occur before all the reads are done. Force the read now.*/
crm_trace("%s dispatching stdout", prefix);
dispatch_stdout(op);
crm_trace("%s: %p", op->id, op->stdout_data);
mainloop_del_fd(op->opaque->stdout_gsource);
op->opaque->stdout_gsource = NULL;
}
if (signo) {
/* killed by a signal: distinguish a timeout from any other signal */
if (mainloop_child_timeout(p)) {
crm_warn("%s - timed out after %dms", prefix, op->timeout);
op->status = PCMK_LRM_OP_TIMEOUT;
op->rc = PCMK_OCF_TIMEOUT;
} else {
do_crm_log_unlikely((op->cancel) ? LOG_INFO : LOG_WARNING,
"%s - terminated with signal %d", prefix, signo);
op->status = PCMK_LRM_OP_ERROR;
op->rc = PCMK_OCF_SIGNAL;
}
} else {
op->rc = exitcode;
crm_debug("%s - exited with rc=%d", prefix, exitcode);
}
free(prefix);
/* log captured output; op->pid is still set here (it is reset in operation_finalize()) */
prefix = crm_strdup_printf("%s:%d:stderr", op->id, op->pid);
crm_log_output(LOG_NOTICE, prefix, op->stderr_data);
free(prefix);
prefix = crm_strdup_printf("%s:%d:stdout", op->id, op->pid);
crm_log_output(LOG_DEBUG, prefix, op->stdout_data);
free(prefix);
operation_finalize(op);
}
/*!
 * \internal
 * \brief Map an errno from stat(), fork() or execvp() to an action result
 *
 * \param[in,out] op     Action whose rc and status are set
 * \param[in]     error  Value of errno after the failed system call
 *
 * \return void
 */
static void
services_handle_exec_error(svc_action_t * op, int error)
{
    /* Mimic the return codes for each standard as that's what we'll convert
     * back from in get_uniform_rc(). OCF codes are the fallback. */
    int not_installed = PCMK_OCF_NOT_INSTALLED;
    int insufficient_priv = PCMK_OCF_INSUFFICIENT_PRIV;
    int exec_error = PCMK_OCF_EXEC_ERROR;

    if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB)
        && safe_str_eq(op->action, "status")) {
        not_installed = PCMK_LSB_STATUS_NOT_INSTALLED;
        insufficient_priv = PCMK_LSB_STATUS_INSUFFICIENT_PRIV;
        exec_error = PCMK_LSB_STATUS_UNKNOWN;
    }
#if SUPPORT_NAGIOS
    else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS)) {
        not_installed = NAGIOS_NOT_INSTALLED;
        insufficient_priv = NAGIOS_INSUFFICIENT_PRIV;
        /* generic execution errors keep the OCF code */
    }
#endif

    /* see execve(2), stat(2) and fork(2) */
    if ((error == ENOENT)           /* No such file or directory */
        || (error == EISDIR)        /* Is a directory */
        || (error == ENOTDIR)       /* Path component is not a directory */
        || (error == EINVAL)        /* Invalid executable format */
        || (error == ENOEXEC)) {    /* Invalid executable format */
        op->rc = not_installed;
        op->status = PCMK_LRM_OP_NOT_INSTALLED;

    } else if ((error == EACCES) || (error == EPERM)) {
        /* permission denied (various errors) */
        op->rc = insufficient_priv;
        op->status = PCMK_LRM_OP_ERROR;

    } else {
        op->rc = exec_error;
        op->status = PCMK_LRM_OP_ERROR;
    }
}
/*!
 * \internal
 * \brief Child-side setup and exec of an action's agent
 *
 * Resets SIGPIPE handling, scheduling policy and priority, moves the process
 * into its own process group, closes every descriptor above stderr, exports
 * the action's environment, and finally replaces the process image with
 * execvp(). If execvp() returns, the process exits with the mapped error
 * code, so this function does not return to its caller.
 *
 * \param[in,out] op  Action to execute
 */
static void
action_launch_child(svc_action_t *op)
{
int lpc;
/* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library.
* Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well.
* We do not want this to be inherited by the child process. By resetting this the signal
* to the default behavior, we avoid some potential odd problems that occur during OCF
* scripts when SIGPIPE is ignored by the environment. */
signal(SIGPIPE, SIG_DFL);
#if defined(HAVE_SCHED_SETSCHEDULER)
/* the agent should not inherit a non-default scheduling policy */
if (sched_getscheduler(0) != SCHED_OTHER) {
struct sched_param sp;
memset(&sp, 0, sizeof(sp));
sp.sched_priority = 0;
if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
crm_perror(LOG_ERR, "Could not reset scheduling policy to SCHED_OTHER for %s", op->id);
}
}
#endif
if (setpriority(PRIO_PROCESS, 0, 0) == -1) {
crm_perror(LOG_ERR, "Could not reset process priority to 0 for %s", op->id);
}
/* Man: The call setpgrp() is equivalent to setpgid(0,0)
* _and_ compiles on BSD variants too
* need to investigate if it works the same too.
*/
setpgid(0, 0);
/* close all descriptors except stdin/out/err and channels to logd */
for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) {
close(lpc);
}
#if SUPPORT_CIBSECRETS
if (replace_secret_params(op->rsc, op->params) < 0) {
/* replacing secrets failed! */
if (safe_str_eq(op->action,"stop")) {
/* don't fail on stop! */
crm_info("proceeding with the stop operation for %s", op->rsc);
} else {
crm_err("failed to get secrets for %s, "
"considering resource not configured", op->rsc);
_exit(PCMK_OCF_NOT_CONFIGURED);
}
}
#endif
add_action_env_vars(op);
/* execute the RA */
execvp(op->opaque->exec, op->opaque->args);
/* Only reached if execvp() failed */
/* Most cases should have been already handled by stat() */
services_handle_exec_error(op, errno);
_exit(op->rc);
}
#ifndef HAVE_SYS_SIGNALFD_H
/* Self-pipe used to turn SIGCHLD into a poll()-able event:
 * [0] is the read end, [1] the write end; -1 while not set up */
static int sigchld_pipe[2] = { -1, -1 };

/*!
 * \internal
 * \brief SIGCHLD handler that wakes up poll() by writing to the self-pipe
 *
 * \param[in] signo  Signal number (unused)
 *
 * \note errno is saved and restored so that the interrupted code does not
 *       observe an errno changed by write() or by the logging below.
 * \note NOTE(review): crm_perror() is presumably not async-signal-safe;
 *       it is kept to preserve the existing trace message. Confirm whether
 *       it can be dropped.
 */
static void
sigchld_handler(int signo)
{
    int saved_errno = errno;

    (void) signo;

    if ((sigchld_pipe[1] >= 0) && (write(sigchld_pipe[1], "", 1) == -1)) {
        crm_perror(LOG_TRACE, "Could not poke SIGCHLD self-pipe");
    }
    errno = saved_errno;
}
#endif
/*!
 * \internal
 * \brief Wait for a synchronous action's child and collect its output
 *
 * Polls the child's stdout and stderr pipes together with a SIGCHLD
 * notification fd (a signalfd when available, otherwise the read end of
 * sigchld_pipe) until the child has been reaped, the timeout expires, or
 * poll() fails. Sets op->rc and op->status, kills the child if it has not
 * been reaped, then drains and closes both pipes.
 *
 * \param[in,out] op    Action being waited for (op->pid must be the child)
 * \param[in]     mask  Signal set passed to signalfd(); not used otherwise
 */
static void
action_synced_wait(svc_action_t * op, sigset_t *mask)
{
int status = 0;
int timeout = op->timeout;
int sfd = -1;
time_t start = -1;
struct pollfd fds[3];
int wait_rc = 0;
#ifdef HAVE_SYS_SIGNALFD_H
sfd = signalfd(-1, mask, SFD_NONBLOCK);
if (sfd < 0) {
crm_perror(LOG_ERR, "signalfd() failed");
}
#else
sfd = sigchld_pipe[0];
#endif
/* fds[0] = child stdout, fds[1] = child stderr, fds[2] = SIGCHLD notification */
fds[0].fd = op->opaque->stdout_fd;
fds[0].events = POLLIN;
fds[0].revents = 0;
fds[1].fd = op->opaque->stderr_fd;
fds[1].events = POLLIN;
fds[1].revents = 0;
fds[2].fd = sfd;
fds[2].events = POLLIN;
fds[2].revents = 0;
crm_trace("Waiting for %d", op->pid);
start = time(NULL);
do {
int poll_rc = poll(fds, 3, timeout);
if (poll_rc > 0) {
if (fds[0].revents & POLLIN) {
svc_read_output(op->opaque->stdout_fd, op, FALSE);
}
if (fds[1].revents & POLLIN) {
svc_read_output(op->opaque->stderr_fd, op, TRUE);
}
if (fds[2].revents & POLLIN) {
#ifdef HAVE_SYS_SIGNALFD_H
struct signalfd_siginfo fdsi;
ssize_t s;
s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo));
if (s != sizeof(struct signalfd_siginfo)) {
crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd);
} else if (fdsi.ssi_signo == SIGCHLD) {
#else
if (1) {
/* Clear out the sigchld pipe. */
char ch;
while (read(sfd, &ch, 1) == 1) /*omit*/;
#endif
/* both preprocessor variants open a block that ends two braces below */
wait_rc = waitpid(op->pid, &status, WNOHANG);
if (wait_rc > 0) {
break;
} else if (wait_rc < 0){
if (errno == ECHILD) {
/* Here, don't dare to kill and bail out... */
break;
} else {
/* ...otherwise pretend process still runs. */
wait_rc = 0;
}
crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid);
}
}
}
} else if (poll_rc == 0) {
/* poll() timed out */
timeout = 0;
break;
} else if (poll_rc < 0) {
if (errno != EINTR) {
crm_perror(LOG_ERR, "poll() failed");
break;
}
}
/* remaining time in ms; elapsed time comes from time(), i.e. whole seconds */
timeout = op->timeout - (time(NULL) - start) * 1000;
} while ((op->timeout < 0 || timeout > 0));
crm_trace("Child done: %d", op->pid);
if (wait_rc <= 0) {
/* child was not reaped: timeout, poll() failure or waitpid() failure */
op->rc = PCMK_OCF_UNKNOWN_ERROR;
if (op->timeout > 0 && timeout <= 0) {
op->status = PCMK_LRM_OP_TIMEOUT;
crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout);
} else {
op->status = PCMK_LRM_OP_ERROR;
}
/* If only child hasn't been successfully waited for, yet.
This is to limit killing wrong target a bit more. */
if (wait_rc == 0 && waitpid(op->pid, &status, WNOHANG) == 0) {
if (kill(op->pid, SIGKILL)) {
crm_err("kill(%d, KILL) failed: %d", op->pid, errno);
}
/* Safe to skip WNOHANG here as we sent non-ignorable signal. */
while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/;
}
} else if (WIFEXITED(status)) {
op->status = PCMK_LRM_OP_DONE;
op->rc = WEXITSTATUS(status);
crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc);
} else if (WIFSIGNALED(status)) {
/* NOTE(review): op->rc is not assigned on this path - confirm intended */
int signo = WTERMSIG(status);
op->status = PCMK_LRM_OP_ERROR;
crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo);
}
#ifdef WCOREDUMP
if (WCOREDUMP(status)) {
crm_err("Managed %s process %d dumped core", op->id, op->pid);
}
#endif
/* pick up anything written after the last poll(), then release the pipes */
svc_read_output(op->opaque->stdout_fd, op, FALSE);
svc_read_output(op->opaque->stderr_fd, op, TRUE);
close(op->opaque->stdout_fd);
close(op->opaque->stderr_fd);
#ifdef HAVE_SYS_SIGNALFD_H
close(sfd);
#endif
}
/*!
 * \brief Fork and execute an action's agent
 *
 * Verifies with stat() that the executable exists, creates pipes for the
 * child's stdout and stderr, and forks. The child redirects its output into
 * the pipes and calls action_launch_child(). For a synchronous action the
 * parent arranges SIGCHLD notification before the fork and waits in
 * action_synced_wait(); for an asynchronous action it registers the child
 * and both pipes with the mainloop and records the action as in-flight.
 *
 * \param[in,out] op  Action to execute
 *
 * \return See the two notes below
 */
/* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */
/* For a synchronous 'op', returns FALSE if 'op' fails */
gboolean
services_os_action_execute(svc_action_t * op)
{
int stdout_fd[2];
int stderr_fd[2];
struct stat st;
sigset_t *pmask;
#ifdef HAVE_SYS_SIGNALFD_H
sigset_t mask;
sigset_t old_mask;
#define sigchld_cleanup() do { \
if (sigismember(&old_mask, SIGCHLD) == 0) { \
if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { \
crm_perror(LOG_ERR, "sigprocmask() failed to unblock sigchld"); \
} \
} \
} while (0)
#else
struct sigaction sa;
struct sigaction old_sa;
#define sigchld_cleanup() do { \
if (sigaction(SIGCHLD, &old_sa, NULL) < 0) { \
crm_perror(LOG_ERR, "sigaction() failed to remove sigchld handler"); \
} \
close(sigchld_pipe[0]); \
close(sigchld_pipe[1]); \
sigchld_pipe[0] = sigchld_pipe[1] = -1; \
} while(0)
#endif
/* Fail fast */
if(stat(op->opaque->exec, &st) != 0) {
int rc = errno;
crm_warn("Cannot execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
return FALSE;
}
if (pipe(stdout_fd) < 0) {
int rc = errno;
crm_err("pipe(stdout_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
return FALSE;
}
if (pipe(stderr_fd) < 0) {
int rc = errno;
/* release the stdout pipe created above */
close(stdout_fd[0]);
close(stdout_fd[1]);
crm_err("pipe(stderr_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
return FALSE;
}
if (op->synchronous) {
/* arrange SIGCHLD notification for action_synced_wait() before forking */
#ifdef HAVE_SYS_SIGNALFD_H
sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);
sigemptyset(&old_mask);
if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) {
crm_perror(LOG_ERR, "sigprocmask() failed to block sigchld");
}
pmask = &mask;
#else
if(pipe(sigchld_pipe) == -1) {
crm_perror(LOG_ERR, "pipe() failed");
}
set_fd_opts(sigchld_pipe[0], O_NONBLOCK);
set_fd_opts(sigchld_pipe[1], O_NONBLOCK);
sa.sa_handler = sigchld_handler;
sa.sa_flags = 0;
sigemptyset(&sa.sa_mask);
if (sigaction(SIGCHLD, &sa, &old_sa) < 0) {
crm_perror(LOG_ERR, "sigaction() failed to set sigchld handler");
}
pmask = NULL;
#endif
}
op->pid = fork();
switch (op->pid) {
case -1:
{
int rc = errno;
close(stdout_fd[0]);
close(stdout_fd[1]);
close(stderr_fd[0]);
close(stderr_fd[1]);
crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc);
services_handle_exec_error(op, rc);
if (!op->synchronous) {
return operation_finalize(op);
}
sigchld_cleanup();
return FALSE;
}
case 0: /* Child */
/* the child keeps only the write ends, installed as stdout/stderr */
close(stdout_fd[0]);
close(stderr_fd[0]);
if (STDOUT_FILENO != stdout_fd[1]) {
if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) {
crm_err("dup2() failed (stdout)");
}
close(stdout_fd[1]);
}
if (STDERR_FILENO != stderr_fd[1]) {
if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) {
crm_err("dup2() failed (stderr)");
}
close(stderr_fd[1]);
}
if (op->synchronous) {
sigchld_cleanup();
}
action_launch_child(op);
CRM_ASSERT(0); /* action_launch_child is effectively noreturn */
}
/* Only the parent reaches here */
/* the parent keeps only the read ends, switched to non-blocking */
close(stdout_fd[1]);
close(stderr_fd[1]);
op->opaque->stdout_fd = stdout_fd[0];
set_fd_opts(op->opaque->stdout_fd, O_NONBLOCK);
op->opaque->stderr_fd = stderr_fd[0];
set_fd_opts(op->opaque->stderr_fd, O_NONBLOCK);
if (op->synchronous) {
/* pmask was assigned in the synchronous setup branch above */
action_synced_wait(op, pmask);
sigchld_cleanup();
} else {
crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec);
mainloop_child_add_with_flags(op->pid,
op->timeout,
op->id,
op,
(op->flags & SVC_ACTION_LEAVE_GROUP) ? mainloop_leave_pid_group : 0,
operation_finished);
op->opaque->stdout_gsource = mainloop_add_fd(op->id,
G_PRIORITY_LOW,
op->opaque->stdout_fd, op, &stdout_callbacks);
op->opaque->stderr_gsource = mainloop_add_fd(op->id,
G_PRIORITY_LOW,
op->opaque->stderr_fd, op, &stderr_callbacks);
services_add_inflight_op(op);
}
return TRUE;
}
/*!
 * \brief List the entries of a directory, filtered by type
 *
 * Entries whose name starts with '.' and entries that cannot be stat()ed
 * are always skipped.
 *
 * \param[in] root        Directory to scan
 * \param[in] files       If TRUE, skip directories; if FALSE, skip regular
 *                        files
 * \param[in] executable  If TRUE (and files is TRUE), skip regular files
 *                        that have no execute bit set for user, group or
 *                        other
 *
 * \return Newly allocated list of newly allocated entry names in
 *         alphasort() order, or NULL if nothing matched or the scan failed
 */
GList *
services_os_get_directory_list(const char *root, gboolean files, gboolean executable)
{
    GList *list = NULL;
    struct dirent **namelist = NULL;
    int entries = 0, lpc = 0;
    char buffer[PATH_MAX];

    entries = scandir(root, &namelist, NULL, alphasort);
    if (entries <= 0) {
        if (entries == 0) {
            /* A successful scan of an empty directory still allocates the
             * array itself, so release it (free(NULL) is a no-op) */
            free(namelist);
        }
        return list;
    }

    for (lpc = 0; lpc < entries; lpc++) {
        struct stat sb;
        gboolean wanted = TRUE;

        if ('.' == namelist[lpc]->d_name[0]) {
            wanted = FALSE;

        } else {
            snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name);

            if (stat(buffer, &sb)) {
                /* Unreadable entry: skip it, but still free it below */
                wanted = FALSE;

            } else if (S_ISDIR(sb.st_mode)) {
                if (files) {
                    wanted = FALSE;
                }

            } else if (S_ISREG(sb.st_mode)) {
                if (files == FALSE) {
                    wanted = FALSE;

                } else if (executable
                           && (sb.st_mode & S_IXUSR) == 0
                           && (sb.st_mode & S_IXGRP) == 0
                           && (sb.st_mode & S_IXOTH) == 0) {
                    wanted = FALSE;
                }
            }
        }

        if (wanted) {
            list = g_list_append(list, strdup(namelist[lpc]->d_name));
        }

        /* Every entry is released exactly once, whichever path was taken */
        free(namelist[lpc]);
    }

    free(namelist);
    return list;
}
/*!
 * \brief List the entries of LSB_ROOT_DIR (both filter flags TRUE)
 *
 * \return Result of get_directory_list()
 */
GList *
resources_os_list_lsb_agents(void)
{
    GList *lsb_agents = get_directory_list(LSB_ROOT_DIR, TRUE, TRUE);

    return lsb_agents;
}
/*!
 * \brief List the entries under OCF_ROOT_DIR "/resource.d"
 *
 * \return Result of get_directory_list() with files=FALSE, executable=TRUE
 */
GList *
resources_os_list_ocf_providers(void)
{
    GList *ocf_providers = get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE, TRUE);

    return ocf_providers;
}
/*!
 * \brief List OCF agents of one provider, or of every provider
 *
 * \param[in] provider  Provider directory name, or NULL for all providers
 *
 * \return Newly allocated list of agent names (NULL if none)
 */
GList *
resources_os_list_ocf_agents(const char *provider)
{
    GList *agents = NULL;
    GList *all_providers = NULL;
    GList *item = NULL;

    if (provider != NULL) {
        char path[500];

        snprintf(path, sizeof(path), "%s/resource.d/%s", OCF_ROOT_DIR, provider);
        return get_directory_list(path, TRUE, TRUE);
    }

    /* No provider given: gather the agents of each provider in turn.
     * g_list_concat() leaves the first list untouched when the second
     * one is empty. */
    all_providers = resources_os_list_ocf_providers();
    item = all_providers;
    while (item != NULL) {
        agents = g_list_concat(agents, resources_os_list_ocf_agents(item->data));
        item = item->next;
    }

    g_list_free_full(all_providers, free);
    return agents;
}
#if SUPPORT_NAGIOS
/*!
 * \brief List nagios plugins that also have a metadata file
 *
 * \return Newly allocated list of the plugin names from NAGIOS_PLUGIN_DIR
 *         for which NAGIOS_METADATA_DIR/<name>.xml can be stat()ed
 */
GList *
resources_os_list_nagios_agents(void)
{
    GList *usable = NULL;
    GList *candidates = get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE);
    GList *item = candidates;

    /* Make sure both the plugin and its metadata exist */
    while (item != NULL) {
        struct stat st;
        const char *plugin = item->data;
        char *metadata = crm_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin);
        int missing = stat(metadata, &st);

        if (missing == 0) {
            usable = g_list_append(usable, strdup(plugin));
        }
        free(metadata);
        item = item->next;
    }

    g_list_free_full(candidates, free);
    return usable;
}
#endif
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index afdfd01b02..76c7e5058e 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -1,1793 +1,1793 @@
/*
* Copyright (c) 2012 David Vossel <davidvossel@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/common/alerts_internal.h>
#include <crm/msg_xml.h>
#include <lrmd_private.h>
#ifdef HAVE_SYS_TIMEB_H
# include <sys/timeb.h>
#endif
#define EXIT_REASON_MAX_LEN 128
GHashTable *rsc_list = NULL;
/* One command (resource action or alert) tracked by the lrmd */
typedef struct lrmd_cmd_s {
int timeout;
int interval;
int start_delay;
/* copy of timeout as it was when the command was created */
int timeout_orig;
int call_id;
int exec_rc;
int lrmd_op_status;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
/* flags such as SVC_ACTION_LEAVE_GROUP */
int service_flags;
/* the strings below are owned by the command and freed in free_lrmd_cmd() */
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *real_action;
char *exit_reason;
char *output;
char *userdata_str;
/* when set, this cmd should go through a container wrapper */
/* (points at a value stored in params; not freed separately) */
const char *isolation_wrapper;
#ifdef HAVE_SYS_TIMEB_H
/* recurring and systemd operations may involve more than one lrmd command
* per operation, so they need info about original and most recent
*/
struct timeb t_first_run; /* Timestamp of when op first ran */
struct timeb t_run; /* Timestamp of when op most recently ran */
struct timeb t_first_queue; /* Timestamp of when op first was queued */
struct timeb t_queue; /* Timestamp of when op most recently was queued */
struct timeb t_rcchange; /* Timestamp of last rc change */
#endif
int first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
/* parameter table built with xml2list(); destroyed in free_lrmd_cmd() */
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
/*!
 * \internal
 * \brief Log that a command has finished
 *
 * Monitors are logged at debug level, everything else at info level.
 *
 * \param[in] cmd         Command that finished
 * \param[in] exec_time   Execution time in ms (logged only with timeb support)
 * \param[in] queue_time  Queue time in ms (logged only with timeb support)
 */
static void
log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
{
    char pid_str[32] = { 0, };
    const char *pid_label = "";
    int log_level = safe_str_eq(cmd->action, "monitor") ? LOG_DEBUG : LOG_INFO;

    /* Only mention a pid when one was recorded */
    if (cmd->last_pid != 0) {
        pid_label = "pid:";
        snprintf(pid_str, sizeof(pid_str), "%d", cmd->last_pid);
    }

#ifdef HAVE_SYS_TIMEB_H
    do_crm_log(log_level,
               "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms",
               cmd->rsc_id, cmd->action, cmd->call_id, pid_label, pid_str,
               cmd->exec_rc, exec_time, queue_time);
#else
    do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d",
               cmd->rsc_id, cmd->action, cmd->call_id, pid_label, pid_str,
               cmd->exec_rc);
#endif
}
/*!
 * \internal
 * \brief Log that a command is about to be executed
 *
 * Monitors are logged at debug level, everything else at info level.
 *
 * \param[in] cmd  Command being executed
 */
static void
log_execute(lrmd_cmd_t * cmd)
{
    int log_level = safe_str_eq(cmd->action, "monitor") ? LOG_DEBUG : LOG_INFO;

    do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
               cmd->rsc_id, cmd->action, cmd->call_id);
}
/*!
 * \internal
 * \brief Map "monitor" to "status" for LSB, service and systemd resources
 *
 * \param[in] rsc     Resource whose class decides the mapping
 * \param[in] action  Action name as requested
 *
 * \return "status" when the mapping applies, otherwise action itself
 */
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
    if (!safe_str_eq(action, "monitor")) {
        return action;
    }

    if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_LSB)
        || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)
        || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SYSTEMD)) {
        return "status";
    }
    return action;
}
/*!
 * \internal
 * \brief Create a resource object from a request message
 *
 * \param[in] msg  Request XML containing an F_LRMD_RSC element
 *
 * \return Newly allocated resource with its ID, class, provider and type
 *         copied from the XML and a work trigger registered for it
 */
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
    xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
    lrmd_rsc_t *new_rsc = calloc(1, sizeof(lrmd_rsc_t));

    crm_element_value_int(msg, F_LRMD_CALLOPTS, &new_rsc->call_opts);

    /* Identity of the resource */
    new_rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
    new_rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
    new_rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
    new_rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);

    /* Trigger used to schedule lrmd_rsc_dispatch() for this resource */
    new_rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, new_rsc);

    return new_rsc;
}
/*!
 * \internal
 * \brief Hash-table iterator: copy one string pair into another table
 *
 * \param[in]     key        Key string to duplicate
 * \param[in]     value      Value string to duplicate
 * \param[in,out] user_data  Destination GHashTable
 */
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
    GHashTable *dest = user_data;
    char *key_copy = strdup(key);
    char *value_copy = strdup(value);

    g_hash_table_replace(dest, key_copy, value_copy);
}
/*!
 * \internal
 * \brief Create a command object for a resource action request
 *
 * \param[in] msg     Request XML containing an F_LRMD_RSC element
 * \param[in] client  Client that sent the request
 * \param[in] rsc     Resource the action is for
 *
 * \return Newly allocated command
 */
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc)
{
    xmlNode *action_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
    lrmd_cmd_t *new_cmd = calloc(1, sizeof(lrmd_cmd_t));

    /* Request-level fields */
    crm_element_value_int(msg, F_LRMD_CALLOPTS, &new_cmd->call_opts);
    new_cmd->client_id = strdup(client->id);
    crm_element_value_int(msg, F_LRMD_CALLID, &new_cmd->call_id);

    /* Action-level fields */
    crm_element_value_int(action_xml, F_LRMD_RSC_INTERVAL, &new_cmd->interval);
    crm_element_value_int(action_xml, F_LRMD_TIMEOUT, &new_cmd->timeout);
    crm_element_value_int(action_xml, F_LRMD_RSC_START_DELAY, &new_cmd->start_delay);
    new_cmd->timeout_orig = new_cmd->timeout;

    new_cmd->origin = crm_element_value_copy(action_xml, F_LRMD_ORIGIN);
    new_cmd->action = crm_element_value_copy(action_xml, F_LRMD_RSC_ACTION);
    new_cmd->userdata_str = crm_element_value_copy(action_xml, F_LRMD_RSC_USERDATA_STR);
    new_cmd->rsc_id = crm_element_value_copy(action_xml, F_LRMD_RSC_ID);

    new_cmd->params = xml2list(action_xml);

    /* Commands going through an isolation wrapper also carry a description
     * of the resource in their parameters */
    new_cmd->isolation_wrapper = g_hash_table_lookup(new_cmd->params, "CRM_meta_isolation_wrapper");
    if (new_cmd->isolation_wrapper != NULL) {
        if (g_hash_table_lookup(new_cmd->params, "CRM_meta_isolation_instance") == NULL) {
            g_hash_table_insert(new_cmd->params, strdup("CRM_meta_isolation_instance"), strdup(rsc->rsc_id));
        }
        if (rsc->provider != NULL) {
            g_hash_table_insert(new_cmd->params, strdup("CRM_meta_provider"), strdup(rsc->provider));
        }
        g_hash_table_insert(new_cmd->params, strdup("CRM_meta_class"), strdup(rsc->class));
        g_hash_table_insert(new_cmd->params, strdup("CRM_meta_type"), strdup(rsc->type));
    }

    if (safe_str_eq(g_hash_table_lookup(new_cmd->params, "CRM_meta_on_fail"), "block")) {
        crm_debug("Setting flag to leave pid group on timeout and only kill action pid for %s_%s_%d", new_cmd->rsc_id, new_cmd->action, new_cmd->interval);
        new_cmd->service_flags |= SVC_ACTION_LEAVE_GROUP;
    }

    return new_cmd;
}
/*!
 * \internal
 * \brief Create a command object for an alert request
 *
 * \param[in] msg     Request XML containing an F_LRMD_ALERT element
 * \param[in] client  Client that sent the request
 * \param[in] rsc     Not used by this function
 *
 * \return Newly allocated command whose action is "start"
 */
static lrmd_cmd_t *
create_alert_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc)
{
    xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, msg, LOG_ERR);
    lrmd_cmd_t *new_cmd = calloc(1, sizeof(lrmd_cmd_t));

    /* Request-level fields */
    crm_element_value_int(msg, F_LRMD_CALLOPTS, &new_cmd->call_opts);
    new_cmd->client_id = strdup(client->id);
    crm_element_value_int(msg, F_LRMD_CALLID, &new_cmd->call_id);

    /* Alert-level fields */
    crm_element_value_int(alert_xml, F_LRMD_TIMEOUT, &new_cmd->timeout);
    new_cmd->timeout_orig = new_cmd->timeout;
    new_cmd->origin = crm_element_value_copy(alert_xml, F_LRMD_ORIGIN);
    new_cmd->action = strdup("start");
    new_cmd->rsc_id = crm_element_value_copy(alert_xml, F_LRMD_ALERT_ID);
    new_cmd->params = xml2list(alert_xml);

    return new_cmd;
}
/*!
 * \internal
 * \brief Release a command together with its timers, parameters and strings
 *
 * \param[in,out] cmd  Command to free
 */
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
    /* Pending timers hold a pointer to cmd, so drop them first */
    if (cmd->stonith_recurring_id != 0) {
        g_source_remove(cmd->stonith_recurring_id);
    }
    if (cmd->delay_id != 0) {
        g_source_remove(cmd->delay_id);
    }

    if (cmd->params != NULL) {
        g_hash_table_destroy(cmd->params);
    }

    free(cmd->client_id);
    free(cmd->origin);
    free(cmd->rsc_id);
    free(cmd->action);
    free(cmd->real_action);
    free(cmd->exit_reason);
    free(cmd->output);
    free(cmd->userdata_str);
    free(cmd);
}
/*!
 * \internal
 * \brief Timer callback: queue a recurring stonith command for execution
 *
 * Moves the command from its resource's recurring_ops list to the end of
 * its pending_ops list and sets the resource's work trigger.
 *
 * \param[in,out] data  The lrmd_cmd_t whose interval has elapsed
 *
 * \return Always FALSE
 */
static gboolean
stonith_recurring_op_helper(gpointer data)
{
    lrmd_cmd_t *cmd = data;
    lrmd_rsc_t *owner = NULL;

    cmd->stonith_recurring_id = 0;

    if (cmd->rsc_id == NULL) {
        return FALSE;
    }

    owner = g_hash_table_lookup(rsc_list, cmd->rsc_id);
    CRM_ASSERT(owner != NULL);

    /* take it out of recurring_ops list, and put it in the pending ops
     * to be executed */
    owner->recurring_ops = g_list_remove(owner->recurring_ops, cmd);
    owner->pending_ops = g_list_append(owner->pending_ops, cmd);

#ifdef HAVE_SYS_TIMEB_H
    /* Record queue time, remembering the very first one */
    ftime(&cmd->t_queue);
    if (cmd->t_first_queue.time == 0) {
        cmd->t_first_queue = cmd->t_queue;
    }
#endif

    mainloop_set_trigger(owner->work);
    return FALSE;
}
/*!
 * \internal
 * \brief Timer callback invoked when a command's start delay has elapsed
 *
 * Clears the command's delay timer ID and, if the command's resource is still
 * registered, triggers that resource's work source.
 *
 * \param[in] data  Command (lrmd_cmd_t *) whose start delay expired
 *
 * \return Always FALSE
 */
static gboolean
start_delay_helper(gpointer data)
{
    lrmd_cmd_t *cmd = data;
    lrmd_rsc_t *rsc = NULL;

    cmd->delay_id = 0;

    if (cmd->rsc_id != NULL) {
        rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
    }
    if (rsc != NULL) {
        mainloop_set_trigger(rsc->work);
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Merge a new recurring command into an equivalent existing one
 *
 * A duplicate is an already known command of the same resource with the same
 * action name and the same (non-zero) interval, found either in the pending
 * list (queued, not yet executed) or in the recurring list (already executed
 * at least once).
 *
 * When a duplicate is found, the new command's call ID and user data are
 * moved into the existing command, the existing command is made to notify
 * again, and the new command is freed.
 *
 * \param[in] rsc  Resource the command is for
 * \param[in] cmd  New command; freed by this function if it was merged
 *
 * \return TRUE if \p cmd was merged (and must no longer be used by the
 *         caller), FALSE if no duplicate exists
 */
static gboolean
merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
    GListPtr gIter = NULL;
    lrmd_cmd_t *dup = NULL;
    gboolean dup_pending = FALSE;

    /* Only recurring commands can be duplicates */
    if (cmd->interval == 0) {
        return FALSE;
    }

    for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
        dup = gIter->data;
        if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) {
            dup_pending = TRUE;
            goto merge_dup;
        }
    }

    /* if dup is in recurring_ops list, that means it has already executed
     * and is in the interval loop. we can't just remove it in this case. */
    for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
        dup = gIter->data;
        if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) {
            goto merge_dup;
        }
    }

    return FALSE;

merge_dup:

    /* This should not occur, if it does we need to investigate in the crmd
     * how something like this is possible */
    crm_warn("Duplicate recurring op entry detected (%s_%s_%d), merging with previous op entry",
             rsc->rsc_id,
             normalize_action_name(rsc, dup->action),
             dup->interval);

    /* merge */
    dup->first_notify_sent = 0;
    free(dup->userdata_str);
    dup->userdata_str = cmd->userdata_str;
    cmd->userdata_str = NULL;
    dup->call_id = cmd->call_id;

    /* A duplicate that is still pending will run by itself once it reaches
     * the front of the queue. Only a duplicate that already ran needs a kick
     * so the requester gets a fresh result without waiting a full interval.
     */
    if (dup_pending == FALSE) {
        if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
            /* Act on the existing command, never on the new one: the new
             * command is freed below and must not be put into any list.
             * A set timer means the existing command is waiting for its next
             * interval; without one it is in flight and will report by itself.
             */
            if (dup->stonith_recurring_id) {
                g_source_remove(dup->stonith_recurring_id);
                dup->stonith_recurring_id = 0;
                stonith_recurring_op_helper(dup);
            }

        } else {
            /* if we've already handed this to the service lib, kick off an early execution */
            services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval);
        }
    }

    free_lrmd_cmd(cmd);

    return TRUE;
}
/*!
 * \internal
 * \brief Queue a command for execution on a resource
 *
 * A recurring command that duplicates an existing one is merged into it
 * instead of being queued. A "stop" command first cancels all recurring
 * operations of the resource.
 *
 * \param[in] rsc  Resource to run the command on
 * \param[in] cmd  Command to queue; may have been freed on return (if merged)
 */
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
    CRM_CHECK(cmd != NULL, return);
    CRM_CHECK(rsc != NULL, return);

    crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);

    if (merge_recurring_duplicate(rsc, cmd)) {
        /* duplicate recurring cmd found, cmds merged */
        return;
    }

    /* crmd expects lrmd to automatically cancel recurring ops before rsc stops. */
    if (safe_str_eq(cmd->action, "stop")) {
        cancel_all_recurring(rsc, NULL);
    }

    rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);

#ifdef HAVE_SYS_TIMEB_H
    ftime(&cmd->t_queue);
    if (cmd->t_first_queue.time == 0) {
        cmd->t_first_queue = cmd->t_queue;
    }
#endif

    mainloop_set_trigger(rsc->work);

    /* A delayed command stays queued until its delay timer has fired */
    if (cmd->start_delay) {
        cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
    }
}
/*!
 * \internal
 * \brief Send a basic reply (return code and call ID) to a client
 *
 * \param[in] client   Client to reply to
 * \param[in] rc       Return code to report
 * \param[in] id       Message ID of the request being answered
 * \param[in] call_id  Call ID to report
 */
static void
send_reply(crm_client_t * client, int rc, uint32_t id, int call_id)
{
    xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
    int send_rc = 0;

    crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
    crm_xml_add_int(reply, F_LRMD_RC, rc);
    crm_xml_add_int(reply, F_LRMD_CALLID, call_id);

    send_rc = lrmd_server_send_reply(client, id, reply);
    free_xml(reply);

    if (send_rc < 0) {
        crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
    }
}
/*!
 * \internal
 * \brief Send a notification to one client (usable as a hash table iterator)
 *
 * \param[in] key        Ignored
 * \param[in] value      Client (crm_client_t *) to notify
 * \param[in] user_data  Notification XML (xmlNode *) to send
 */
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
    crm_client_t *client = value;
    xmlNode *update_msg = user_data;

    if (client == NULL) {
        crm_err("Asked to send event to NULL client");
        return;
    }
    if (client->name == NULL) {
        crm_trace("Asked to send event to client with no name");
        return;
    }

    if (lrmd_server_send_notify(client, update_msg) <= 0) {
        crm_warn("Notification of client %s/%s failed", client->name, client->id);
    }
}
#ifdef HAVE_SYS_TIMEB_H
/*!
 * \internal
 * \brief Compute how many milliseconds lie between two points in time
 *
 * \param[in] now  Later time (NULL means "use the current time")
 * \param[in] old  Earlier time
 *
 * \return Difference in milliseconds, or 0 if \p old is NULL or its time
 *         member is zero
 */
static int
time_diff_ms(struct timeb *now, struct timeb *old)
{
    struct timeb current = { 0, };
    double elapsed_sec = 0;

    if (now == NULL) {
        ftime(&current);
        now = &current;
    }

    /* Without a usable start time there is nothing to measure */
    if ((old == NULL) || (old->time == 0)) {
        return 0;
    }

    elapsed_sec = difftime(now->time, old->time);
    return elapsed_sec * 1000 + now->millitm - old->millitm;
}
/*!
 * \internal
 * \brief Restore a command's timestamps to those of its first execution
 *
 * Overwrite the command's queued and run timestamps with the ones recorded
 * when the command was first queued and first run, so that reported times
 * cover everything since then rather than only the most recent iteration
 * (relevant for recurring and systemd operations).
 *
 * \param[in] cmd  LRMD command object to reset
 *
 * \note It's not obvious what the queued time should be for a systemd
 *       start/stop operation, which might go like this:
 *         initial command queued 5ms, runs 3s
 *         monitor command queued 10ms, runs 10s
 *         monitor command queued 10ms, runs 10s
 *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 *       implementation will report 5ms. If it's 25ms, then we need to
 *       subtract 20ms from the total exec time so as not to count it twice.
 *       We can implement that later if it matters to anyone ...
 */
static void
cmd_original_times(lrmd_cmd_t * cmd)
{
    cmd->t_queue = cmd->t_first_queue;
    cmd->t_run = cmd->t_first_run;
}
#endif
/*!
 * \internal
 * \brief Notify clients about the result of a command
 *
 * Logs the completion, then builds a notification carrying the command's
 * result, timing information and parameters. The notification goes either to
 * the originating client only (lrmd_opt_notify_orig_only) or to all connected
 * clients. With lrmd_opt_notify_changes_only, a result identical to the
 * previously notified one is not sent again.
 *
 * \param[in] cmd  Command whose result should be reported
 */
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
    xmlNode *notify = NULL;
    int queue_time = 0;
    int exec_time = 0;

#ifdef HAVE_SYS_TIMEB_H
    exec_time = time_diff_ms(NULL, &cmd->t_run);
    queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue);
#endif

    log_finished(cmd, exec_time, queue_time);

    /* Once a first result has been sent, and the client asked to hear about
     * changes only, suppress results identical to the previous notification. */
    if (cmd->first_notify_sent
        && (cmd->call_opts & lrmd_opt_notify_changes_only)
        && (cmd->last_notify_rc == cmd->exec_rc)
        && (cmd->last_notify_op_status == cmd->lrmd_op_status)) {
        /* only send changes */
        return;
    }

    /* Remember what is being reported for the next comparison */
    cmd->first_notify_sent = 1;
    cmd->last_notify_rc = cmd->exec_rc;
    cmd->last_notify_op_status = cmd->lrmd_op_status;

    notify = create_xml_node(NULL, T_LRMD_NOTIFY);

    crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
    crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
    crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval);
    crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
    crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
    crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
    crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
    crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);

#ifdef HAVE_SYS_TIMEB_H
    crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time);
    crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time);
    crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
    crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
#endif

    crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
    crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);

    /* Report the action that was originally requested, if it was replaced */
    crm_xml_add(notify, F_LRMD_RSC_ACTION,
                cmd->real_action ? cmd->real_action : cmd->action);

    crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
    crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
    crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason);

    if (cmd->params != NULL) {
        GHashTableIter iter;
        char *key = NULL;
        char *value = NULL;
        xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);

        g_hash_table_iter_init(&iter, cmd->params);
        while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
            hash2smartfield((gpointer) key, (gpointer) value, args);
        }
    }

    if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
        /* Only the client that requested the command wants to know */
        crm_client_t *client = crm_client_get_by_id(cmd->client_id);

        if (client != NULL) {
            send_client_notify(client->id, client, notify);
        }

    } else if (client_connections != NULL) {
        g_hash_table_foreach(client_connections, send_client_notify, notify);
    }

    free_xml(notify);
}
/*!
 * \internal
 * \brief Notify all connected clients about the outcome of a request
 *
 * The notification carries the return code plus the call ID, operation and
 * resource ID taken from the request. Nothing is done when there is no client
 * connection table.
 *
 * \param[in] rc       Return code to report
 * \param[in] request  Request XML the notification refers to
 */
static void
send_generic_notify(int rc, xmlNode * request)
{
    xmlNode *rsc_xml = NULL;
    xmlNode *notify = NULL;
    const char *rsc_id = NULL;
    const char *op = NULL;
    int call_id = 0;

    if (client_connections == NULL) {
        return;
    }

    rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
    rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
    op = crm_element_value(request, F_LRMD_OPERATION);
    crm_element_value_int(request, F_LRMD_CALLID, &call_id);

    notify = create_xml_node(NULL, T_LRMD_NOTIFY);
    crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
    crm_xml_add_int(notify, F_LRMD_RC, rc);
    crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
    crm_xml_add(notify, F_LRMD_OPERATION, op);
    crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);

    g_hash_table_foreach(client_connections, send_client_notify, notify);

    free_xml(notify);
}
/*!
 * \internal
 * \brief Clear the per-execution state of a command
 *
 * Resets the operation status, last PID, run/queue timestamps, output and
 * exit reason so the command can be executed again.
 *
 * \param[in] cmd  Command to reset
 */
static void
cmd_reset(lrmd_cmd_t * cmd)
{
    free(cmd->output);
    cmd->output = NULL;

    free(cmd->exit_reason);
    cmd->exit_reason = NULL;

    memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
    memset(&cmd->t_run, 0, sizeof(cmd->t_run));

    cmd->last_pid = 0;
    cmd->lrmd_op_status = 0;
}
/*!
 * \internal
 * \brief Finish one execution of a command
 *
 * Releases the resource's "active" slot if this command holds it, notifies
 * clients of the result, and then either frees the command (one-shot
 * commands and cancelled recurring commands) or resets it for its next
 * iteration (recurring commands that continue).
 *
 * \param[in] cmd  Command that finished; may be freed by this call
 * \param[in] rsc  Resource the command belongs to, or NULL if the resource
 *                 no longer exists
 */
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
    crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
              rsc ? rsc->active : NULL, cmd);

    if (rsc == NULL) {
        cmd->rsc_deleted = 1;

    } else if (rsc->active == cmd) {
        /* Let the next queued command of this resource run */
        rsc->active = NULL;
        mainloop_set_trigger(rsc->work);
    }

    /* reset original timeout so client notification has correct information */
    cmd->timeout = cmd->timeout_orig;

    send_cmd_complete_notify(cmd);

    if (cmd->interval == 0) {
        /* One-shot command: done for good */
        if (rsc != NULL) {
            rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
        }
        free_lrmd_cmd(cmd);

    } else if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) {
        /* Cancelled recurring command: forget it everywhere */
        if (rsc != NULL) {
            rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
            rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
        }
        free_lrmd_cmd(cmd);

    } else {
        /* Clear all the values pertaining just to the last iteration of a recurring op. */
        cmd_reset(cmd);
    }
}
#if SUPPORT_HEARTBEAT
/*!
 * \internal
 * \brief Check whether a string matches a glob-style pattern
 *
 * \param[in] pat  Pattern, as understood by g_pattern_match_simple()
 * \param[in] str  String to test
 *
 * \return TRUE if \p str matches \p pat (a debug message is logged),
 *         FALSE otherwise
 */
static int
pattern_matched(const char *pat, const char *str)
{
    if (!g_pattern_match_simple(pat, str)) {
        return FALSE;
    }
    crm_debug("RA output matched stopped pattern [%s]", pat);
    return TRUE;
}
/*!
 * \internal
 * \brief Map the result of a heartbeat-class agent to an OCF return code
 *
 * For every action except "status" and "monitor" the exit code is mapped by
 * services_get_ocf_exitcode(). For "status"/"monitor" the exit code is
 * ignored and the agent's standard output is searched for known phrases.
 *
 * \param[in] action       Name of the action that was executed
 * \param[in] rc           Exit code of the agent
 * \param[in] stdout_data  Standard output of the agent (may be NULL)
 *
 * \return OCF return code
 */
static int
hb2uniform_rc(const char *action, int rc, const char *stdout_data)
{
    const char *stopped_pats[] = { "*stopped*", "*not*running*" };
    const char *running_pats[] = { "*running*", "*OK*" };
    char *lowered = NULL;
    int result = PCMK_OCF_NOT_RUNNING;

    if (rc < 0) {
        return PCMK_OCF_UNKNOWN_ERROR;
    }

    /* Treat class heartbeat the same as class lsb. */
    if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) {
        return services_get_ocf_exitcode(action, rc);
    }

    /* for status though, exit code is ignored,
     * and the stdout is scanned for specific strings */
    if (stdout_data == NULL) {
        crm_warn("No status output from the (hb) resource agent, assuming stopped");
        return PCMK_OCF_NOT_RUNNING;
    }

    lowered = g_ascii_strdown(stdout_data, -1);

    if (pattern_matched(stopped_pats[0], lowered)
        || pattern_matched(stopped_pats[1], lowered)) {
        result = PCMK_OCF_NOT_RUNNING;

    } else if (pattern_matched(running_pats[0], lowered)
               || pattern_matched(running_pats[1], stdout_data)) {
        /* "OK" is matched case sensitive */
        result = PCMK_OCF_OK;

    } else {
        /* It didn't say it was running - must be stopped */
        crm_debug("RA output did not match any pattern, assuming stopped");
        result = PCMK_OCF_NOT_RUNNING;
    }

    free(lowered);
    return result;
}
#endif
/*!
 * \internal
 * \brief Validate the exit code of an OCF agent
 *
 * \param[in] rc  Exit code of the agent
 *
 * \return \p rc if it lies between 0 and PCMK_OCF_FAILED_MASTER (inclusive),
 *         otherwise PCMK_OCF_UNKNOWN_ERROR
 */
static int
ocf2uniform_rc(int rc)
{
    if ((rc >= 0) && (rc <= PCMK_OCF_FAILED_MASTER)) {
        return rc;
    }
    return PCMK_OCF_UNKNOWN_ERROR;
}
/*!
 * \internal
 * \brief Map the result of a stonith operation to an OCF return code
 *
 * \param[in] action  Name of the action that was executed
 * \param[in] rc      Result of the stonith operation
 *
 * \return 0 if \p rc is 0; for -ENODEV, PCMK_OCF_OK ("stop"),
 *         PCMK_OCF_NOT_INSTALLED ("start") or PCMK_OCF_NOT_RUNNING (any other
 *         action); PCMK_OCF_UNKNOWN_ERROR for every other value
 */
static int
stonith2uniform_rc(const char *action, int rc)
{
    if (rc == 0) {
        return rc;
    }
    if (rc != -ENODEV) {
        return PCMK_OCF_UNKNOWN_ERROR;
    }

    /* -ENODEV: what it means depends on what was being attempted */
    if (safe_str_eq(action, "stop")) {
        return PCMK_OCF_OK;
    }
    if (safe_str_eq(action, "start")) {
        return PCMK_OCF_NOT_INSTALLED;
    }
    return PCMK_OCF_NOT_RUNNING;
}
#if SUPPORT_NAGIOS
/*!
 * \internal
 * \brief Map the exit code of a nagios plugin to an OCF return code
 *
 * \param[in] action  Name of the action that was executed (not used)
 * \param[in] rc      Exit code of the plugin
 *
 * \return PCMK_OCF_OK, PCMK_OCF_INSUFFICIENT_PRIV or PCMK_OCF_NOT_INSTALLED
 *         for the corresponding nagios codes, PCMK_OCF_UNKNOWN_ERROR for
 *         anything else (including negative values)
 */
static int
nagios2uniform_rc(const char *action, int rc)
{
    int result = PCMK_OCF_UNKNOWN_ERROR;

    if (rc < 0) {
        return PCMK_OCF_UNKNOWN_ERROR;
    }

    switch (rc) {
        case NAGIOS_STATE_OK:
            result = PCMK_OCF_OK;
            break;

        case NAGIOS_INSUFFICIENT_PRIV:
            result = PCMK_OCF_INSUFFICIENT_PRIV;
            break;

        case NAGIOS_NOT_INSTALLED:
            result = PCMK_OCF_NOT_INSTALLED;
            break;

        case NAGIOS_STATE_WARNING:
        case NAGIOS_STATE_CRITICAL:
        case NAGIOS_STATE_UNKNOWN:
        case NAGIOS_STATE_DEPENDENT:
        default:
            /* All of these are reported as a generic error */
            result = PCMK_OCF_UNKNOWN_ERROR;
            break;
    }

    return result;
}
#endif
/*!
 * \internal
 * \brief Map a class-specific result to an OCF return code
 *
 * \param[in] standard  Resource class the result came from
 * \param[in] action    Name of the action that was executed
 * \param[in] rc        Class-specific result
 *
 * \return Mapped return code; systemd and upstart results are returned
 *         unchanged, and classes without a dedicated mapping go through
 *         services_get_ocf_exitcode()
 */
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
    if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) {
        return ocf2uniform_rc(rc);
    }

    if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
        return stonith2uniform_rc(action, rc);
    }

    if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)
        || safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) {
        return rc;
    }

#if SUPPORT_NAGIOS
    if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) {
        return nagios2uniform_rc(action, rc);
    }
#endif

    return services_get_ocf_exitcode(action, rc);
}
/*!
 * \internal
 * \brief Get the OCF return code for a completed services-library action
 *
 * The action name used for the mapping is the one stored in the command
 * attached to the action as callback data.
 *
 * \param[in] action  Completed action; its cb_data must be the lrmd command
 *
 * \return OCF return code
 */
static int
action_get_uniform_rc(svc_action_t * action)
{
    lrmd_cmd_t *cmd = action->cb_data;
    const char *standard = action->standard;

#if SUPPORT_HEARTBEAT
    /* Heartbeat agents also need their output inspected */
    if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_HB)) {
        return hb2uniform_rc(cmd->action, action->rc, action->stdout_data);
    }
#endif

    return get_uniform_rc(standard, cmd->action, action->rc);
}
/*!
 * \brief Tell every other connected client that a new client has connected
 *
 * \param[in] new_client  Client that just connected (it is not notified)
 */
void
notify_of_new_client(crm_client_t *new_client)
{
    GHashTableIter iter;
    char *key = NULL;
    crm_client_t *client = NULL;
    xmlNode *notify = create_xml_node(NULL, T_LRMD_NOTIFY);

    crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
    crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);

    g_hash_table_iter_init(&iter, client_connections);
    while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) {
        /* Everyone except the newcomer gets the message */
        if (!safe_str_eq(client->id, new_client->id)) {
            send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify);
        }
    }

    free_xml(notify);
}
/*!
 * \internal
 * \brief Extract the exit reason from an agent's output
 *
 * The exit reason is the text following the last occurrence of
 * PCMK_OCF_REASON_PREFIX, up to the end of that line and limited to
 * EXIT_REASON_MAX_LEN characters.
 *
 * \param[in] output  Agent output to search (may be NULL)
 *
 * \return Newly allocated string containing the exit reason, or NULL if
 *         \p output is NULL or contains no prefix
 */
static char *
parse_exit_reason(const char *output)
{
    static int cookie_len = 0;
    const char *match = NULL;
    const char *last = NULL;
    char *reason = NULL;
    char *eol = NULL;

    if (output == NULL) {
        return NULL;
    }

    /* The prefix length never changes, so compute it only once */
    if (cookie_len == 0) {
        cookie_len = strlen(PCMK_OCF_REASON_PREFIX);
    }

    /* Walk to the last prefix, remembering where the text after it starts */
    match = strstr(output, PCMK_OCF_REASON_PREFIX);
    while (match != NULL) {
        /* skip over the cookie delimiter string */
        last = match + cookie_len;
        match = strstr(last, PCMK_OCF_REASON_PREFIX);
    }

    if (last == NULL) {
        return NULL;
    }

    /* make our own copy */
    reason = calloc(1, (EXIT_REASON_MAX_LEN+1));
    CRM_ASSERT(reason);

    /* limit reason string size; the zeroed extra byte keeps it terminated */
    strncpy(reason, last, EXIT_REASON_MAX_LEN);

    /* truncate everything after a new line */
    eol = strchr(reason, '\n');
    if (eol != NULL) {
        *eol = '\0';
    }

    return reason;
}
/*!
 * \brief Clean up after a client that is disconnecting
 *
 * For every resource flagged with lrmd_opt_drop_recurring, cancel the
 * recurring operations associated with the given client.
 *
 * \param[in] client_id  ID of the disconnecting client
 */
void
client_disconnect_cleanup(const char *client_id)
{
    GHashTableIter iter;
    char *key = NULL;
    lrmd_rsc_t *rsc = NULL;

    g_hash_table_iter_init(&iter, rsc_list);
    while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
        if ((rsc->call_opts & lrmd_opt_drop_recurring) == 0) {
            continue;
        }
        /* This client is disconnecting, drop any recurring operations
         * it may have initiated on the resource */
        cancel_all_recurring(rsc, client_id);
    }
}
/*!
 * \internal
 * \brief Completion callback for actions run through the services library
 *
 * Records the action's PID, mapped return code and status in the lrmd command
 * attached to the action (cb_data). Depending on the resource class, the
 * command is then either re-scheduled for another attempt (see "goagain"
 * below) or finalized, which notifies clients of the result.
 *
 * \param[in] action  Completed action; nothing but an error log happens if it
 *                    carries no command
 */
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
const char *rclass = NULL;
/* Set when the command must be scheduled again instead of being finalized */
bool goagain = false;
if (!cmd) {
crm_err("LRMD action (%s) completed does not match any known operations.", action->id);
return;
}
#ifdef HAVE_SYS_TIMEB_H
/* Remember when the result last differed from the stored one */
if (cmd->exec_rc != action->rc) {
ftime(&cmd->t_rcchange);
}
#endif
cmd->last_pid = action->pid;
cmd->exec_rc = action_get_uniform_rc(action);
cmd->lrmd_op_status = action->status;
/* The resource may no longer be registered, in which case rsc stays NULL */
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
/* For the generic "service" class, look up the class actually in use */
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) {
rclass = resources_find_service_class(rsc->class);
} else if(rsc) {
rclass = rsc->class;
}
if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) {
if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) {
/* systemd I curse thee!
*
* systemd returns from start actions after the start _begins_
* not after it completes.
*
* So we have to jump through a few hoops so that we don't
* report 'complete' to the rest of pacemaker until, you know,
* it's actually done.
*/
goagain = true;
/* Keep the requested action name and poll with "monitor" from now on */
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) {
/* Same treatment for stop: follow up with "monitor" */
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if(cmd->real_action) {
/* Ok, so this is the follow up monitor action to check if start actually completed */
if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) {
goagain = true;
} else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) {
/* Monitor still succeeds after a stop request: check again later */
goagain = true;
} else {
#ifdef HAVE_SYS_TIMEB_H
int time_sum = time_diff_ms(NULL, &cmd->t_first_run);
int timeout_left = cmd->timeout_orig - time_sum;
crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc);
cmd_original_times(cmd);
#endif
/* "Not running" is the successful outcome of a stop */
if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_NOT_RUNNING && safe_str_eq(cmd->real_action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
}
}
}
}
#if SUPPORT_NAGIOS
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) {
if (safe_str_eq(cmd->action, "monitor") &&
cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) {
/* Successfully executed --version for the nagios plugin */
cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) {
/* A failed nagios start is retried */
goagain = true;
}
}
#endif
/* Wrapping this section in ifdef implies that systemd resources are not
* fully supported on platforms without sys/timeb.h. Since timeb is
* obsolete, we should eventually prefer a clock_gettime() implementation
* (wrapped in its own ifdef) with timeb as a fallback.
*/
#ifdef HAVE_SYS_TIMEB_H
if(goagain) {
int time_sum = time_diff_ms(NULL, &cmd->t_first_run);
int timeout_left = cmd->timeout_orig - time_sum;
/* Wait a tenth of the original timeout, at most 2000ms, before retrying;
 * if that would not fit into the time left (and more than 20ms are left),
 * wait half of the time left instead */
int delay = cmd->timeout_orig / 10;
if(delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}
delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
if(cmd->exec_rc == PCMK_OCF_OK) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
} else if(cmd->exec_rc == PCMK_OCF_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
} else {
crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay);
}
cmd_reset(cmd);
if(rsc) {
rsc->active = NULL;
}
/* NOTE(review): rsc can be NULL here; schedule_lrmd_cmd() returns early
 * on a NULL resource, which appears to leave cmd neither queued nor
 * finalized - confirm */
schedule_lrmd_cmd(rsc, cmd);
/* Don't finalize cmd, we're not done with it yet */
return;
} else {
/* Not enough time left for another attempt: report a timeout */
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left);
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
cmd->exec_rc = PCMK_OCF_TIMEOUT;
cmd_original_times(cmd);
}
}
#endif
/* Standard error, when present, is preferred over standard output, and is
 * also searched for an exit reason */
if (action->stderr_data) {
cmd->output = strdup(action->stderr_data);
cmd->exit_reason = parse_exit_reason(action->stderr_data);
} else if (action->stdout_data) {
cmd->output = strdup(action->stdout_data);
}
cmd_finalize(cmd, rsc);
}
/*!
 * \internal
 * \brief Process the result of a stonith command
 *
 * Derives the command's return code and operation status from the stonith
 * result, arms the interval timer for recurring commands whose resource still
 * exists, and finalizes the command.
 *
 * \param[in] cmd  Command that completed; may be freed by this call
 * \param[in] rc   Result reported for the stonith operation
 */
static void
stonith_action_complete(lrmd_cmd_t * cmd, int rc)
{
    lrmd_rsc_t *rsc = NULL;
    int recurring = cmd->interval;

    cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, rc);

    rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);

    if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) {
        /* A cancelled command keeps its status and is never repeated */
        recurring = 0;

    } else if (rc == 0) {
        /* command successful */
        cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
        if (rsc && safe_str_eq(cmd->action, "start")) {
            rsc->stonith_started = 1;
        }

    } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) {
        /* Not registered == inactive */
        cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
        cmd->exec_rc = PCMK_OCF_NOT_RUNNING;

    } else if (rc == -EPROTONOSUPPORT) {
        /* Attempt to map return codes to op status if possible */
        cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED;

    } else if (rc == -ETIME) {
        cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;

    } else {
        /* TODO: This looks wrong. Status should be _DONE and exec_rc set to an error */
        cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
    }

    if (recurring && rsc) {
        /* Replace any timer still set with a fresh one for the next run */
        if (cmd->stonith_recurring_id) {
            g_source_remove(cmd->stonith_recurring_id);
        }
        cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd);
    }

    cmd_finalize(cmd, rsc);
}
/*!
 * \internal
 * \brief Stonith API callback delivering the result of an asynchronous call
 *
 * \param[in] stonith  Stonith connection (not used)
 * \param[in] data     Callback data; its userdata is the lrmd command and its
 *                     rc the result of the call
 */
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
    lrmd_cmd_t *cmd = data->userdata;
    int rc = data->rc;

    stonith_action_complete(cmd, rc);
}
/*!
 * \brief Fail all commands of stonith resources after losing the connection
 *
 * Collects the active, recurring and pending commands of every stonith-class
 * resource, empties the resources' recurring and pending lists, and completes
 * each collected command with -ENOTCONN.
 */
void
stonith_connection_failed(void)
{
    GHashTableIter iter;
    char *key = NULL;
    lrmd_rsc_t *rsc = NULL;
    GList *cmd_list = NULL;
    GList *cmd_iter = NULL;

    g_hash_table_iter_init(&iter, rsc_list);
    while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
        if (!safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
            continue;
        }

        if (rsc->active) {
            cmd_list = g_list_append(cmd_list, rsc->active);
        }
        if (rsc->recurring_ops) {
            cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
        }
        if (rsc->pending_ops) {
            cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
        }

        /* The list nodes now belong to cmd_list */
        rsc->recurring_ops = NULL;
        rsc->pending_ops = NULL;
    }

    if (cmd_list == NULL) {
        return;
    }

    crm_err("STONITH connection failed, finalizing %d pending operations.",
            g_list_length(cmd_list));

    for (cmd_iter = cmd_list; cmd_iter != NULL; cmd_iter = cmd_iter->next) {
        stonith_action_complete(cmd_iter->data, -ENOTCONN);
    }
    g_list_free(cmd_list);
}
/*!
 * \internal
 * \brief Execute a command for a stonith-class resource
 *
 * "start" registers the device and, on success, follows up with a monitor;
 * "stop" removes the device; a recurring "monitor" asks stonith to monitor
 * the device, while a one-shot "monitor" only reports whether the device was
 * started through this daemon. Unless a monitor was successfully handed to
 * the stonith API (its result then arrives through lrmd_stonith_callback()),
 * the command is completed before returning.
 *
 * \param[in] rsc  Resource to act on
 * \param[in] cmd  Command to execute
 *
 * \return -EUNATCH without a stonith connection, a positive value when an
 *         asynchronous monitor is in flight, otherwise the result with which
 *         the command was completed
 */
static int
lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
    stonith_t *stonith_api = get_stonith_connection();
    int do_monitor = 0;
    int rc = 0;

    if (stonith_api == NULL) {
        cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action,
                                      -ENOTCONN);
        cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
        cmd_finalize(cmd, rsc);
        return -EUNATCH;
    }

    if (safe_str_eq(cmd->action, "start")) {
        stonith_key_value_t *device_params = NULL;
        char *key = NULL;
        char *value = NULL;

        if (cmd->params != NULL) {
            GHashTableIter iter;

            g_hash_table_iter_init(&iter, cmd->params);
            while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
                device_params = stonith_key_value_add(device_params, key, value);
            }
        }

        /* Stonith automatically registers devices from the IPC when changes occur,
         * but to avoid a possible race condition between stonith receiving the IPC update
         * and the lrmd requesting that resource, the lrmd still registers the device as well.
         * Stonith knows how to handle duplicate device registrations correctly. */
        rc = stonith_api->cmds->register_device(stonith_api,
                                                st_opt_sync_call,
                                                cmd->rsc_id,
                                                rsc->provider, rsc->type, device_params);
        stonith_key_value_freeall(device_params, 1, 1);

        /* A registered device is verified with a monitor */
        if (rc == 0) {
            do_monitor = 1;
        }

    } else if (safe_str_eq(cmd->action, "stop")) {
        rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id);
        rsc->stonith_started = 0;

    } else if (safe_str_eq(cmd->action, "monitor")) {
        if (cmd->interval) {
            do_monitor = 1;
        } else {
            rc = rsc->stonith_started ? 0 : -ENODEV;
        }
    }

    if (do_monitor) {
        rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000);

        rc = stonith_api->cmds->register_callback(stonith_api,
                                                  rc,
                                                  0,
                                                  0,
                                                  cmd, "lrmd_stonith_callback", lrmd_stonith_callback);

        /* don't cleanup yet, we will find out the result of the monitor later */
        if (rc > 0) {
            rsc->active = cmd;
            return rc;
        }
        if (rc == 0) {
            rc = -1;
        }
    }

    /* Nothing is in flight, so the result is final */
    stonith_action_complete(cmd, rc);
    return rc;
}
/*!
 * \internal
 * \brief Execute a command through the services library
 *
 * Creates a services-library action for the command (with its own copy of the
 * command's parameters) and starts it asynchronously, with action_complete()
 * as the completion callback. If the action cannot be created or started,
 * the command is finalized here with the failure.
 *
 * \param[in] rsc  Resource to act on (asserted non-NULL)
 * \param[in] cmd  Command to execute (asserted non-NULL); it may already have
 *                 been freed when this function returns
 *
 * \return Always TRUE
 */
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
CRM_ASSERT(rsc);
CRM_ASSERT(cmd);
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
#if SUPPORT_NAGIOS
/* Recurring operations are cancelled anyway for a stop operation */
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)
&& safe_str_eq(cmd->action, "stop")) {
/* Nothing is executed: a nagios stop is reported as successful */
cmd->exec_rc = PCMK_OCF_OK;
goto exec_done;
}
#endif
/* Duplicate the parameters for the action; the command keeps its own table */
if (cmd->params) {
params_copy = g_hash_table_new_full(crm_str_hash,
g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
if (params_copy != NULL) {
g_hash_table_foreach(cmd->params, dup_attr, params_copy);
}
}
if (safe_str_eq(rsc->class, PCMK_ALERT_CLASS)) {
/* In the case of Alert, lrmd always set rsc->type from CRM_alert_path parameter. */
/* NOTE(review): params_copy is still NULL here when cmd->params was NULL;
 * the lookup then yields no path and no action is created - confirm */
void *value_lookup = g_hash_table_lookup(params_copy, CRM_ALERT_KEY_PATH);
if (value_lookup != NULL) {
action = services_action_create_generic((char*)value_lookup, NULL);
action->action = strdup(cmd->action);
action->timeout = cmd->timeout;
action->id = strdup(rsc->rsc_id);
/* NOTE(review): the next two lines carry diff markers ('-' old, '+' new);
 * presumably only the '+' form belongs in the resulting file - confirm */
- action->alert_params = params_copy;
+ action->params = params_copy;
value_lookup = g_hash_table_lookup(params_copy, CRM_ALERT_NODE_SEQUENCE);
if (value_lookup != NULL) {
action->sequence = crm_atoi(value_lookup, "");
}
}
} else if (cmd->isolation_wrapper) {
/* Run the wrapper agent of the isolation provider instead of the
 * resource's own agent */
g_hash_table_remove(params_copy, "CRM_meta_isolation_wrapper");
action = resources_action_create(rsc->rsc_id,
PCMK_RESOURCE_CLASS_OCF,
LRMD_ISOLATION_PROVIDER,
cmd->isolation_wrapper,
cmd->action, /*action will be normalized in wrapper*/
cmd->interval,
cmd->timeout,
params_copy,
cmd->service_flags);
} else {
action = resources_action_create(rsc->rsc_id,
rsc->class,
rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval,
cmd->timeout,
params_copy,
cmd->service_flags);
}
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
/* Lets action_complete() find the command belonging to the action */
action->cb_data = cmd;
/* 'cmd' may not be valid after this point if
* services_action_async() returned TRUE
*
* Upstart and systemd both synchronously determine monitor/status
* results and call action_complete (which may free 'cmd') if necessary.
*/
if (services_action_async(action, action_complete)) {
return TRUE;
}
/* The action could not be started: take over its result, making sure the
 * command does not end up with a "done" status */
cmd->exec_rc = action->rc;
if(action->status != PCMK_LRM_OP_DONE) {
cmd->lrmd_op_status = action->status;
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
services_action_free(action);
action = NULL;
exec_done:
cmd_finalize(cmd, rsc);
return TRUE;
}
/*!
 * \internal
 * \brief Start the next pending command of a resource, if possible
 *
 * Nothing is started while another command is active for the resource, while
 * no command is pending, or while the first pending command is still waiting
 * for its start delay.
 *
 * \param[in] rsc  Resource to execute the next command for
 *
 * \return FALSE if \p rsc is NULL, otherwise TRUE
 */
static gboolean
lrmd_rsc_execute(lrmd_rsc_t * rsc)
{
    lrmd_cmd_t *cmd = NULL;
    GList *first = NULL;

    CRM_CHECK(rsc != NULL, return FALSE);

    if (rsc->active) {
        crm_trace("%s is still active", rsc->rsc_id);
        return TRUE;
    }

    first = rsc->pending_ops;
    if (first == NULL) {
        crm_trace("Nothing further to do for %s", rsc->rsc_id);
        return TRUE;
    }

    cmd = first->data;
    if (cmd->delay_id) {
        crm_trace
            ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
             cmd->rsc_id, cmd->action, cmd->start_delay);
        return TRUE;
    }

    /* Take the command off the pending queue */
    rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
    g_list_free_1(first);

#ifdef HAVE_SYS_TIMEB_H
    if (cmd->t_first_run.time == 0) {
        ftime(&cmd->t_first_run);
    }
    ftime(&cmd->t_run);
#endif

    rsc->active = cmd;          /* only one op at a time for a rsc */
    if (cmd->interval) {
        rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
    }

    log_execute(cmd);

    if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
        lrmd_rsc_execute_stonith(rsc, cmd);
    } else {
        lrmd_rsc_execute_service_lib(rsc, cmd);
    }

    return TRUE;
}
/*!
 * \internal
 * \brief Dispatch function that runs a resource's next command
 *
 * \param[in] user_data  Resource (lrmd_rsc_t *) to execute
 *
 * \return Result of lrmd_rsc_execute()
 */
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
    lrmd_rsc_t *rsc = user_data;

    return lrmd_rsc_execute(rsc);
}
/*!
 * \brief Free a resource object, cancelling all of its commands
 *
 * Pending commands are finalized as cancelled. Recurring commands are
 * finalized as cancelled (stonith resources, unless in flight) or cancelled
 * through the services library (all other resources).
 *
 * \param[in] data  Resource (lrmd_rsc_t *) to free
 */
void
free_rsc(gpointer data)
{
    lrmd_rsc_t *rsc = data;
    GListPtr gIter = NULL;
    GListPtr next = NULL;
    int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH);

    for (gIter = rsc->pending_ops; gIter != NULL; gIter = next) {
        lrmd_cmd_t *cmd = gIter->data;

        next = gIter->next;

        /* command was never executed */
        cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
        cmd_finalize(cmd, NULL);
    }
    /* frees list, but not list elements. */
    g_list_free(rsc->pending_ops);

    for (gIter = rsc->recurring_ops; gIter != NULL; gIter = next) {
        lrmd_cmd_t *cmd = gIter->data;

        next = gIter->next;

        if (!is_stonith) {
            /* This command is already handed off to service library,
             * let service library cancel it and tell us via the callback
             * when it is cancelled. The rsc can be safely destroyed
             * even if we are waiting for the cancel result */
            services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval);
            continue;
        }

        cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
        /* If a stonith command is in-flight, just mark it as cancelled;
         * it is not safe to finalize/free the cmd until the stonith api
         * says it has either completed or timed out.
         */
        if (rsc->active != cmd) {
            cmd_finalize(cmd, NULL);
        }
    }
    /* frees list, but not list elements. */
    g_list_free(rsc->recurring_ops);

    free(rsc->rsc_id);
    free(rsc->class);
    free(rsc->provider);
    free(rsc->type);
    mainloop_destroy_trigger(rsc->work);

    free(rsc);
}
/*!
 * \internal
 * \brief Handle a client's sign-on request
 *
 * Replies with the client's ID and this daemon's protocol version. If the
 * client's protocol version is older than LRMD_PROTOCOL_VERSION, the reply
 * additionally carries -EPROTO as return code. A client that identifies
 * itself as an IPC provider is registered as such when remote support is
 * compiled in.
 *
 * \param[in] client   Client signing on
 * \param[in] id       Message ID of the request
 * \param[in] request  Sign-on request XML
 *
 * \return Always pcmk_ok
 */
static int
process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request)
{
    const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
    const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
    xmlNode *reply = create_xml_node(NULL, "reply");
    int too_old = (compare_version(protocol_version, LRMD_PROTOCOL_VERSION) < 0);

    if (too_old) {
        crm_err("Cluster API version must be greater than or equal to %s, not %s",
                LRMD_PROTOCOL_VERSION, protocol_version);
        crm_xml_add_int(reply, F_LRMD_RC, -EPROTO);
        crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
    }

    crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
    crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
    crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
    lrmd_server_send_reply(client, id, reply);

    if (crm_is_true(is_ipc_provider)) {
        /* this is a remote connection from a cluster nodes crmd */
#ifdef SUPPORT_REMOTE
        ipc_proxy_add_provider(client);
#endif
    }

    free_xml(reply);
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Handle a request to register a resource
 *
 * A resource whose ID, class, provider and type match an already registered
 * resource is not added again. Otherwise the new resource is stored in the
 * resource table under its ID, replacing any entry with that ID.
 *
 * \param[in] client   Requesting client (not used)
 * \param[in] id       Message ID of the request (not used)
 * \param[in] request  Registration request XML
 *
 * \return Always pcmk_ok
 */
static int
process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request)
{
    lrmd_rsc_t *rsc = build_rsc_from_xml(request);
    lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
    int already_known = (dup
                         && safe_str_eq(rsc->class, dup->class)
                         && safe_str_eq(rsc->provider, dup->provider)
                         && safe_str_eq(rsc->type, dup->type));

    if (already_known) {
        crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)",
                 rsc->rsc_id, g_hash_table_size(rsc_list));
        free_rsc(rsc);
        return pcmk_ok;
    }

    g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
    crm_info("Added '%s' to the rsc list (%d active resources)",
             rsc->rsc_id, g_hash_table_size(rsc_list));

    return pcmk_ok;
}
/*!
 * \internal
 * \brief Handle a request for information about a resource
 *
 * Replies with the return code and call ID and, if the resource is
 * registered, its ID, class, provider and type. The return code is -ENODEV
 * when the request names no resource or the resource is unknown.
 *
 * \param[in] client   Requesting client
 * \param[in] id       Message ID of the request
 * \param[in] request  Request XML
 */
static void
process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request)
{
    xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
    const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
    lrmd_rsc_t *rsc = NULL;
    xmlNode *reply = NULL;
    int call_id = 0;
    int send_rc = 0;
    int rc = pcmk_ok;

    crm_element_value_int(request, F_LRMD_CALLID, &call_id);

    if (rsc_id == NULL) {
        rc = -ENODEV;

    } else {
        rsc = g_hash_table_lookup(rsc_list, rsc_id);
        if (rsc == NULL) {
            crm_info("Resource '%s' not found (%d active resources)",
                     rsc_id, g_hash_table_size(rsc_list));
            rc = -ENODEV;
        }
    }

    reply = create_xml_node(NULL, T_LRMD_REPLY);
    crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
    crm_xml_add_int(reply, F_LRMD_RC, rc);
    crm_xml_add_int(reply, F_LRMD_CALLID, call_id);

    if (rsc != NULL) {
        crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
        crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
        crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
        crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
    }

    send_rc = lrmd_server_send_reply(client, id, reply);
    if (send_rc < 0) {
        crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
    }

    free_xml(reply);
}
/**
 * \internal
 * \brief Handle a request to unregister a resource
 *
 * \param[in] client   Requesting client (not used by this function)
 * \param[in] id       Message ID of the request (not used by this function)
 * \param[in] request  XML request naming the resource
 *
 * \return -ENODEV if the request has no resource ID, pcmk_ok if the resource
 *         is not registered or was removed with no active operation, or
 *         -EINPROGRESS if it was removed while an operation was still active
 */
static int
process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request)
{
    xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
    const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
    lrmd_rsc_t *rsc = NULL;
    int rc = pcmk_ok;

    if (rsc_id == NULL) {
        return -ENODEV;
    }

    rsc = g_hash_table_lookup(rsc_list, rsc_id);
    if (rsc == NULL) {
        /* Unregistering an unknown resource is not treated as a failure */
        crm_info("Resource '%s' not found (%d active resources)",
                 rsc_id, g_hash_table_size(rsc_list));
        return pcmk_ok;
    }

    /* Inspect the resource before removing it from the table; the entry is
     * removed either way, but the caller is told to expect further results
     * when an operation is still running.
     */
    if (rsc->active) {
        crm_trace("Operation still in progress: %p", rsc->active);
        rc = -EINPROGRESS;
    }

    g_hash_table_remove(rsc_list, rsc_id);
    return rc;
}
/**
 * \internal
 * \brief Handle a request to execute an action on a registered resource
 *
 * \param[in] client   Requesting client, passed on to the created command
 * \param[in] id       Message ID of the request (not used by this function)
 * \param[in] request  XML request naming the resource and action
 *
 * \return Call ID of the scheduled command on success, -EINVAL if the
 *         request has no resource ID, or -ENODEV if the resource is not
 *         registered
 */
static int
process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request)
{
    xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
    const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
    lrmd_rsc_t *rsc = NULL;
    lrmd_cmd_t *cmd = NULL;
    int scheduled_call_id;

    if (rsc_id == NULL) {
        return -EINVAL;
    }

    rsc = g_hash_table_lookup(rsc_list, rsc_id);
    if (rsc == NULL) {
        crm_info("Resource '%s' not found (%d active resources)",
                 rsc_id, g_hash_table_size(rsc_list));
        return -ENODEV;
    }

    cmd = create_lrmd_cmd(request, client, rsc);

    /* Save the call ID first: once the command is handed to the scheduler
     * it may be merged with another command and freed, so it must not be
     * dereferenced afterwards.
     */
    scheduled_call_id = cmd->call_id;
    schedule_lrmd_cmd(rsc, cmd);

    return scheduled_call_id;
}
/**
 * \internal
 * \brief Handle a request to execute an alert
 *
 * Looks the alert up in rsc_list by its ID, creates a command for it and
 * hands that command to the scheduler.
 *
 * \param[in] client   Requesting client, passed on to the created command
 * \param[in] id       Message ID of the request (not used by this function)
 * \param[in] request  XML request naming the alert
 *
 * \return Call ID of the scheduled command on success, -EINVAL if the
 *         request has no alert ID, or -ENODEV if no entry with that ID
 *         exists in rsc_list
 */
static int
process_lrmd_alert_exec(crm_client_t * client, uint32_t id, xmlNode * request)
{
    lrmd_rsc_t *alert = NULL;
    lrmd_cmd_t *cmd = NULL;
    xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, request, LOG_ERR);
    const char *alert_id = crm_element_value(alert_xml, F_LRMD_ALERT_ID);
    int call_id;

    if (!alert_id) {
        return -EINVAL;
    }

    alert = g_hash_table_lookup(rsc_list, alert_id);
    if (alert == NULL) {
        crm_info("Alert '%s' not found (%d active resources)",
                 alert_id, g_hash_table_size(rsc_list));
        return -ENODEV;
    }

    cmd = create_alert_cmd(request, client, alert);
    call_id = cmd->call_id;

    /* Don't reference cmd after handing it off to be scheduled.
     * The cmd could get merged and freed. */
    schedule_lrmd_cmd(alert, cmd);

    return call_id;
}
/**
 * \internal
 * \brief Cancel an operation on a registered resource
 *
 * The operation is identified by its action name and interval.
 *
 * \param[in] rsc_id    ID of the resource the operation belongs to
 * \param[in] action    Action name of the operation to cancel
 * \param[in] interval  Interval of the operation to cancel
 *
 * \return pcmk_ok if a matching operation was cancelled, -ENODEV if the
 *         resource is not registered, or -EOPNOTSUPP if no matching
 *         operation could be cancelled
 */
static int
cancel_op(const char *rsc_id, const char *action, int interval)
{
    lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
    GListPtr item = NULL;

    if (rsc == NULL) {
        return -ENODEV;
    }

    /* Cancellation strategy:
     *
     * 1. An operation still in the pending list has not yet been handed to
     *    the service library or to the stonith recurring list, so removing
     *    it here is enough to stop it.
     * 2. Otherwise it may be a recurring operation, tracked either in our
     *    own recurring list (stonith) or by the service library; stop it
     *    there.
     * 3. If it is in neither place, it either already ran as a one-off
     *    operation or never existed.
     */

    /* Step 1: pending operations */
    item = rsc->pending_ops;
    while (item != NULL) {
        lrmd_cmd_t *pending = item->data;

        if (safe_str_eq(pending->action, action) && (pending->interval == interval)) {
            pending->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
            cmd_finalize(pending, rsc);
            return pcmk_ok;
        }
        item = item->next;
    }

    /* Step 2a: the service library does not handle stonith operations, so
     * recurring stonith operations are tracked and cancelled here.
     */
    if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
        item = rsc->recurring_ops;
        while (item != NULL) {
            lrmd_cmd_t *recurring = item->data;

            if (safe_str_eq(recurring->action, action) && (recurring->interval == interval)) {
                recurring->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
                /* The currently active command is only marked as cancelled,
                 * not finalized here.
                 */
                if (rsc->active != recurring) {
                    cmd_finalize(recurring, rsc);
                }
                return pcmk_ok;
            }
            item = item->next;
        }

        /* Step 3 for stonith resources: nothing matched */
        return -EOPNOTSUPP;
    }

    /* Step 2b: every other class is cancelled through the service library.
     * On success the library reports the cancellation to the
     * action-complete callback, which destroys the command and removes it
     * from the recurring list, so that must not be done here.
     */
    if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) {
        return pcmk_ok;
    }

    /* Step 3: nothing matched */
    return -EOPNOTSUPP;
}
/**
 * \internal
 * \brief Cancel a resource's recurring operations
 *
 * Every command in the resource's recurring and pending lists that has a
 * nonzero interval is cancelled via cancel_op().
 *
 * \param[in] rsc        Resource whose operations should be cancelled
 * \param[in] client_id  If not NULL, only cancel commands whose client ID
 *                       matches this one
 */
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
    GList *snapshot = NULL;
    GList *node = NULL;

    /* Iterate over copies of the lists rather than the lists themselves:
     * cancel_op() may modify recurring_ops and pending_ops, which would
     * otherwise disturb the iteration below.
     */
    if (rsc->recurring_ops != NULL) {
        snapshot = g_list_concat(snapshot, g_list_copy(rsc->recurring_ops));
    }
    if (rsc->pending_ops != NULL) {
        snapshot = g_list_concat(snapshot, g_list_copy(rsc->pending_ops));
    }
    if (snapshot == NULL) {
        return;
    }

    node = snapshot;
    while (node != NULL) {
        lrmd_cmd_t *cmd = node->data;

        /* Only recurring commands (nonzero interval) are cancelled, and
         * only those of the given client when one was specified.
         */
        if ((cmd->interval != 0)
            && ((client_id == NULL) || !safe_str_neq(cmd->client_id, client_id))) {
            cancel_op(rsc->rsc_id, cmd->action, cmd->interval);
        }
        node = node->next;
    }

    /* Releases the copied list nodes only; the commands are not freed here */
    g_list_free(snapshot);
}
/**
 * \internal
 * \brief Handle a request to cancel an operation on a resource
 *
 * \param[in] client   Requesting client (not used by this function)
 * \param[in] id       Message ID of the request (not used by this function)
 * \param[in] request  XML request naming the resource, action and interval
 *
 * \return -EINVAL if the request lacks a resource ID or an action,
 *         otherwise the result of cancel_op()
 */
static int
process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request)
{
    xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
    const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
    const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
    int interval = 0;           /* stays 0 if the request does not set it */

    crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval);

    if ((rsc_id == NULL) || (action == NULL)) {
        return -EINVAL;
    }
    return cancel_op(rsc_id, action, interval);
}
/**
 * \internal
 * \brief Dispatch a client request to the handler for its operation
 *
 * Reads the operation name from the request, runs the matching handler, and
 * then, depending on the operation, sends a reply to the client and/or a
 * generic notification.
 *
 * \param[in] client   Client that sent the request
 * \param[in] id       Message ID of the request
 * \param[in] request  XML request to process
 */
void
process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request)
{
    const char *op = crm_element_value(request, F_LRMD_OPERATION);
    int call_id = 0;
    int rc = pcmk_ok;
    int want_reply = 0;         /* nonzero: send a reply to the client */
    int want_notify = 0;        /* nonzero: send a generic notification */

    crm_trace("Processing %s operation from %s", op, client->id);
    crm_element_value_int(request, F_LRMD_CALLID, &call_id);

    if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) {
        /* Forwarding is only compiled in with remote support; the reply is
         * sent either way.
         */
#ifdef SUPPORT_REMOTE
        ipc_proxy_forward_client(client, request);
#endif
        want_reply = 1;

    } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
        /* No reply or notification is sent from here for sign-on */
        rc = process_lrmd_signon(client, id, request);

    } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
        rc = process_lrmd_rsc_register(client, id, request);
        want_notify = 1;
        want_reply = 1;

    } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
        /* This handler sends its own reply */
        process_lrmd_get_rsc_info(client, id, request);

    } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
        rc = process_lrmd_rsc_unregister(client, id, request);
        /* Failed un-registers are not announced to anyone */
        want_notify = (rc == pcmk_ok) || (rc == -EINPROGRESS);
        want_reply = 1;

    } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
        rc = process_lrmd_rsc_exec(client, id, request);
        want_reply = 1;

    } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
        rc = process_lrmd_rsc_cancel(client, id, request);
        want_reply = 1;

    } else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
        /* Nothing to execute: just reply and notify */
        want_notify = 1;
        want_reply = 1;

    } else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) {
        xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
        const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG);

        CRM_LOG_ASSERT(data != NULL);
        check_sbd_timeout(timeout);

    } else if (crm_str_eq(op, LRMD_OP_ALERT_EXEC, TRUE)) {
        rc = process_lrmd_alert_exec(client, id, request);
        want_reply = 1;

    } else {
        /* Unrecognized operation: report it and reply with an error */
        rc = -EOPNOTSUPP;
        want_reply = 1;
        crm_err("Unknown %s from %s", op, client->name);
        crm_log_xml_warn(request, "UnknownOp");
    }

    crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
              op, client->id, rc, want_reply, want_notify);

    if (want_reply) {
        send_reply(client, rc, id, call_id);
    }
    if (want_notify) {
        send_generic_notify(rc, request);
    }
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Jul 10, 3:26 AM (10 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2003225
Default Alt Text
(126 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment