Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F1842374
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
198 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
index f5ec4c960f..84bd93727a 100644
--- a/daemons/execd/execd_commands.c
+++ b/daemons/execd/execd_commands.c
@@ -1,1961 +1,1903 @@
/*
* Copyright 2012-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
// Check whether we have a high-resolution monotonic clock
#undef PCMK__TIME_USE_CGT
#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
# define PCMK__TIME_USE_CGT
# include <time.h> /* clock_gettime */
#endif
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/msg_xml.h>
#include "pacemaker-execd.h"
GHashTable *rsc_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
guint interval_ms;
int start_delay;
int timeout_orig;
int call_id;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
int service_flags;
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *real_action;
char *userdata_str;
pcmk__action_result_t result;
/* We can track operation queue time and run time, to be saved with the CIB
* resource history (and displayed in cluster status). We need
* high-resolution monotonic time for this purpose, so we use
* clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
* is disabled).
*
* However, we also need epoch timestamps for recording the time the command
* last ran and the time its return value last changed, for use in time
* displays (as opposed to interval calculations). We keep time_t values for
* this purpose.
*
* The last run time is used for both purposes, so we keep redundant
* monotonic and epoch values for this. Technically the two could represent
* different times, but since time_t has only second resolution and the
* values are used for distinct purposes, that is not significant.
*/
#ifdef PCMK__TIME_USE_CGT
/* Recurring and systemd operations may involve more than one executor
* command per operation, so they need info about the original and the most
* recent.
*/
struct timespec t_first_run; // When op first ran
struct timespec t_run; // When op most recently ran
struct timespec t_first_queue; // When op was first queued
struct timespec t_queue; // When op was most recently queued
#endif
time_t epoch_last_run; // Epoch timestamp of when op last ran
time_t epoch_rcchange; // Epoch timestamp of when rc last changed
bool first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
#ifdef PCMK__TIME_USE_CGT
/*!
* \internal
* \brief Check whether a struct timespec has been set
*
* \param[in] timespec Time to check
*
* \return true if timespec has been set (i.e. is nonzero), false otherwise
*/
static inline bool
time_is_set(struct timespec *timespec)
{
return (timespec != NULL) &&
((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
}
/*
* \internal
* \brief Set a timespec (and its original if unset) to the current time
*
* \param[out] t_current Where to store current time
* \param[out] t_orig Where to copy t_current if unset
*/
static void
get_current_time(struct timespec *t_current, struct timespec *t_orig)
{
clock_gettime(CLOCK_MONOTONIC, t_current);
if ((t_orig != NULL) && !time_is_set(t_orig)) {
*t_orig = *t_current;
}
}
/*!
* \internal
* \brief Return difference between two times in milliseconds
*
* \param[in] now More recent time (or NULL to use current time)
* \param[in] old Earlier time
*
* \return milliseconds difference (or 0 if old is NULL or unset)
*
* \note Can overflow on 32bit machines when the differences is around
* 24 days or more.
*/
static int
time_diff_ms(struct timespec *now, struct timespec *old)
{
int diff_ms = 0;
if (time_is_set(old)) {
struct timespec local_now = { 0, };
if (now == NULL) {
clock_gettime(CLOCK_MONOTONIC, &local_now);
now = &local_now;
}
diff_ms = (now->tv_sec - old->tv_sec) * 1000
+ (now->tv_nsec - old->tv_nsec) / 1000000;
}
return diff_ms;
}
/*!
* \internal
* \brief Reset a command's operation times to their original values.
*
* Reset a command's run and queued timestamps to the timestamps of the original
* command, so we report the entire time since then and not just the time since
* the most recent command (for recurring and systemd operations).
*
* \param[in] cmd Executor command object to reset
*
* \note It's not obvious what the queued time should be for a systemd
* start/stop operation, which might go like this:
* initial command queued 5ms, runs 3s
* monitor command queued 10ms, runs 10s
* monitor command queued 10ms, runs 10s
* Is the queued time for that operation 5ms, 10ms or 25ms? The current
* implementation will report 5ms. If it's 25ms, then we need to
* subtract 20ms from the total exec time so as not to count it twice.
* We can implement that later if it matters to anyone ...
*/
static void
cmd_original_times(lrmd_cmd_t * cmd)
{
cmd->t_run = cmd->t_first_run;
cmd->t_queue = cmd->t_first_queue;
}
#endif
static inline bool
action_matches(lrmd_cmd_t *cmd, const char *action, guint interval_ms)
{
return (cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, action, pcmk__str_casei);
}
static void
log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
{
char pid_str[32] = { 0, };
int log_level = LOG_INFO;
if (cmd->last_pid) {
snprintf(pid_str, 32, "%d", cmd->last_pid);
}
if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
log_level = LOG_DEBUG;
}
#ifdef PCMK__TIME_USE_CGT
do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d"
" (execution time %dms, queue time %dms)",
cmd->rsc_id, cmd->action, cmd->call_id,
(cmd->last_pid? ", PID " : ""), pid_str,
cmd->result.exit_status, exec_time, queue_time);
#else
do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d",
cmd->rsc_id, cmd->action, cmd->call_id,
(cmd->last_pid? ", PID " : ""), pid_str,
cmd->result.exit_status);
#endif
}
static void
log_execute(lrmd_cmd_t * cmd)
{
int log_level = LOG_INFO;
if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
cmd->rsc_id, cmd->action, cmd->call_id);
}
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
if (pcmk__str_eq(action, "monitor", pcmk__str_casei) &&
pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
return "status";
}
return action;
}
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running"
return rsc;
}
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
{
int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
cmd->call_opts = call_options;
cmd->client_id = strdup(client->id);
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->timeout_orig = cmd->timeout;
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) {
crm_debug("Setting flag to leave pid group on timeout and "
"only kill action pid for " PCMK__OP_FMT,
cmd->rsc_id, cmd->action, cmd->interval_ms);
cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
LOG_TRACE, "Action",
cmd->action, 0,
SVC_ACTION_LEAVE_GROUP,
"SVC_ACTION_LEAVE_GROUP");
}
return cmd;
}
static void
stop_recurring_timer(lrmd_cmd_t *cmd)
{
if (cmd) {
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
cmd->stonith_recurring_id = 0;
}
}
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
stop_recurring_timer(cmd);
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
pcmk__reset_result(&(cmd->result));
free(cmd->origin);
free(cmd->action);
free(cmd->real_action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->client_id);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc;
cmd->stonith_recurring_id = 0;
if (!cmd->rsc_id) {
return FALSE;
}
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static inline void
start_recurring_timer(lrmd_cmd_t *cmd)
{
if (cmd && (cmd->interval_ms > 0)) {
cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
stonith_recurring_op_helper,
cmd);
}
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
/*!
* \internal
* \brief Check whether a list already contains the equivalent of a given action
*/
static lrmd_cmd_t *
find_duplicate_action(GList *action_list, lrmd_cmd_t *cmd)
{
for (GList *item = action_list; item != NULL; item = item->next) {
lrmd_cmd_t *dup = item->data;
if (action_matches(cmd, dup->action, dup->interval_ms)) {
return dup;
}
}
return NULL;
}
static bool
merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
lrmd_cmd_t * dup = NULL;
bool dup_pending = true;
if (cmd->interval_ms == 0) {
return false;
}
// Search for a duplicate of this action (in-flight or not)
dup = find_duplicate_action(rsc->pending_ops, cmd);
if (dup == NULL) {
dup_pending = false;
dup = find_duplicate_action(rsc->recurring_ops, cmd);
if (dup == NULL) {
return false;
}
}
/* Do not merge fencing monitors marked for cancellation, so we can reply to
* the cancellation separately.
*/
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_casei)
&& (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
return false;
}
/* This should not occur. If it does, we need to investigate how something
* like this is possible in the controller.
*/
crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
"), merging with previous op entry",
rsc->rsc_id, normalize_action_name(rsc, dup->action),
dup->interval_ms);
// Merge new action's call ID and user data into existing action
dup->first_notify_sent = false;
free(dup->userdata_str);
dup->userdata_str = cmd->userdata_str;
cmd->userdata_str = NULL;
dup->call_id = cmd->call_id;
free_lrmd_cmd(cmd);
cmd = NULL;
/* If dup is not pending, that means it has already executed at least once
* and is waiting in the interval. In that case, stop waiting and initiate
* a new instance now.
*/
if (!dup_pending) {
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_casei)) {
stop_recurring_timer(dup);
stonith_recurring_op_helper(dup);
} else {
services_action_kick(rsc->rsc_id,
normalize_action_name(rsc, dup->action),
dup->interval_ms);
}
}
return true;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
if (merge_recurring_duplicate(rsc, cmd)) {
// Equivalent of cmd has already been scheduled
return;
}
/* The controller expects the executor to automatically cancel
* recurring operations before a resource stops.
*/
if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
cancel_all_recurring(rsc, NULL);
}
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static xmlNode *
create_lrmd_reply(const char *origin, int rc, int call_id)
{
xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, origin);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
return reply;
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
pcmk__client_t *client = value;
int rc;
int log_level = LOG_WARNING;
const char *msg = NULL;
CRM_CHECK(client != NULL, return);
if (client->name == NULL) {
crm_trace("Skipping notification to client without name");
return;
}
if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
/* We only want to notify clients of the executor IPC API. If we are
* running as Pacemaker Remote, we may have clients proxied to other
* IPC services in the cluster, so skip those.
*/
crm_trace("Skipping executor API notification to client %s",
pcmk__client_name(client));
return;
}
rc = lrmd_server_send_notify(client, update_msg);
if (rc == pcmk_rc_ok) {
return;
}
switch (rc) {
case ENOTCONN:
case EPIPE: // Client exited without waiting for notification
log_level = LOG_INFO;
msg = "Disconnected";
break;
default:
msg = pcmk_rc_str(rc);
break;
}
do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d",
pcmk__client_name(client), msg, rc);
}
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
xmlNode *notify = NULL;
#ifdef PCMK__TIME_USE_CGT
int exec_time = time_diff_ms(NULL, &(cmd->t_run));
int queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
log_finished(cmd, exec_time, queue_time);
#else
log_finished(cmd, 0, 0);
#endif
/* if the first notify result for a cmd has already been sent earlier, and the
* the option to only send notifies on result changes is set. Check to see
* if the last result is the same as the new one. If so, suppress this update */
if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
if ((cmd->last_notify_rc == cmd->result.exit_status) &&
(cmd->last_notify_op_status == cmd->result.execution_status)) {
/* only send changes */
return;
}
}
cmd->first_notify_sent = true;
cmd->last_notify_rc = cmd->result.exit_status;
cmd->last_notify_op_status = cmd->result.execution_status;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
(long long) cmd->epoch_last_run);
crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
(long long) cmd->epoch_rcchange);
#ifdef PCMK__TIME_USE_CGT
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
if(cmd->real_action) {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
} else {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
}
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
if (cmd->result.action_stderr != NULL) {
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr);
} else if (cmd->result.action_stdout != NULL) {
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout);
}
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2smartfield((gpointer) key, (gpointer) value, args);
}
}
if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
if (client) {
send_client_notify(client->id, client, notify);
}
} else {
pcmk__foreach_ipc_client(send_client_notify, notify);
}
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode * request)
{
if (pcmk__ipc_client_count() != 0) {
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
pcmk__foreach_ipc_client(send_client_notify, notify);
free_xml(notify);
}
}
static void
cmd_reset(lrmd_cmd_t * cmd)
{
cmd->last_pid = 0;
#ifdef PCMK__TIME_USE_CGT
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
#endif
cmd->epoch_last_run = 0;
pcmk__reset_result(&(cmd->result));
cmd->result.execution_status = PCMK_EXEC_DONE;
}
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
rsc ? rsc->active : NULL, cmd);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
/* reset original timeout so client notification has correct information */
cmd->timeout = cmd->timeout_orig;
send_cmd_complete_notify(cmd);
if ((cmd->interval_ms != 0)
&& (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval_ms == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd_reset(cmd);
}
}
-static int
-ocf2uniform_rc(int rc)
-{
- switch (rc) {
- case PCMK_OCF_DEGRADED:
- case PCMK_OCF_DEGRADED_PROMOTED:
- break;
- default:
- if (rc < 0 || rc > PCMK_OCF_FAILED_PROMOTED) {
- return PCMK_OCF_UNKNOWN_ERROR;
- }
- }
-
- return rc;
-}
-
static int
stonith2uniform_rc(const char *action, int rc)
{
switch (rc) {
case pcmk_ok:
rc = PCMK_OCF_OK;
break;
case -ENODEV:
/* This should be possible only for probes in practice, but
* interpret for all actions to be safe.
*/
if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
rc = PCMK_OCF_NOT_RUNNING;
} else if (pcmk__str_eq(action, "stop", pcmk__str_casei)) {
rc = PCMK_OCF_OK;
} else {
rc = PCMK_OCF_NOT_INSTALLED;
}
break;
case -EOPNOTSUPP:
rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
break;
default:
rc = PCMK_OCF_UNKNOWN_ERROR;
break;
}
return rc;
}
-#if SUPPORT_NAGIOS
static int
-nagios2uniform_rc(const char *action, int rc)
+action_get_uniform_rc(svc_action_t *action)
{
- if (rc < 0) {
- return PCMK_OCF_UNKNOWN_ERROR;
- }
-
- switch (rc) {
- case NAGIOS_STATE_OK:
- return PCMK_OCF_OK;
-
- case NAGIOS_INSUFFICIENT_PRIV:
- return PCMK_OCF_INSUFFICIENT_PRIV;
-
- case NAGIOS_NOT_INSTALLED:
- return PCMK_OCF_NOT_INSTALLED;
-
- case NAGIOS_STATE_WARNING:
- return PCMK_OCF_DEGRADED;
-
- case NAGIOS_STATE_CRITICAL:
- case NAGIOS_STATE_UNKNOWN:
- case NAGIOS_STATE_DEPENDENT:
- default:
- return PCMK_OCF_UNKNOWN_ERROR;
- }
-
- return PCMK_OCF_UNKNOWN_ERROR;
-}
-#endif
+ lrmd_cmd_t *cmd = action->cb_data;
-static int
-get_uniform_rc(const char *standard, const char *action, int rc)
-{
- if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) {
- return ocf2uniform_rc(rc);
- } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
- return stonith2uniform_rc(action, rc);
- } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
- return rc;
- } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_casei)) {
- return rc;
-#if SUPPORT_NAGIOS
- } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
- return nagios2uniform_rc(action, rc);
-#endif
+ if (pcmk__str_eq(action->standard, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_casei)) {
+ return stonith2uniform_rc(cmd->action, action->rc);
} else {
- return services_get_ocf_exitcode(action, rc);
- }
-}
+ enum ocf_exitcode code = services_result2ocf(action->standard,
+ cmd->action, action->rc);
-static int
-action_get_uniform_rc(svc_action_t * action)
-{
- lrmd_cmd_t *cmd = action->cb_data;
- return get_uniform_rc(action->standard, cmd->action, action->rc);
+ // Cast variable instead of function return to keep compilers happy
+ return (int) code;
+ }
}
struct notify_new_client_data {
xmlNode *notify;
pcmk__client_t *new_client;
};
static void
notify_one_client(gpointer key, gpointer value, gpointer user_data)
{
pcmk__client_t *client = value;
struct notify_new_client_data *data = user_data;
if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
send_client_notify(key, (gpointer) client, (gpointer) data->notify);
}
}
void
notify_of_new_client(pcmk__client_t *new_client)
{
struct notify_new_client_data data;
data.new_client = new_client;
data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__);
crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
pcmk__foreach_ipc_client(notify_one_client, &data);
free_xml(data.notify);
}
void
client_disconnect_cleanup(const char *client_id)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (rsc->call_opts & lrmd_opt_drop_recurring) {
/* This client is disconnecting, drop any recurring operations
* it may have initiated on the resource */
cancel_all_recurring(rsc, client_id);
}
}
}
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
#ifdef PCMK__TIME_USE_CGT
const char *rclass = NULL;
bool goagain = false;
#endif
if (!cmd) {
crm_err("Completed executor action (%s) does not match any known operations",
action->id);
return;
}
#ifdef PCMK__TIME_USE_CGT
if (cmd->result.exit_status != action->rc) {
cmd->epoch_rcchange = time(NULL);
}
#endif
cmd->last_pid = action->pid;
pcmk__set_result(&(cmd->result), action_get_uniform_rc(action),
action->status, services__exit_reason(action));
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
#ifdef PCMK__TIME_USE_CGT
if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
rclass = resources_find_service_class(rsc->type);
} else if(rsc) {
rclass = rsc->class;
}
if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
if ((cmd->result.exit_status == PCMK_OCF_OK)
&& pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
/* systemd returns from start and stop actions after the action
* begins, not after it completes. We have to jump through a few
* hoops so that we don't report 'complete' to the rest of pacemaker
* until it's actually done.
*/
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if (cmd->real_action != NULL) {
// This is follow-up monitor to check whether start/stop completed
if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
goagain = true;
} else if ((cmd->result.exit_status == PCMK_OCF_OK)
&& pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
goagain = true;
} else {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;
crm_debug("%s systemd %s is now complete (elapsed=%dms, "
"remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
services_ocf_exitcode_str(cmd->result.exit_status),
cmd->result.exit_status);
cmd_original_times(cmd);
// Monitors may return "not running", but start/stop shouldn't
if ((cmd->result.execution_status == PCMK_EXEC_DONE)
&& (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
} else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_OK;
}
}
}
}
}
#endif
#if SUPPORT_NAGIOS
if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
if (action_matches(cmd, "monitor", 0)
&& (cmd->result.exit_status == PCMK_OCF_OK)) {
/* Successfully executed --version for the nagios plugin */
cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
} else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)
&& (cmd->result.exit_status != PCMK_OCF_OK)) {
#ifdef PCMK__TIME_USE_CGT
goagain = true;
#endif
}
}
#endif
#ifdef PCMK__TIME_USE_CGT
if (goagain) {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;
int delay = cmd->timeout_orig / 10;
if(delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}
delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
if (cmd->result.exit_status == PCMK_OCF_OK) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
} else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
} else {
crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action,
services_ocf_exitcode_str(cmd->result.exit_status),
cmd->result.exit_status, time_sum, timeout_left,
delay);
}
cmd_reset(cmd);
if(rsc) {
rsc->active = NULL;
}
schedule_lrmd_cmd(rsc, cmd);
/* Don't finalize cmd, we're not done with it yet */
return;
} else {
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id,
(cmd->real_action? cmd->real_action : cmd->action),
cmd->result.exit_status, time_sum, timeout_left);
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Investigate reason for timeout, and adjust "
"configured operation timeout if necessary");
cmd_original_times(cmd);
}
}
#endif
pcmk__set_result_output(&(cmd->result),
action->stdout_data, action->stderr_data);
cmd_finalize(cmd, rsc);
}
/*!
* \internal
* \brief Determine operation status of a stonith operation
*
* Non-stonith resource operations get their operation status directly from the
* service library, but the fencer does not have an equivalent, so we must infer
* an operation status from the fencer API's return code.
*
* \param[in] action Name of action performed on stonith resource
* \param[in] interval_ms Action interval
* \param[in] rc Action result from fencer
*
* \return Operation status corresponding to fencer API return code
*/
static int
stonith_rc2status(const char *action, guint interval_ms, int rc)
{
int status = PCMK_EXEC_DONE;
switch (rc) {
case pcmk_ok:
break;
case -EOPNOTSUPP:
case -EPROTONOSUPPORT:
status = PCMK_EXEC_NOT_SUPPORTED;
break;
case -ETIME:
case -ETIMEDOUT:
status = PCMK_EXEC_TIMEOUT;
break;
case -ENOTCONN:
case -ECOMM:
// Couldn't talk to fencer
status = PCMK_EXEC_ERROR;
break;
case -ENODEV:
// The device is not registered with the fencer
status = PCMK_EXEC_ERROR;
break;
default:
break;
}
return status;
}
static void
stonith_action_complete(lrmd_cmd_t * cmd, int rc)
{
// This can be NULL if resource was removed before command completed
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
cmd->result.exit_status = stonith2uniform_rc(cmd->action, rc);
/* This function may be called with status already set to cancelled, if a
* pending action was aborted. Otherwise, we need to determine status from
* the fencer return code.
*/
if (cmd->result.execution_status != PCMK_EXEC_CANCELLED) {
cmd->result.execution_status = stonith_rc2status(cmd->action,
cmd->interval_ms, rc);
// Certain successful actions change the known state of the resource
if ((rsc != NULL) && (cmd->result.exit_status == PCMK_OCF_OK)) {
if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
} else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
}
}
}
// Give the user more detail than an OCF code
if (rc != -pcmk_err_generic) {
cmd->result.exit_reason = strdup(pcmk_strerror(rc));
}
/* The recurring timer should not be running at this point in any case, but
* as a failsafe, stop it if it is.
*/
stop_recurring_timer(cmd);
/* Reschedule this command if appropriate. If a recurring command is *not*
* rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
* not be removed from recurring_ops by cmd_finalize().
*/
if (rsc && (cmd->interval_ms > 0)
&& (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
start_recurring_timer(cmd);
}
cmd_finalize(cmd, rsc);
}
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
stonith_action_complete(data->userdata, data->rc);
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
/* If we registered this fence device, we don't know whether the
* fencer still has the registration or not. Cause future probes to
* return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or
* started successfully. This is especially important if the
* controller also went away (possibly due to a cluster layer
* restart) and won't receive our client notification of any
* monitors finalized below.
*/
if (rsc->st_probe_rc == pcmk_ok) {
rsc->st_probe_rc = pcmk_err_generic;
}
if (rsc->active) {
cmd_list = g_list_append(cmd_list, rsc->active);
}
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
}
rsc->pending_ops = rsc->recurring_ops = NULL;
}
}
if (!cmd_list) {
return;
}
crm_err("Connection to fencer failed, finalizing %d pending operations",
g_list_length(cmd_list));
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
stonith_action_complete(cmd_iter->data, -ENOTCONN);
}
g_list_free(cmd_list);
}
/*!
* \internal
* \brief Execute a stonith resource "start" action
*
* Start a stonith resource by registering it with the fencer.
* (Stonith agents don't have a start command.)
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to start
* \param[in] cmd Start command to execute
*
* \return pcmk_ok on success, -errno otherwise
*/
static int
execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
int rc = pcmk_ok;
// Convert command parameters to stonith API key/values
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
/* The fencer will automatically register devices via CIB notifications
* when the CIB changes, but to avoid a possible race condition between
* the fencer receiving the notification and the executor requesting that
* resource, the executor registers the device as well. The fencer knows how
* to handle duplicate registrations.
*/
rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
cmd->rsc_id, rsc->provider,
rsc->type, device_params);
stonith_key_value_freeall(device_params, 1, 1);
return rc;
}
/*!
* \internal
* \brief Execute a stonith resource "stop" action
*
* Stop a stonith resource by unregistering it with the fencer.
* (Stonith agents don't have a stop command.)
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to stop
*
* \return pcmk_ok on success, -errno otherwise
*/
static inline int
execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
{
/* @TODO Failure would indicate a problem communicating with fencer;
* perhaps we should try reconnecting and retrying a few times?
*/
return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
rsc->rsc_id);
}
/*!
* \internal
* \brief Initiate a stonith resource agent recurring "monitor" action
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to monitor
* \param[in] cmd Monitor command being executed
*
* \return pcmk_ok if monitor was successfully initiated, -errno otherwise
*/
static inline int
execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
cmd->timeout / 1000);
rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
"lrmd_stonith_callback",
lrmd_stonith_callback);
if (rc == TRUE) {
rsc->active = cmd;
rc = pcmk_ok;
} else {
rc = -pcmk_err_generic;
}
return rc;
}
static void
lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
int rc = 0;
bool do_monitor = FALSE;
stonith_t *stonith_api = get_stonith_connection();
if (!stonith_api) {
rc = -ENOTCONN;
} else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
rc = execd_stonith_start(stonith_api, rsc, cmd);
if (rc == 0) {
do_monitor = TRUE;
}
} else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
rc = execd_stonith_stop(stonith_api, rsc);
} else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
if (cmd->interval_ms > 0) {
do_monitor = TRUE;
} else {
rc = rsc->st_probe_rc;
}
}
if (do_monitor) {
rc = execd_stonith_monitor(stonith_api, rsc, cmd);
if (rc == pcmk_ok) {
// Don't clean up yet, we will find out result of the monitor later
return;
}
}
stonith_action_complete(cmd, rc);
}
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
CRM_ASSERT(rsc);
CRM_ASSERT(cmd);
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
#if SUPPORT_NAGIOS
/* Recurring operations are cancelled anyway for a stop operation */
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
&& pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
cmd->result.exit_status = PCMK_OCF_OK;
goto exec_done;
}
#endif
params_copy = pcmk__str_table_dup(cmd->params);
action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms, cmd->timeout,
params_copy, cmd->service_flags);
if (action == NULL) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, strerror(ENOMEM));
goto exec_done;
}
if (action->rc != PCMK_OCF_UNKNOWN) {
pcmk__set_result(&(cmd->result), action->rc, action->status,
services__exit_reason(action));
services_action_free(action);
goto exec_done;
}
action->cb_data = cmd;
if (services_action_async(action, action_complete)) {
/* When services_action_async() returns TRUE, the callback might have
* been called -- in this case action_complete(), which might free cmd,
* so cmd cannot be used here.
*/
return TRUE;
}
pcmk__set_result(&(cmd->result), action->rc, action->status,
services__exit_reason(action));
services_action_free(action);
action = NULL;
exec_done:
cmd_finalize(cmd, rsc);
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t * rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace
("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_run), &(cmd->t_first_run));
#endif
cmd->epoch_last_run = time(NULL);
}
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
rsc->active = cmd; /* only one op at a time for a rsc */
if (cmd->interval_ms) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
log_execute(cmd);
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GList *gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
pcmk__str_casei);
gIter = rsc->pending_ops;
while (gIter != NULL) {
GList *next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->result.execution_status = PCMK_EXEC_CANCELLED;
cmd_finalize(cmd, NULL);
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
gIter = rsc->recurring_ops;
while (gIter != NULL) {
GList *next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->result.execution_status = PCMK_EXEC_CANCELLED;
/* If a stonith command is in-flight, just mark it as cancelled;
* it is not safe to finalize/free the cmd until the stonith api
* says it has either completed or timed out.
*/
if (rsc->active != cmd) {
cmd_finalize(cmd, NULL);
}
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms);
}
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static int
process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
xmlNode **reply)
{
int rc = pcmk_ok;
const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
crm_err("Cluster API version must be greater than or equal to %s, not %s",
LRMD_MIN_PROTOCOL_VERSION, protocol_version);
rc = -EPROTO;
}
if (crm_is_true(is_ipc_provider)) {
#ifdef PCMK__COMPILE_REMOTE
if ((client->remote != NULL) && client->remote->tls_handshake_complete) {
// This is a remote connection from a cluster node's controller
ipc_proxy_add_provider(client);
} else {
rc = -EACCES;
}
#else
rc = -EPROTONOSUPPORT;
#endif
}
*reply = create_lrmd_reply(__func__, rc, call_id);
crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(*reply, F_LRMD_CLIENTID, client->id);
crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
return rc;
}
static int
process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Cached agent information for '%s'", rsc->rsc_id);
return rc;
}
static xmlNode *
process_lrmd_get_rsc_info(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
if (rsc_id == NULL) {
rc = -ENODEV;
} else {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Agent information for '%s' not in cache", rsc_id);
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__func__, rc, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
return reply;
}
static int
process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
xmlNode *request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -ENODEV;
}
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Ignoring unregistration of resource '%s', which is not registered",
rsc_id);
return pcmk_ok;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
crm_trace("Operation (0x%p) still in progress for unregistered resource %s",
rsc->active, rsc_id);
rc = -EINPROGRESS;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
int call_id;
if (!rsc_id) {
return -EINVAL;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return -ENODEV;
}
cmd = create_lrmd_cmd(request, client);
call_id = cmd->call_id;
/* Don't reference cmd after handing it off to be scheduled.
* The cmd could get merged and freed. */
schedule_lrmd_cmd(rsc, cmd);
return call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, guint interval_ms)
{
GList *gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action.
* 1. Check pending ops list, if it hasn't been handed off
* to the service library or stonith recurring list remove
* it there and that will stop it.
* 2. If it isn't in the pending ops list, then it's either a
* recurring op in the stonith recurring list, or the service
* library's recurring list. Stop it there
* 3. If not found in any lists, then this operation has either
* been executed already and is not a recurring operation, or
* never existed.
*/
if (!rsc) {
return -ENODEV;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (action_matches(cmd, action, interval_ms)) {
cmd->result.execution_status = PCMK_EXEC_CANCELLED;
cmd_finalize(cmd, rsc);
return pcmk_ok;
}
}
if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith operations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (action_matches(cmd, action, interval_ms)) {
cmd->result.execution_status = PCMK_EXEC_CANCELLED;
if (rsc->active != cmd) {
cmd_finalize(cmd, rsc);
}
return pcmk_ok;
}
}
} else if (services_action_cancel(rsc_id,
normalize_action_name(rsc, action),
interval_ms) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return pcmk_ok;
}
return -EOPNOTSUPP;
}
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
/* Notice a copy of each list is created when concat is called.
* This prevents odd behavior from occurring when the cmd_list
* is iterated through later on. It is possible the cancel_op
* function may end up modifying the recurring_ops and pending_ops
* lists. If we did not copy those lists, our cmd_list iteration
* could get messed up.*/
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
}
if (!cmd_list) {
return;
}
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
lrmd_cmd_t *cmd = cmd_iter->data;
if (cmd->interval_ms == 0) {
continue;
}
if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
continue;
}
cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
}
/* frees only the copied list data, not the cmds */
g_list_free(cmd_list);
}
static int
process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
guint interval_ms = 0;
crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
if (!rsc_id || !action) {
return -EINVAL;
}
return cancel_op(rsc_id, action, interval_ms);
}
static void
add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
{
xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
lrmd_cmd_t *cmd = item->data;
xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
(cmd->real_action? cmd->real_action : cmd->action));
crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
}
}
static xmlNode *
process_lrmd_get_recurring(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
const char *rsc_id = NULL;
lrmd_rsc_t *rsc = NULL;
xmlNode *reply = NULL;
xmlNode *rsc_xml = NULL;
// Resource ID is optional
rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
if (rsc_xml) {
rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
}
if (rsc_xml) {
rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
}
// If resource ID is specified, resource must exist
if (rsc_id != NULL) {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__func__, rc, call_id);
// If resource ID is not specified, check all resources
if (rsc_id == NULL) {
GHashTableIter iter;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rsc)) {
add_recurring_op_xml(reply, rsc);
}
} else if (rsc) {
add_recurring_op_xml(reply, rsc);
}
return reply;
}
void
process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
xmlNode *reply = NULL;
/* Certain IPC commands may be done only by privileged users (i.e. root or
* hacluster), because they would otherwise provide a means of bypassing
* ACLs.
*/
bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
#ifdef PCMK__COMPILE_REMOTE
if (allowed) {
ipc_proxy_forward_client(client, request);
} else {
rc = -EACCES;
}
#else
rc = -EPROTONOSUPPORT;
#endif
do_reply = 1;
} else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
rc = process_lrmd_signon(client, request, call_id, &reply);
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
if (allowed) {
reply = process_lrmd_get_rsc_info(request, call_id);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* don't notify anyone about failed un-registers */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_exec(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_rsc_cancel(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
do_notify = 1;
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
if (allowed) {
xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
CRM_LOG_ASSERT(data != NULL);
pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG));
} else {
rc = -EACCES;
}
} else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
if (allowed) {
rc = process_lrmd_alert_exec(client, id, request);
} else {
rc = -EACCES;
}
do_reply = 1;
} else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
if (allowed) {
reply = process_lrmd_get_recurring(request, call_id);
} else {
rc = -EACCES;
}
do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown IPC request '%s' from client %s",
op, pcmk__client_name(client));
}
if (rc == -EACCES) {
crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
op, pcmk__client_name(client));
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
op, client->id, rc, do_reply, do_notify);
if (do_reply) {
int send_rc = pcmk_rc_ok;
if (reply == NULL) {
reply = create_lrmd_reply(__func__, rc, call_id);
}
send_rc = lrmd_server_send_reply(client, id, reply);
free_xml(reply);
if (send_rc != pcmk_rc_ok) {
crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d",
pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
}
}
if (do_notify) {
send_generic_notify(rc, request);
}
}
diff --git a/include/crm/services.h b/include/crm/services.h
index ae75af4faa..2136103041 100644
--- a/include/crm/services.h
+++ b/include/crm/services.h
@@ -1,401 +1,404 @@
/*
* Copyright 2010-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef __PCMK_SERVICES__
# define __PCMK_SERVICES__
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Services API
* \ingroup core
*/
# include <glib.h>
# include <stdio.h>
# include <stdint.h>
# include <string.h>
# include <stdbool.h>
# include <sys/types.h>
# include <crm_config.h> // OCF_ROOT_DIR
# include "common/results.h"
# ifndef LSB_ROOT_DIR
# define LSB_ROOT_DIR "/etc/init.d"
# endif
/* TODO: Autodetect these two ?*/
# ifndef SYSTEMCTL
# define SYSTEMCTL "/bin/systemctl"
# endif
/* Known resource classes */
#define PCMK_RESOURCE_CLASS_OCF "ocf"
#define PCMK_RESOURCE_CLASS_SERVICE "service"
#define PCMK_RESOURCE_CLASS_LSB "lsb"
#define PCMK_RESOURCE_CLASS_SYSTEMD "systemd"
#define PCMK_RESOURCE_CLASS_UPSTART "upstart"
#define PCMK_RESOURCE_CLASS_NAGIOS "nagios"
#define PCMK_RESOURCE_CLASS_STONITH "stonith"
/* This is the string passed in the OCF_EXIT_REASON_PREFIX environment variable.
* The stderr output that occurs after this prefix is encountered is considered
* the exit reason for a completed operation.
*/
#define PCMK_OCF_REASON_PREFIX "ocf-exit-reason:"
// Agent version to use if agent doesn't specify one
#define PCMK_DEFAULT_AGENT_VERSION "0.1"
enum lsb_exitcode {
PCMK_LSB_OK = 0,
PCMK_LSB_UNKNOWN_ERROR = 1,
PCMK_LSB_INVALID_PARAM = 2,
PCMK_LSB_UNIMPLEMENT_FEATURE = 3,
PCMK_LSB_INSUFFICIENT_PRIV = 4,
PCMK_LSB_NOT_INSTALLED = 5,
PCMK_LSB_NOT_CONFIGURED = 6,
PCMK_LSB_NOT_RUNNING = 7,
};
/* The return codes for the status operation are not the same for other
* operatios - go figure
*/
enum lsb_status_exitcode {
PCMK_LSB_STATUS_OK = 0,
PCMK_LSB_STATUS_VAR_PID = 1,
PCMK_LSB_STATUS_VAR_LOCK = 2,
PCMK_LSB_STATUS_NOT_RUNNING = 3,
PCMK_LSB_STATUS_UNKNOWN = 4,
/* custom codes should be in the 150-199 range reserved for application use */
PCMK_LSB_STATUS_NOT_INSTALLED = 150,
PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151,
};
enum nagios_exitcode {
NAGIOS_STATE_OK = 0,
NAGIOS_STATE_WARNING = 1,
NAGIOS_STATE_CRITICAL = 2,
NAGIOS_STATE_UNKNOWN = 3,
NAGIOS_STATE_DEPENDENT = 4,
NAGIOS_INSUFFICIENT_PRIV = 100,
NAGIOS_NOT_INSTALLED = 101,
};
enum svc_action_flags {
/* On timeout, only kill pid, do not kill entire pid group */
SVC_ACTION_LEAVE_GROUP = 0x01,
SVC_ACTION_NON_BLOCKED = 0x02,
};
typedef struct svc_action_private_s svc_action_private_t;
/*!
* \brief Object for executing external actions
*
* \note This object should never be instantiated directly, but instead created
* using one of the constructor functions (resources_action_create() for
* resource agents, services_alert_create() for alert agents, or
* services_action_create_generic() for generic executables). Similarly,
* do not use sizeof() on this struct.
*
* \internal Internally, services__create_resource_action() is preferable to
* resources_action_create().
*/
typedef struct svc_action_s {
/*! Operation key (<resource>_<action>_<interval>) for resource actions,
* XML ID for alert actions, or NULL for generic actions
*/
char *id;
//! XML ID of resource being executed for resource actions, otherwise NULL
char *rsc;
//! Name of action being executed for resource actions, otherwise NULL
char *action;
//! Action interval for recurring resource actions, otherwise 0
guint interval_ms;
//! Resource standard for resource actions, otherwise NULL
char *standard;
//! Resource provider for resource actions that require it, otherwise NULL
char *provider;
//! Resource agent name for resource actions, otherwise NULL
char *agent;
int timeout; //!< Action timeout (in milliseconds)
/*! A hash table of name/value pairs to use as parameters for resource and
* alert actions, otherwise NULL. These will be used to set environment
* variables for non-fencing resource agents and alert agents, and to send
* stdin to fence agents.
*/
GHashTable *params;
int rc; //!< Exit status of action (set by library upon completion)
//!@{
//! This field should be treated as internal to Pacemaker
int pid; // Process ID of child
int cancel; // Whether this is a cancellation of a recurring action
//!@}
int status; //!< Execution status (enum pcmk_exec_status set by library)
/*! Action counter (set by library for resource actions, or by caller
* otherwise)
*/
int sequence;
//!@{
//! This field should be treated as internal to Pacemaker
int expected_rc; // Unused
int synchronous; // Whether execution should be synchronous (blocking)
//!@}
enum svc_action_flags flags; //!< Flag group of enum svc_action_flags
char *stderr_data; //!< Action stderr (set by library)
char *stdout_data; //!< Action stdout (set by library)
void *cb_data; //!< For caller's use (not used by library)
//! This field should be treated as internal to Pacemaker
svc_action_private_t *opaque;
} svc_action_t;
/**
* \brief Get a list of files or directories in a given path
*
* \param[in] root full path to a directory to read
* \param[in] files return list of files if TRUE or directories if FALSE
* \param[in] executable if TRUE and files is TRUE, only return executable files
*
* \return a list of what was found. The list items are char *.
* \note It is the caller's responsibility to free the result with g_list_free_full(list, free).
*/
GList *get_directory_list(const char *root, gboolean files, gboolean executable);
/**
* \brief Get a list of providers
*
* \param[in] standard list providers of this standard (e.g. ocf, lsb, etc.)
*
* \return a list of providers as char * list items (or NULL if standard does not support providers)
* \note The caller is responsible for freeing the result using g_list_free_full(list, free).
*/
GList *resources_list_providers(const char *standard);
/**
* \brief Get a list of resource agents
*
* \param[in] standard list agents using this standard (e.g. ocf, lsb, etc.) (or NULL for all)
* \param[in] provider list agents from this provider (or NULL for all)
*
* \return a list of resource agents. The list items are char *.
* \note The caller is responsible for freeing the result using g_list_free_full(list, free).
*/
GList *resources_list_agents(const char *standard, const char *provider);
/**
* Get list of available standards
*
* \return a list of resource standards. The list items are char *. This list _must_
* be destroyed using g_list_free_full(list, free).
*/
GList *resources_list_standards(void);
/**
* Does the given standard, provider, and agent describe a resource that can exist?
*
* \param[in] standard Which class of agent does the resource belong to?
* \param[in] provider What provides the agent (NULL for most standards)?
* \param[in] agent What is the name of the agent?
*
* \return A boolean
*/
gboolean resources_agent_exists(const char *standard, const char *provider, const char *agent);
/**
* \brief Create a new resource action
*
* \param[in] name Name of resource
* \param[in] standard Resource agent standard (ocf, lsb, etc.)
* \param[in] provider Resource agent provider
* \param[in] agent Resource agent name
* \param[in] action action (start, stop, monitor, etc.)
* \param[in] interval_ms How often to repeat this action (if 0, execute once)
* \param[in] timeout Consider action failed if it does not complete in this many milliseconds
* \param[in] params Action parameters
*
* \return newly allocated action instance
*
* \post After the call, 'params' is owned, and later free'd by the svc_action_t result
* \note The caller is responsible for freeing the return value using
* services_action_free().
*/
svc_action_t *resources_action_create(const char *name, const char *standard,
const char *provider, const char *agent,
const char *action, guint interval_ms,
int timeout /* ms */, GHashTable *params,
enum svc_action_flags flags);
/**
* Kick a recurring action so it is scheduled immediately for re-execution
*/
gboolean services_action_kick(const char *name, const char *action,
guint interval_ms);
const char *resources_find_service_class(const char *agent);
/**
* Utilize services API to execute an arbitrary command.
*
* This API has useful infrastructure in place to be able to run a command
* in the background and get notified via a callback when the command finishes.
*
* \param[in] exec command to execute
* \param[in] args arguments to the command, NULL terminated
*
* \return a svc_action_t object, used to pass to the execute function
* (services_action_sync() or services_action_async()) and is
* provided to the callback.
*/
svc_action_t *services_action_create_generic(const char *exec, const char *args[]);
void services_action_cleanup(svc_action_t * op);
void services_action_free(svc_action_t * op);
int services_action_user(svc_action_t *op, const char *user);
gboolean services_action_sync(svc_action_t * op);
/**
* \brief Run an action asynchronously
*
* \param[in] op Action to run
* \param[in] action_callback Function to call when the action completes
* \param[in] action_fork_callback Function to call after action process forks
*
* \return TRUE if execution was successfully initiated, FALSE otherwise (in
* which case the callback will not be called)
*/
gboolean services_action_async_fork_notify(svc_action_t * op,
void (*action_callback) (svc_action_t *),
void (*action_fork_callback) (svc_action_t *));
gboolean services_action_async(svc_action_t * op,
void (*action_callback) (svc_action_t *));
gboolean services_action_cancel(const char *name, const char *action,
guint interval_ms);
/* functions for alert agents */
svc_action_t *services_alert_create(const char *id, const char *exec,
int timeout, GHashTable *params,
int sequence, void *cb_data);
gboolean services_alert_async(svc_action_t *action,
void (*cb)(svc_action_t *op));
+enum ocf_exitcode services_result2ocf(const char *standard, const char *action,
+ int exit_status);
+
static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) {
switch (code) {
case PCMK_OCF_OK:
return "ok";
case PCMK_OCF_UNKNOWN_ERROR:
return "error";
case PCMK_OCF_INVALID_PARAM:
return "invalid parameter";
case PCMK_OCF_UNIMPLEMENT_FEATURE:
return "unimplemented feature";
case PCMK_OCF_INSUFFICIENT_PRIV:
return "insufficient privileges";
case PCMK_OCF_NOT_INSTALLED:
return "not installed";
case PCMK_OCF_NOT_CONFIGURED:
return "not configured";
case PCMK_OCF_NOT_RUNNING:
return "not running";
case PCMK_OCF_RUNNING_PROMOTED:
return "promoted";
case PCMK_OCF_FAILED_PROMOTED:
return "promoted (failed)";
case PCMK_OCF_DEGRADED:
return "OCF_DEGRADED";
case PCMK_OCF_DEGRADED_PROMOTED:
return "promoted (degraded)";
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
case PCMK_OCF_NOT_SUPPORTED:
return "not supported (DEPRECATED STATUS)";
case PCMK_OCF_CANCELLED:
return "cancelled (DEPRECATED STATUS)";
case PCMK_OCF_OTHER_ERROR:
return "other error (DEPRECATED STATUS)";
case PCMK_OCF_SIGNAL:
return "interrupted by signal (DEPRECATED STATUS)";
case PCMK_OCF_PENDING:
return "pending (DEPRECATED STATUS)";
case PCMK_OCF_TIMEOUT:
return "timeout (DEPRECATED STATUS)";
#endif
default:
return "unknown";
}
}
/**
* \brief Get OCF equivalent of LSB exit code
*
* \param[in] action LSB action that produced exit code
* \param[in] lsb_exitcode Exit code of LSB action
*
* \return PCMK_OCF_* constant that corresponds to LSB exit code
*/
static inline enum ocf_exitcode
services_get_ocf_exitcode(const char *action, int lsb_exitcode)
{
/* For non-status actions, LSB and OCF share error code meaning <= 7 */
if (action && strcmp(action, "status") && strcmp(action, "monitor")) {
if ((lsb_exitcode < 0) || (lsb_exitcode > PCMK_LSB_NOT_RUNNING)) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return (enum ocf_exitcode)lsb_exitcode;
}
/* status has different return codes */
switch (lsb_exitcode) {
case PCMK_LSB_STATUS_OK:
return PCMK_OCF_OK;
case PCMK_LSB_STATUS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case PCMK_LSB_STATUS_INSUFFICIENT_PRIV:
return PCMK_OCF_INSUFFICIENT_PRIV;
case PCMK_LSB_STATUS_VAR_PID:
case PCMK_LSB_STATUS_VAR_LOCK:
case PCMK_LSB_STATUS_NOT_RUNNING:
return PCMK_OCF_NOT_RUNNING;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/services_compat.h>
#endif
# ifdef __cplusplus
}
# endif
#endif /* __PCMK_SERVICES__ */
diff --git a/lib/services/services.c b/lib/services/services.c
index 5c17be17a8..1a496c92dc 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -1,1266 +1,1311 @@
/*
* Copyright 2010-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/common/mainloop.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include <crm/stonith-ng.h>
#include <crm/msg_xml.h>
#include "services_private.h"
#include "services_ocf.h"
#include "services_lsb.h"
#if SUPPORT_UPSTART
# include <upstart.h>
#endif
#if SUPPORT_SYSTEMD
# include <systemd.h>
#endif
#if SUPPORT_NAGIOS
# include <services_nagios.h>
#endif
/* TODO: Develop a rollover strategy */
static int operations = 0;
static GHashTable *recurring_actions = NULL;
/* ops waiting to run async because of conflicting active
* pending ops */
static GList *blocked_ops = NULL;
/* ops currently active (in-flight) */
static GList *inflight_ops = NULL;
static void handle_blocked_ops(void);
/*!
* \brief Find first service class that can provide a specified agent
*
* \param[in] agent Name of agent to search for
*
* \return Service class if found, NULL otherwise
*
* \note The priority is LSB, then systemd, then upstart. It would be preferable
* to put systemd first, but LSB merely requires a file existence check,
* while systemd requires contacting D-Bus.
*/
const char *
resources_find_service_class(const char *agent)
{
if (services__lsb_agent_exists(agent)) {
return PCMK_RESOURCE_CLASS_LSB;
}
#if SUPPORT_SYSTEMD
if (systemd_unit_exists(agent)) {
return PCMK_RESOURCE_CLASS_SYSTEMD;
}
#endif
#if SUPPORT_UPSTART
if (upstart_job_exists(agent)) {
return PCMK_RESOURCE_CLASS_UPSTART;
}
#endif
return NULL;
}
static inline void
init_recurring_actions(void)
{
if (recurring_actions == NULL) {
recurring_actions = pcmk__strkey_table(NULL, NULL);
}
}
/*!
* \internal
* \brief Check whether op is in-flight systemd or upstart op
*
* \param[in] op Operation to check
*
* \return TRUE if op is in-flight systemd or upstart op
*/
static inline gboolean
inflight_systemd_or_upstart(svc_action_t *op)
{
return pcmk__strcase_any_of(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD,
PCMK_RESOURCE_CLASS_UPSTART, NULL) &&
g_list_find(inflight_ops, op) != NULL;
}
/*!
* \internal
* \brief Expand "service" alias to an actual resource class
*
* \param[in] rsc Resource name (for logging only)
* \param[in] standard Resource class as configured
* \param[in] agent Agent name to look for
*
* \return Newly allocated string with actual resource class
*
* \note The caller is responsible for calling free() on the result.
*/
static char *
expand_resource_class(const char *rsc, const char *standard, const char *agent)
{
char *expanded_class = NULL;
if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) {
const char *found_class = resources_find_service_class(agent);
if (found_class) {
crm_debug("Found %s agent %s for %s", found_class, agent, rsc);
expanded_class = strdup(found_class);
} else {
crm_info("Assuming resource class lsb for agent %s for %s",
agent, rsc);
expanded_class = strdup(PCMK_RESOURCE_CLASS_LSB);
}
} else {
expanded_class = strdup(standard);
}
CRM_ASSERT(expanded_class);
return expanded_class;
}
/*!
* \internal
* \brief Create a simple svc_action_t instance
*
* \return Newly allocated instance (or NULL if not enough memory)
*/
static svc_action_t *
new_action(void)
{
svc_action_t *op = calloc(1, sizeof(svc_action_t));
if (op == NULL) {
return NULL;
}
op->opaque = calloc(1, sizeof(svc_action_private_t));
if (op->opaque == NULL) {
free(op);
return NULL;
}
// Initialize result
services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, NULL);
return op;
}
static bool
required_argument_missing(uint32_t ra_caps, const char *name,
const char *standard, const char *provider,
const char *agent, const char *action)
{
if (pcmk__str_empty(name)) {
crm_info("Cannot create operation without resource name (bug?)");
return true;
}
if (pcmk__str_empty(standard)) {
crm_info("Cannot create operation for %s without resource class (bug?)",
name);
return true;
}
if (pcmk_is_set(ra_caps, pcmk_ra_cap_provider)
&& pcmk__str_empty(provider)) {
crm_info("Cannot create operation for %s resource %s "
"without provider (bug?)", standard, name);
return true;
}
if (pcmk__str_empty(agent)) {
crm_info("Cannot create operation for %s without agent name (bug?)",
name);
return true;
}
if (pcmk__str_empty(action)) {
crm_info("Cannot create operation for %s without action name (bug?)",
name);
return true;
}
return false;
}
// \return Standard Pacemaker return code (pcmk_rc_ok or ENOMEM)
static int
copy_action_arguments(svc_action_t *op, uint32_t ra_caps, const char *name,
const char *standard, const char *provider,
const char *agent, const char *action)
{
op->rsc = strdup(name);
if (op->rsc == NULL) {
return ENOMEM;
}
op->agent = strdup(agent);
if (op->agent == NULL) {
return ENOMEM;
}
op->standard = expand_resource_class(name, standard, agent);
if (op->standard == NULL) {
return ENOMEM;
}
if (pcmk_is_set(ra_caps, pcmk_ra_cap_status)
&& pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
action = "status";
}
op->action = strdup(action);
if (op->action == NULL) {
return ENOMEM;
}
if (pcmk_is_set(ra_caps, pcmk_ra_cap_provider)) {
op->provider = strdup(provider);
if (op->provider == NULL) {
return ENOMEM;
}
}
return pcmk_rc_ok;
}
svc_action_t *
services__create_resource_action(const char *name, const char *standard,
const char *provider, const char *agent,
const char *action, guint interval_ms, int timeout,
GHashTable *params, enum svc_action_flags flags)
{
svc_action_t *op = NULL;
uint32_t ra_caps = pcmk_get_ra_caps(standard);
int rc = pcmk_rc_ok;
op = new_action();
if (op == NULL) {
crm_crit("Cannot prepare action: %s", strerror(ENOMEM));
if (params != NULL) {
g_hash_table_destroy(params);
}
return NULL;
}
op->interval_ms = interval_ms;
op->timeout = timeout;
op->flags = flags;
op->sequence = ++operations;
// Take ownership of params
if (pcmk_is_set(ra_caps, pcmk_ra_cap_params)) {
op->params = params;
} else if (params != NULL) {
g_hash_table_destroy(params);
params = NULL;
}
if (required_argument_missing(ra_caps, name, standard, provider, agent,
action)) {
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR_FATAL,
"Required agent or action information missing");
return op;
}
op->id = pcmk__op_key(name, action, interval_ms);
if (copy_action_arguments(op, ra_caps, name, standard, provider, agent,
action) != pcmk_rc_ok) {
crm_crit("Cannot prepare %s action for %s: %s",
action, name, strerror(ENOMEM));
services__handle_exec_error(op, ENOMEM);
return op;
}
if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
rc = services__ocf_prepare(op);
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) {
rc = services__lsb_prepare(op);
#if SUPPORT_SYSTEMD
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
rc = services__systemd_prepare(op);
#endif
#if SUPPORT_UPSTART
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) {
rc = services__upstart_prepare(op);
#endif
#if SUPPORT_NAGIOS
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) {
rc = services__nagios_prepare(op);
#endif
} else {
crm_err("Unknown resource standard: %s", op->standard);
rc = ENOENT;
}
if (rc != pcmk_rc_ok) {
crm_err("Cannot prepare %s operation for %s: %s",
action, name, strerror(rc));
services__handle_exec_error(op, rc);
}
return op;
}
svc_action_t *
resources_action_create(const char *name, const char *standard,
const char *provider, const char *agent,
const char *action, guint interval_ms, int timeout,
GHashTable *params, enum svc_action_flags flags)
{
svc_action_t *op = services__create_resource_action(name, standard,
provider, agent, action, interval_ms, timeout,
params, flags);
if (op == NULL || op->rc != 0) {
services_action_free(op);
return NULL;
} else {
// Preserve public API backward compatibility
op->rc = PCMK_OCF_OK;
op->status = PCMK_EXEC_DONE;
return op;
}
}
svc_action_t *
services_action_create_generic(const char *exec, const char *args[])
{
svc_action_t *op = new_action();
CRM_ASSERT(op != NULL);
op->opaque->exec = strdup(exec);
op->opaque->args[0] = strdup(exec);
if ((op->opaque->exec == NULL) || (op->opaque->args[0] == NULL)) {
crm_crit("Cannot prepare action for '%s': %s", exec, strerror(ENOMEM));
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
strerror(ENOMEM));
return op;
}
if (args == NULL) {
return op;
}
for (int cur_arg = 1; args[cur_arg - 1] != NULL; cur_arg++) {
if (cur_arg == PCMK__NELEM(op->opaque->args)) {
crm_info("Cannot prepare action for '%s': Too many arguments",
exec);
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR_HARD, "Too many arguments");
break;
}
op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]);
if (op->opaque->args[cur_arg] == NULL) {
crm_crit("Cannot prepare action for '%s': %s",
exec, strerror(ENOMEM));
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
strerror(ENOMEM));
break;
}
}
return op;
}
/*!
* \brief Create an alert agent action
*
* \param[in] id Alert ID
* \param[in] exec Path to alert agent executable
* \param[in] timeout Action timeout
* \param[in] params Parameters to use with action
* \param[in] sequence Action sequence number
* \param[in] cb_data Data to pass to callback function
*
* \return New action on success, NULL on error
* \note It is the caller's responsibility to free cb_data.
* The caller should not free params explicitly.
*/
svc_action_t *
services_alert_create(const char *id, const char *exec, int timeout,
GHashTable *params, int sequence, void *cb_data)
{
svc_action_t *action = services_action_create_generic(exec, NULL);
action->timeout = timeout;
action->id = strdup(id);
action->params = params;
action->sequence = sequence;
action->cb_data = cb_data;
return action;
}
/*!
* \brief Set the user and group that an action will execute as
*
* \param[in,out] action Action to modify
* \param[in] user Name of user to execute action as
* \param[in] group Name of group to execute action as
*
* \return pcmk_ok on success, -errno otherwise
*
* \note This will have no effect unless the process executing the action runs
* as root, and the action is not a systemd or upstart action.
* We could implement this for systemd by adding User= and Group= to
* [Service] in the override file, but that seems more likely to cause
* problems than be useful.
*/
int
services_action_user(svc_action_t *op, const char *user)
{
CRM_CHECK((op != NULL) && (user != NULL), return -EINVAL);
return crm_user_lookup(user, &(op->opaque->uid), &(op->opaque->gid));
}
/*!
* \brief Execute an alert agent action
*
* \param[in] action Action to execute
* \param[in] cb Function to call when action completes
*
* \return TRUE if the library will free action, FALSE otherwise
*
* \note If this function returns FALSE, it is the caller's responsibility to
* free the action with services_action_free().
*/
gboolean
services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op))
{
action->synchronous = false;
action->opaque->callback = cb;
return services__execute_file(action) == pcmk_rc_ok;
}
#if SUPPORT_DBUS
/*!
* \internal
* \brief Update operation's pending DBus call, unreferencing old one if needed
*
* \param[in,out] op Operation to modify
* \param[in] pending Pending call to set
*/
void
services_set_op_pending(svc_action_t *op, DBusPendingCall *pending)
{
if (op->opaque->pending && (op->opaque->pending != pending)) {
if (pending) {
crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending);
} else {
crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending);
}
dbus_pending_call_unref(op->opaque->pending);
}
op->opaque->pending = pending;
if (pending) {
crm_trace("Updated pending %s DBus call (%p)", op->id, pending);
} else {
crm_trace("Cleared pending %s DBus call", op->id);
}
}
#endif
void
services_action_cleanup(svc_action_t * op)
{
if ((op == NULL) || (op->opaque == NULL)) {
return;
}
#if SUPPORT_DBUS
if(op->opaque->timerid != 0) {
crm_trace("Removing timer for call %s to %s", op->action, op->rsc);
g_source_remove(op->opaque->timerid);
op->opaque->timerid = 0;
}
if(op->opaque->pending) {
if (dbus_pending_call_get_completed(op->opaque->pending)) {
// This should never be the case
crm_warn("Result of %s op %s was unhandled",
op->standard, op->id);
} else {
crm_debug("Will ignore any result of canceled %s op %s",
op->standard, op->id);
}
dbus_pending_call_cancel(op->opaque->pending);
services_set_op_pending(op, NULL);
}
#endif
if (op->opaque->stderr_gsource) {
mainloop_del_fd(op->opaque->stderr_gsource);
op->opaque->stderr_gsource = NULL;
}
if (op->opaque->stdout_gsource) {
mainloop_del_fd(op->opaque->stdout_gsource);
op->opaque->stdout_gsource = NULL;
}
}
+/*!
+ * \internal
+ * \brief Map an actual resource action result to a standard OCF result
+ *
+ * \param[in] standard Agent standard (must not be "service")
+ * \param[in] action Action that result is for
+ * \param[in] exit_status Actual agent exit status
+ *
+ * \return Standard OCF result
+ */
+enum ocf_exitcode
+services_result2ocf(const char *standard, const char *action, int exit_status)
+{
+ if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) {
+ return services__ocf2ocf(exit_status);
+
+#if SUPPORT_SYSTEMD
+ } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD,
+ pcmk__str_casei)) {
+ return services__systemd2ocf(exit_status);
+#endif
+
+#if SUPPORT_UPSTART
+ } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART,
+ pcmk__str_casei)) {
+ return services__upstart2ocf(exit_status);
+#endif
+
+#if SUPPORT_NAGIOS
+ } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS,
+ pcmk__str_casei)) {
+ return services__nagios2ocf(exit_status);
+#endif
+
+ } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_LSB,
+ pcmk__str_casei)) {
+ return services__lsb2ocf(action, exit_status);
+
+ } else {
+ crm_warn("Treating result from unknown standard '%s' as OCF",
+ ((standard == NULL)? "unspecified" : standard));
+ return services__ocf2ocf(exit_status);
+ }
+}
+
void
services_action_free(svc_action_t * op)
{
unsigned int i;
if (op == NULL) {
return;
}
/* The operation should be removed from all tracking lists by this point.
* If it's not, we have a bug somewhere, so bail. That may lead to a
* memory leak, but it's better than a use-after-free segmentation fault.
*/
CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return);
CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return);
CRM_CHECK((recurring_actions == NULL)
|| (g_hash_table_lookup(recurring_actions, op->id) == NULL),
return);
services_action_cleanup(op);
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
free(op->id);
free(op->opaque->exec);
for (i = 0; i < PCMK__NELEM(op->opaque->args); i++) {
free(op->opaque->args[i]);
}
free(op->opaque->exit_reason);
free(op->opaque);
free(op->rsc);
free(op->action);
free(op->standard);
free(op->agent);
free(op->provider);
free(op->stdout_data);
free(op->stderr_data);
if (op->params) {
g_hash_table_destroy(op->params);
op->params = NULL;
}
free(op);
}
gboolean
cancel_recurring_action(svc_action_t * op)
{
crm_info("Cancelling %s operation %s", op->standard, op->id);
if (recurring_actions) {
g_hash_table_remove(recurring_actions, op->id);
}
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
return TRUE;
}
/*!
* \brief Cancel a recurring action
*
* \param[in] name Name of resource that operation is for
* \param[in] action Name of operation to cancel
* \param[in] interval_ms Interval of operation to cancel
*
* \return TRUE if action was successfully cancelled, FALSE otherwise
*/
gboolean
services_action_cancel(const char *name, const char *action, guint interval_ms)
{
gboolean cancelled = FALSE;
char *id = pcmk__op_key(name, action, interval_ms);
svc_action_t *op = NULL;
/* We can only cancel a recurring action */
init_recurring_actions();
op = g_hash_table_lookup(recurring_actions, id);
if (op == NULL) {
goto done;
}
// Tell services__finalize_async_op() not to reschedule the operation
op->cancel = TRUE;
/* Stop tracking it as a recurring operation, and stop its repeat timer */
cancel_recurring_action(op);
/* If the op has a PID, it's an in-flight child process, so kill it.
*
* Whether the kill succeeds or fails, the main loop will send the op to
* async_action_complete() (and thus services__finalize_async_op()) when the
* process goes away.
*/
if (op->pid != 0) {
crm_info("Terminating in-flight op %s[%d] early because it was cancelled",
id, op->pid);
cancelled = mainloop_child_kill(op->pid);
if (cancelled == FALSE) {
crm_err("Termination of %s[%d] failed", id, op->pid);
}
goto done;
}
#if SUPPORT_DBUS
// In-flight systemd and upstart ops don't have a pid
if (inflight_systemd_or_upstart(op)) {
inflight_ops = g_list_remove(inflight_ops, op);
/* This will cause any result that comes in later to be discarded, so we
* don't call the callback and free the operation twice.
*/
services_action_cleanup(op);
}
#endif
/* The rest of this is essentially equivalent to
* services__finalize_async_op(), minus the handle_blocked_ops() call.
*/
// Report operation as cancelled
services__set_cancelled(op);
if (op->opaque->callback) {
op->opaque->callback(op);
}
blocked_ops = g_list_remove(blocked_ops, op);
services_action_free(op);
cancelled = TRUE;
// @TODO Initiate handle_blocked_ops() asynchronously
done:
free(id);
return cancelled;
}
gboolean
services_action_kick(const char *name, const char *action, guint interval_ms)
{
svc_action_t * op = NULL;
char *id = pcmk__op_key(name, action, interval_ms);
init_recurring_actions();
op = g_hash_table_lookup(recurring_actions, id);
free(id);
if (op == NULL) {
return FALSE;
}
if (op->pid || inflight_systemd_or_upstart(op)) {
return TRUE;
} else {
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
recurring_action_timer(op);
return TRUE;
}
}
/*!
* \internal
* \brief Add a new recurring operation, checking for duplicates
*
* \param[in] op Operation to add
*
* \return TRUE if duplicate found (and reschedule), FALSE otherwise
*/
static gboolean
handle_duplicate_recurring(svc_action_t * op)
{
svc_action_t * dup = NULL;
/* check for duplicates */
dup = g_hash_table_lookup(recurring_actions, op->id);
if (dup && (dup != op)) {
/* update user data */
if (op->opaque->callback) {
dup->opaque->callback = op->opaque->callback;
dup->cb_data = op->cb_data;
op->cb_data = NULL;
}
/* immediately execute the next interval */
if (dup->pid != 0) {
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
recurring_action_timer(dup);
}
/* free the duplicate */
services_action_free(op);
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Execute an action appropriately according to its standard
*
* \param[in] op Action to execute
*
* \return Standard Pacemaker return code
* \retval EBUSY Recurring operation could not be initiated
* \retval pcmk_rc_error Synchronous action failed
* \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action
* should not be freed (because it already was or is
* pending)
*
* \note If the return value for an asynchronous action is not pcmk_rc_ok, the
* caller is responsible for freeing the action.
*/
static int
execute_action(svc_action_t *op)
{
#if SUPPORT_UPSTART
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_UPSTART,
pcmk__str_casei)) {
return services__execute_upstart(op);
}
#endif
#if SUPPORT_SYSTEMD
if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD,
pcmk__str_casei)) {
return services__execute_systemd(op);
}
#endif
return services__execute_file(op);
}
void
services_add_inflight_op(svc_action_t * op)
{
if (op == NULL) {
return;
}
CRM_ASSERT(op->synchronous == FALSE);
/* keep track of ops that are in-flight to avoid collisions in the same namespace */
if (op->rsc) {
inflight_ops = g_list_append(inflight_ops, op);
}
}
/*!
* \internal
* \brief Stop tracking an operation that completed
*
* \param[in] op Operation to stop tracking
*/
void
services_untrack_op(svc_action_t *op)
{
/* Op is no longer in-flight or blocked */
inflight_ops = g_list_remove(inflight_ops, op);
blocked_ops = g_list_remove(blocked_ops, op);
/* Op is no longer blocking other ops, so check if any need to run */
handle_blocked_ops();
}
gboolean
services_action_async_fork_notify(svc_action_t * op,
void (*action_callback) (svc_action_t *),
void (*action_fork_callback) (svc_action_t *))
{
op->synchronous = false;
if (action_callback) {
op->opaque->callback = action_callback;
}
if (action_fork_callback) {
op->opaque->fork_callback = action_fork_callback;
}
if (op->interval_ms > 0) {
init_recurring_actions();
if (handle_duplicate_recurring(op) == TRUE) {
/* entry rescheduled, dup freed */
/* exit early */
return TRUE;
}
g_hash_table_replace(recurring_actions, op->id, op);
}
if (!pcmk_is_set(op->flags, SVC_ACTION_NON_BLOCKED)
&& op->rsc && is_op_blocked(op->rsc)) {
blocked_ops = g_list_append(blocked_ops, op);
return TRUE;
}
return execute_action(op) == pcmk_rc_ok;
}
gboolean
services_action_async(svc_action_t * op,
void (*action_callback) (svc_action_t *))
{
return services_action_async_fork_notify(op, action_callback, NULL);
}
static gboolean processing_blocked_ops = FALSE;
gboolean
is_op_blocked(const char *rsc)
{
GList *gIter = NULL;
svc_action_t *op = NULL;
for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
if (pcmk__str_eq(op->rsc, rsc, pcmk__str_casei)) {
return TRUE;
}
}
return FALSE;
}
static void
handle_blocked_ops(void)
{
GList *executed_ops = NULL;
GList *gIter = NULL;
svc_action_t *op = NULL;
if (processing_blocked_ops) {
/* avoid nested calling of this function */
return;
}
processing_blocked_ops = TRUE;
/* n^2 operation here, but blocked ops are incredibly rare. this list
* will be empty 99% of the time. */
for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
if (is_op_blocked(op->rsc)) {
continue;
}
executed_ops = g_list_append(executed_ops, op);
if (execute_action(op) != pcmk_rc_ok) {
/* this can cause this function to be called recursively
* which is why we have processing_blocked_ops static variable */
services__finalize_async_op(op);
}
}
for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
blocked_ops = g_list_remove(blocked_ops, op);
}
g_list_free(executed_ops);
processing_blocked_ops = FALSE;
}
/*!
* \internal
* \brief Execute a meta-data action appropriately to standard
*
* \param[in] op Meta-data action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_metadata_action(svc_action_t *op)
{
const char *class = op->standard;
if (op->agent == NULL) {
crm_err("meta-data requested without specifying agent");
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR_FATAL, "Agent not specified");
return EINVAL;
}
if (class == NULL) {
crm_err("meta-data requested for agent %s without specifying class",
op->agent);
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR_FATAL,
"Agent standard not specified");
return EINVAL;
}
if (!strcmp(class, PCMK_RESOURCE_CLASS_SERVICE)) {
class = resources_find_service_class(op->agent);
}
if (class == NULL) {
crm_err("meta-data requested for %s, but could not determine class",
op->agent);
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR_HARD,
"Agent standard could not be determined");
return EINVAL;
}
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) {
return pcmk_legacy2rc(services__get_lsb_metadata(op->agent,
&op->stdout_data));
}
#if SUPPORT_NAGIOS
if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
return pcmk_legacy2rc(services__get_nagios_metadata(op->agent,
&op->stdout_data));
}
#endif
return execute_action(op);
}
gboolean
services_action_sync(svc_action_t * op)
{
gboolean rc = TRUE;
if (op == NULL) {
crm_trace("No operation to execute");
return FALSE;
}
op->synchronous = true;
if (pcmk__str_eq(op->action, "meta-data", pcmk__str_casei)) {
/* Synchronous meta-data operations are handled specially. Since most
* resource classes don't provide any meta-data, it has to be
* synthesized from available information about the agent.
*
* services_action_async() doesn't treat meta-data actions specially, so
* it will result in an error for classes that don't support the action.
*/
rc = (execute_metadata_action(op) == pcmk_rc_ok);
} else {
rc = (execute_action(op) == pcmk_rc_ok);
}
crm_trace(" > " PCMK__OP_FMT ": %s = %d",
op->rsc, op->action, op->interval_ms, op->opaque->exec, op->rc);
if (op->stdout_data) {
crm_trace(" > stdout: %s", op->stdout_data);
}
if (op->stderr_data) {
crm_trace(" > stderr: %s", op->stderr_data);
}
return rc;
}
GList *
get_directory_list(const char *root, gboolean files, gboolean executable)
{
return services_os_get_directory_list(root, files, executable);
}
GList *
resources_list_standards(void)
{
GList *standards = NULL;
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_OCF));
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_LSB));
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SERVICE));
#if SUPPORT_SYSTEMD
{
GList *agents = systemd_unit_listall();
if (agents != NULL) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_SYSTEMD));
g_list_free_full(agents, free);
}
}
#endif
#if SUPPORT_UPSTART
{
GList *agents = upstart_job_listall();
if (agents != NULL) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_UPSTART));
g_list_free_full(agents, free);
}
}
#endif
#if SUPPORT_NAGIOS
{
GList *agents = services__list_nagios_agents();
if (agents != NULL) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_NAGIOS));
g_list_free_full(agents, free);
}
}
#endif
return standards;
}
GList *
resources_list_providers(const char *standard)
{
if (pcmk_is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider)) {
return resources_os_list_ocf_providers();
}
return NULL;
}
GList *
resources_list_agents(const char *standard, const char *provider)
{
if ((standard == NULL)
|| (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0)) {
GList *tmp1;
GList *tmp2;
GList *result = services__list_lsb_agents();
if (standard == NULL) {
tmp1 = result;
tmp2 = resources_os_list_ocf_agents(NULL);
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
}
#if SUPPORT_SYSTEMD
tmp1 = result;
tmp2 = systemd_unit_listall();
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
#endif
#if SUPPORT_UPSTART
tmp1 = result;
tmp2 = upstart_job_listall();
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
#endif
return result;
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
return resources_os_list_ocf_agents(provider);
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) {
return services__list_lsb_agents();
#if SUPPORT_SYSTEMD
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
return systemd_unit_listall();
#endif
#if SUPPORT_UPSTART
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) {
return upstart_job_listall();
#endif
#if SUPPORT_NAGIOS
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) {
return services__list_nagios_agents();
#endif
}
return NULL;
}
gboolean
resources_agent_exists(const char *standard, const char *provider, const char *agent)
{
GList *standards = NULL;
GList *providers = NULL;
GList *iter = NULL;
gboolean rc = FALSE;
gboolean has_providers = FALSE;
standards = resources_list_standards();
for (iter = standards; iter != NULL; iter = iter->next) {
if (pcmk__str_eq(iter->data, standard, pcmk__str_none)) {
rc = TRUE;
break;
}
}
if (rc == FALSE) {
goto done;
}
rc = FALSE;
has_providers = pcmk_is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider);
if (has_providers == TRUE && provider != NULL) {
providers = resources_list_providers(standard);
for (iter = providers; iter != NULL; iter = iter->next) {
if (pcmk__str_eq(iter->data, provider, pcmk__str_none)) {
rc = TRUE;
break;
}
}
} else if (has_providers == FALSE && provider == NULL) {
rc = TRUE;
}
if (rc == FALSE) {
goto done;
}
if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
if (services__lsb_agent_exists(agent)) {
rc = TRUE;
#if SUPPORT_SYSTEMD
} else if (systemd_unit_exists(agent)) {
rc = TRUE;
#endif
#if SUPPORT_UPSTART
} else if (upstart_job_exists(agent)) {
rc = TRUE;
#endif
} else {
rc = FALSE;
}
} else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) {
rc = services__ocf_agent_exists(provider, agent);
} else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) {
rc = services__lsb_agent_exists(agent);
#if SUPPORT_SYSTEMD
} else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
rc = systemd_unit_exists(agent);
#endif
#if SUPPORT_UPSTART
} else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_casei)) {
rc = upstart_job_exists(agent);
#endif
#if SUPPORT_NAGIOS
} else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
rc = services__nagios_agent_exists(agent);
#endif
} else {
rc = FALSE;
}
done:
g_list_free(standards);
g_list_free(providers);
return rc;
}
/*!
* \internal
* \brief Set the result of an action
*
* \param[out] action Where to set action result
* \param[in] agent_status Exit status to set
* \param[in] exec_status Execution status to set
* \param[in] reason Human-friendly description of event to set
*/
void
services__set_result(svc_action_t *action, int agent_status,
enum pcmk_exec_status exec_status, const char *reason)
{
if (action == NULL) {
return;
}
action->rc = agent_status;
action->status = exec_status;
if (!pcmk__str_eq(action->opaque->exit_reason, reason,
pcmk__str_none)) {
free(action->opaque->exit_reason);
action->opaque->exit_reason = (reason == NULL)? NULL : strdup(reason);
}
}
/*!
* \internal
* \brief Set the result of an action to cancelled
*
* \param[out] action Where to set action result
*
* \note This sets execution status but leaves the exit status unchanged
*/
void
services__set_cancelled(svc_action_t *action)
{
if (action != NULL) {
action->status = PCMK_EXEC_CANCELLED;
free(action->opaque->exit_reason);
action->opaque->exit_reason = NULL;
}
}
/*!
* \internal
* \brief Get the exit reason of an action
*
* \param[in] action Action to check
*
* \return Action's exit reason (or NULL if none)
*/
const char *
services__exit_reason(svc_action_t *action)
{
return action->opaque->exit_reason;
}
diff --git a/lib/services/services_lsb.c b/lib/services/services_lsb.c
index 4e233085dd..9de92918b6 100644
--- a/lib/services/services_lsb.c
+++ b/lib/services/services_lsb.c
@@ -1,302 +1,343 @@
/*
* Copyright 2010-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <stdio.h>
#include <errno.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/services.h>
#include "services_private.h"
#include "services_lsb.h"
#define lsb_metadata_template \
"<?xml version='1.0'?>\n" \
"<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n" \
"<resource-agent name='%s' version='" PCMK_DEFAULT_AGENT_VERSION "'>\n" \
" <version>1.0</version>\n" \
" <longdesc lang='en'>\n" \
"%s" \
" </longdesc>\n" \
" <shortdesc lang='en'>%s</shortdesc>\n" \
" <parameters>\n" \
" </parameters>\n" \
" <actions>\n" \
" <action name='meta-data' timeout='5' />\n" \
" <action name='start' timeout='15' />\n" \
" <action name='stop' timeout='15' />\n" \
" <action name='status' timeout='15' />\n" \
" <action name='restart' timeout='15' />\n" \
" <action name='force-reload' timeout='15' />\n" \
" <action name='monitor' timeout='15' interval='15' />\n" \
" </actions>\n" \
" <special tag='LSB'>\n" \
" <Provides>%s</Provides>\n" \
" <Required-Start>%s</Required-Start>\n" \
" <Required-Stop>%s</Required-Stop>\n" \
" <Should-Start>%s</Should-Start>\n" \
" <Should-Stop>%s</Should-Stop>\n" \
" <Default-Start>%s</Default-Start>\n" \
" <Default-Stop>%s</Default-Stop>\n" \
" </special>\n" \
"</resource-agent>\n"
/* See "Comment Conventions for Init Scripts" in the LSB core specification at:
* http://refspecs.linuxfoundation.org/lsb.shtml
*/
#define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO"
#define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO"
#define PROVIDES "# Provides:"
#define REQ_START "# Required-Start:"
#define REQ_STOP "# Required-Stop:"
#define SHLD_START "# Should-Start:"
#define SHLD_STOP "# Should-Stop:"
#define DFLT_START "# Default-Start:"
#define DFLT_STOP "# Default-Stop:"
#define SHORT_DSCR "# Short-Description:"
#define DESCRIPTION "# Description:"
#define lsb_meta_helper_free_value(m) \
do { \
if ((m) != NULL) { \
xmlFree(m); \
(m) = NULL; \
} \
} while(0)
/*!
* \internal
* \brief Grab an LSB header value
*
* \param[in] line Line read from LSB init script
* \param[in,out] value If not set, will be set to XML-safe copy of value
* \param[in] prefix Set value if line starts with this pattern
*
* \return TRUE if value was set, FALSE otherwise
*/
static inline gboolean
lsb_meta_helper_get_value(const char *line, char **value, const char *prefix)
{
if (!*value && pcmk__starts_with(line, prefix)) {
*value = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST line+strlen(prefix));
return TRUE;
}
return FALSE;
}
#define DESC_MAX 2048
int
services__get_lsb_metadata(const char *type, char **output)
{
char ra_pathname[PATH_MAX] = { 0, };
FILE *fp = NULL;
char buffer[1024] = { 0, };
char *provides = NULL;
char *req_start = NULL;
char *req_stop = NULL;
char *shld_start = NULL;
char *shld_stop = NULL;
char *dflt_start = NULL;
char *dflt_stop = NULL;
char *s_dscrpt = NULL;
char *xml_l_dscrpt = NULL;
int offset = 0;
bool in_header = FALSE;
char description[DESC_MAX] = { 0, };
if (type[0] == '/') {
snprintf(ra_pathname, sizeof(ra_pathname), "%s", type);
} else {
snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s",
LSB_ROOT_DIR, type);
}
crm_trace("Looking into %s", ra_pathname);
fp = fopen(ra_pathname, "r");
if (fp == NULL) {
return -errno;
}
/* Enter into the LSB-compliant comment block */
while (fgets(buffer, sizeof(buffer), fp)) {
// Ignore lines up to and including the block delimiter
if (pcmk__starts_with(buffer, LSB_INITSCRIPT_INFOBEGIN_TAG)) {
in_header = TRUE;
continue;
}
if (!in_header) {
continue;
}
/* Assume each of the following eight arguments contain one line */
if (lsb_meta_helper_get_value(buffer, &provides, PROVIDES)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &req_start, REQ_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &req_stop, REQ_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &shld_start, SHLD_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &shld_stop, SHLD_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &dflt_start, DFLT_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &dflt_stop, DFLT_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &s_dscrpt, SHORT_DSCR)) {
continue;
}
/* Long description may cross multiple lines */
if ((offset == 0) // haven't already found long description
&& pcmk__starts_with(buffer, DESCRIPTION)) {
bool processed_line = TRUE;
// Get remainder of description line itself
offset += snprintf(description, DESC_MAX, "%s",
buffer + strlen(DESCRIPTION));
// Read any continuation lines of the description
buffer[0] = '\0';
while (fgets(buffer, sizeof(buffer), fp)) {
if (pcmk__starts_with(buffer, "# ")
|| pcmk__starts_with(buffer, "#\t")) {
/* '#' followed by a tab or more than one space indicates a
* continuation of the long description.
*/
offset += snprintf(description + offset, DESC_MAX - offset,
"%s", buffer + 1);
} else {
/* This line is not part of the long description,
* so continue with normal processing.
*/
processed_line = FALSE;
break;
}
}
// Make long description safe to use in XML
xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(description));
if (processed_line) {
// We grabbed the line into the long description
continue;
}
}
// Stop if we leave the header block
if (pcmk__starts_with(buffer, LSB_INITSCRIPT_INFOEND_TAG)) {
break;
}
if (buffer[0] != '#') {
break;
}
}
fclose(fp);
*output = crm_strdup_printf(lsb_metadata_template, type,
(xml_l_dscrpt? xml_l_dscrpt : type),
(s_dscrpt? s_dscrpt : type),
(provides? provides : ""),
(req_start? req_start : ""),
(req_stop? req_stop : ""),
(shld_start? shld_start : ""),
(shld_stop? shld_stop : ""),
(dflt_start? dflt_start : ""),
(dflt_stop? dflt_stop : ""));
lsb_meta_helper_free_value(xml_l_dscrpt);
lsb_meta_helper_free_value(s_dscrpt);
lsb_meta_helper_free_value(provides);
lsb_meta_helper_free_value(req_start);
lsb_meta_helper_free_value(req_stop);
lsb_meta_helper_free_value(shld_start);
lsb_meta_helper_free_value(shld_stop);
lsb_meta_helper_free_value(dflt_start);
lsb_meta_helper_free_value(dflt_stop);
crm_trace("Created fake metadata: %llu",
(unsigned long long) strlen(*output));
return pcmk_ok;
}
GList *
services__list_lsb_agents(void)
{
return services_os_get_directory_list(LSB_ROOT_DIR, TRUE, TRUE);
}
bool
services__lsb_agent_exists(const char *agent)
{
bool rc = FALSE;
struct stat st;
char *path = pcmk__full_path(agent, LSB_ROOT_DIR);
rc = (stat(path, &st) == 0);
free(path);
return rc;
}
/*!
* \internal
* \brief Prepare an LSB action
*
* \param[in] op Action to prepare
*
* \return Standard Pacemaker return code
*/
int
services__lsb_prepare(svc_action_t *op)
{
op->opaque->exec = pcmk__full_path(op->agent, LSB_ROOT_DIR);
op->opaque->args[0] = strdup(op->opaque->exec);
op->opaque->args[1] = strdup(op->action);
if ((op->opaque->args[0] == NULL) || (op->opaque->args[1] == NULL)) {
return ENOMEM;
}
return pcmk_rc_ok;
}
+/*!
+ * \internal
+ * \brief Map an LSB result to a standard OCF result
+ *
+ * \param[in] action Action that result is for
+ * \param[in] exit_status LSB agent exit status
+ *
+ * \return Standard OCF result
+ */
+enum ocf_exitcode
+services__lsb2ocf(const char *action, int exit_status)
+{
+ // For non-status actions, LSB and OCF share error codes <= 7
+ if (!pcmk__str_any_of(action, "status", "monitor", NULL)) {
+ if ((exit_status < 0) || (exit_status > PCMK_LSB_NOT_RUNNING)) {
+ return PCMK_OCF_UNKNOWN_ERROR;
+ }
+ return (enum ocf_exitcode) exit_status;
+ }
+
+ // LSB status actions have their own codes
+ switch (exit_status) {
+ case PCMK_LSB_STATUS_OK:
+ return PCMK_OCF_OK;
+
+ case PCMK_LSB_STATUS_NOT_INSTALLED:
+ return PCMK_OCF_NOT_INSTALLED;
+
+ case PCMK_LSB_STATUS_INSUFFICIENT_PRIV:
+ return PCMK_OCF_INSUFFICIENT_PRIV;
+
+ case PCMK_LSB_STATUS_VAR_PID:
+ case PCMK_LSB_STATUS_VAR_LOCK:
+ case PCMK_LSB_STATUS_NOT_RUNNING:
+ return PCMK_OCF_NOT_RUNNING;
+
+ default:
+ return PCMK_OCF_UNKNOWN_ERROR;
+ }
+}
+
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/services_compat.h>
svc_action_t *
services_action_create(const char *name, const char *action,
guint interval_ms, int timeout)
{
return resources_action_create(name, PCMK_RESOURCE_CLASS_LSB, NULL, name,
action, interval_ms, timeout, NULL, 0);
}
GList *
services_list(void)
{
return resources_list_agents(PCMK_RESOURCE_CLASS_LSB, NULL);
}
// LCOV_EXCL_STOP
// End deprecated API
diff --git a/lib/services/services_lsb.h b/lib/services/services_lsb.h
index 46d23dfdad..27023f61c5 100644
--- a/lib/services/services_lsb.h
+++ b/lib/services/services_lsb.h
@@ -1,18 +1,21 @@
/*
* Copyright 2010-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef SERVICES_LSB__H
# define SERVICES_LSB__H
G_GNUC_INTERNAL int services__get_lsb_metadata(const char *type, char **output);
G_GNUC_INTERNAL GList *services__list_lsb_agents(void);
G_GNUC_INTERNAL bool services__lsb_agent_exists(const char *agent);
G_GNUC_INTERNAL int services__lsb_prepare(svc_action_t *op);
+G_GNUC_INTERNAL
+enum ocf_exitcode services__lsb2ocf(const char *action, int exit_status);
+
#endif
diff --git a/lib/services/services_nagios.c b/lib/services/services_nagios.c
index 2699e3f664..198b94f623 100644
--- a/lib/services/services_nagios.c
+++ b/lib/services/services_nagios.c
@@ -1,192 +1,224 @@
/*
- * Copyright 2010-2019 the Pacemaker project contributors
+ * Copyright 2010-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <grp.h>
#include <string.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "crm/crm.h"
#include <crm/msg_xml.h>
#include "crm/common/mainloop.h"
#include "crm/services.h"
#include "services_private.h"
#include "services_nagios.h"
/*!
* \internal
* \brief Prepare a Nagios action
*
* \param[in] op Action to prepare
*
* \return Standard Pacemaker return code
*/
int
services__nagios_prepare(svc_action_t *op)
{
op->opaque->exec = pcmk__full_path(op->agent, NAGIOS_PLUGIN_DIR);
op->opaque->args[0] = strdup(op->opaque->exec);
if (op->opaque->args[0] == NULL) {
return ENOMEM;
}
if (pcmk__str_eq(op->action, "monitor", pcmk__str_casei)
&& (op->interval_ms == 0)) {
// Invoke --version for a nagios probe
op->opaque->args[1] = strdup("--version");
if (op->opaque->args[1] == NULL) {
return ENOMEM;
}
} else if (op->params != NULL) {
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
int index = 1; // 0 is already set to executable name
g_hash_table_iter_init(&iter, op->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key,
(gpointer *) & value)) {
if (index > (PCMK__NELEM(op->opaque->args) - 2)) {
return E2BIG;
}
if (pcmk__str_eq(key, XML_ATTR_CRM_VERSION, pcmk__str_casei)
|| strstr(key, CRM_META "_")) {
continue;
}
op->opaque->args[index++] = crm_strdup_printf("--%s", key);
op->opaque->args[index++] = strdup(value);
if (op->opaque->args[index - 1] == NULL) {
return ENOMEM;
}
}
}
// Nagios actions don't need to keep the parameters
if (op->params != NULL) {
g_hash_table_destroy(op->params);
op->params = NULL;
}
return pcmk_rc_ok;
}
+/*!
+ * \internal
+ * \brief Map a Nagios result to a standard OCF result
+ *
+ * \param[in] exit_status Nagios exit status
+ *
+ * \return Standard OCF result
+ */
+enum ocf_exitcode
+services__nagios2ocf(int exit_status)
+{
+ switch (exit_status) {
+ case NAGIOS_STATE_OK:
+ return PCMK_OCF_OK;
+
+ case NAGIOS_INSUFFICIENT_PRIV:
+ return PCMK_OCF_INSUFFICIENT_PRIV;
+
+ case NAGIOS_NOT_INSTALLED:
+ return PCMK_OCF_NOT_INSTALLED;
+
+ case NAGIOS_STATE_WARNING:
+ return PCMK_OCF_DEGRADED;
+
+ case NAGIOS_STATE_CRITICAL:
+ case NAGIOS_STATE_UNKNOWN:
+ case NAGIOS_STATE_DEPENDENT:
+ default:
+ return PCMK_OCF_UNKNOWN_ERROR;
+ }
+}
+
static inline char *
nagios_metadata_name(const char *plugin)
{
return crm_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin);
}
GList *
services__list_nagios_agents(void)
{
GList *plugin_list = NULL;
GList *result = NULL;
plugin_list = services_os_get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE);
// Return only the plugins that have metadata
for (GList *gIter = plugin_list; gIter != NULL; gIter = gIter->next) {
struct stat st;
const char *plugin = gIter->data;
char *metadata = nagios_metadata_name(plugin);
if (stat(metadata, &st) == 0) {
result = g_list_append(result, strdup(plugin));
}
free(metadata);
}
g_list_free_full(plugin_list, free);
return result;
}
gboolean
services__nagios_agent_exists(const char *name)
{
char *buf = NULL;
gboolean rc = FALSE;
struct stat st;
if (name == NULL) {
return rc;
}
buf = crm_strdup_printf(NAGIOS_PLUGIN_DIR "/%s", name);
if (stat(buf, &st) == 0) {
rc = TRUE;
}
free(buf);
return rc;
}
int
services__get_nagios_metadata(const char *type, char **output)
{
int rc = pcmk_ok;
FILE *file_strm = NULL;
int start = 0, length = 0, read_len = 0;
char *metadata_file = nagios_metadata_name(type);
file_strm = fopen(metadata_file, "r");
if (file_strm == NULL) {
crm_err("Metadata file %s does not exist", metadata_file);
free(metadata_file);
return -EIO;
}
/* see how big the file is */
start = ftell(file_strm);
fseek(file_strm, 0L, SEEK_END);
length = ftell(file_strm);
fseek(file_strm, 0L, start);
CRM_ASSERT(length >= 0);
CRM_ASSERT(start == ftell(file_strm));
if (length <= 0) {
crm_info("%s was not valid", metadata_file);
free(*output);
*output = NULL;
rc = -EIO;
} else {
crm_trace("Reading %d bytes from file", length);
*output = calloc(1, (length + 1));
read_len = fread(*output, 1, length, file_strm);
if (read_len != length) {
crm_err("Calculated and read bytes differ: %d vs. %d",
length, read_len);
free(*output);
*output = NULL;
rc = -EIO;
}
}
fclose(file_strm);
free(metadata_file);
return rc;
}
diff --git a/lib/services/services_nagios.h b/lib/services/services_nagios.h
index fd95cb55f7..2e447e0328 100644
--- a/lib/services/services_nagios.h
+++ b/lib/services/services_nagios.h
@@ -1,25 +1,28 @@
/*
- * Copyright 2010-2019 the Pacemaker project contributors
+ * Copyright 2010-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef SERVICES_NAGIOS__H
# define SERVICES_NAGIOS__H
G_GNUC_INTERNAL
int services__nagios_prepare(svc_action_t *op);
+G_GNUC_INTERNAL
+enum ocf_exitcode services__nagios2ocf(int exit_status);
+
G_GNUC_INTERNAL
GList *services__list_nagios_agents(void);
G_GNUC_INTERNAL
gboolean services__nagios_agent_exists(const char *agent);
G_GNUC_INTERNAL
int services__get_nagios_metadata(const char *type, char **output);
#endif
diff --git a/lib/services/services_ocf.c b/lib/services/services_ocf.c
index 1b7d2b50a4..e5bf2dd3cd 100644
--- a/lib/services/services_ocf.c
+++ b/lib/services/services_ocf.c
@@ -1,155 +1,179 @@
/*
* Copyright 2012-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include "services_private.h"
#include "services_ocf.h"
GList *
resources_os_list_ocf_providers(void)
{
return get_directory_list(OCF_RA_PATH, FALSE, TRUE);
}
static GList *
services_os_get_directory_list_provider(const char *root, const char *provider,
gboolean files, gboolean executable)
{
GList *result = NULL;
char *dirs = strdup(root);
char *dir = NULL;
char buffer[PATH_MAX];
if (pcmk__str_empty(dirs)) {
free(dirs);
return result;
}
for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) {
GList *tmp = NULL;
sprintf(buffer, "%s/%s", dir, provider);
tmp = services_os_get_single_directory_list(buffer, files, executable);
if (tmp) {
result = g_list_concat(result, tmp);
}
}
free(dirs);
return result;
}
GList *
resources_os_list_ocf_agents(const char *provider)
{
GList *gIter = NULL;
GList *result = NULL;
GList *providers = NULL;
if (provider) {
return services_os_get_directory_list_provider(OCF_RA_PATH, provider,
TRUE, TRUE);
}
providers = resources_os_list_ocf_providers();
for (gIter = providers; gIter != NULL; gIter = gIter->next) {
GList *tmp1 = result;
GList *tmp2 = resources_os_list_ocf_agents(gIter->data);
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
}
g_list_free_full(providers, free);
return result;
}
gboolean
services__ocf_agent_exists(const char *provider, const char *agent)
{
gboolean rc = FALSE;
struct stat st;
char *dirs = strdup(OCF_RA_PATH);
char *dir = NULL;
char *buf = NULL;
if (provider == NULL || agent == NULL || pcmk__str_empty(dirs)) {
free(dirs);
return rc;
}
for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) {
buf = crm_strdup_printf("%s/%s/%s", dir, provider, agent);
if (stat(buf, &st) == 0) {
free(buf);
rc = TRUE;
break;
}
free(buf);
}
free(dirs);
return rc;
}
/*!
* \internal
* \brief Prepare an OCF action
*
* \param[in] op Action to prepare
*
* \return Standard Pacemaker return code
*/
int
services__ocf_prepare(svc_action_t *op)
{
char *dirs = strdup(OCF_RA_PATH);
struct stat st;
if (dirs == NULL) {
return ENOMEM;
}
// Look for agent on path
for (char *dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) {
char *buf = crm_strdup_printf("%s/%s/%s", dir, op->provider, op->agent);
if (stat(buf, &st) == 0) {
op->opaque->exec = buf;
break;
}
free(buf);
}
free(dirs);
if (op->opaque->exec == NULL) {
return ENOENT;
}
op->opaque->args[0] = strdup(op->opaque->exec);
op->opaque->args[1] = strdup(op->action);
if ((op->opaque->args[0] == NULL) || (op->opaque->args[1] == NULL)) {
return ENOMEM;
}
return pcmk_rc_ok;
}
+
+/*!
+ * \internal
+ * \brief Map an actual OCF result to a standard OCF result
+ *
+ * \param[in] exit_status Actual OCF agent exit status
+ *
+ * \return Standard OCF result
+ */
+enum ocf_exitcode
+services__ocf2ocf(int exit_status)
+{
+ switch (exit_status) {
+ case PCMK_OCF_DEGRADED:
+ case PCMK_OCF_DEGRADED_PROMOTED:
+ break;
+ default:
+ if ((exit_status < 0) || (exit_status > PCMK_OCF_FAILED_PROMOTED)) {
+ exit_status = PCMK_OCF_UNKNOWN_ERROR;
+ }
+ break;
+ }
+ return (enum ocf_exitcode) exit_status;
+}
diff --git a/lib/services/services_ocf.h b/lib/services/services_ocf.h
index 62c344b55c..1c40552723 100644
--- a/lib/services/services_ocf.h
+++ b/lib/services/services_ocf.h
@@ -1,28 +1,31 @@
/*
* Copyright 2010-2011 Red Hat, Inc.
* Later changes copyright 2012-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__SERVICES_OCF__H
#define PCMK__SERVICES_OCF__H
#include <glib.h>
G_GNUC_INTERNAL
GList *resources_os_list_ocf_providers(void);
G_GNUC_INTERNAL
GList *resources_os_list_ocf_agents(const char *provider);
G_GNUC_INTERNAL
gboolean services__ocf_agent_exists(const char *provider, const char *agent);
G_GNUC_INTERNAL
int services__ocf_prepare(svc_action_t *op);
+G_GNUC_INTERNAL
+enum ocf_exitcode services__ocf2ocf(int exit_status);
+
#endif // PCMK__SERVICES_OCF__H
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index f832f02494..731a4d4943 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -1,1037 +1,1052 @@
/*
* Copyright 2012-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include <crm/common/mainloop.h>
#include <sys/stat.h>
#include <gio/gio.h>
#include <services_private.h>
#include <systemd.h>
#include <dbus/dbus.h>
#include <pcmk-dbus.h>
static void invoke_unit_by_path(svc_action_t *op, const char *unit);
#define BUS_NAME "org.freedesktop.systemd1"
#define BUS_NAME_MANAGER BUS_NAME ".Manager"
#define BUS_NAME_UNIT BUS_NAME ".Unit"
#define BUS_PATH "/org/freedesktop/systemd1"
/*!
* \internal
* \brief Prepare a systemd action
*
* \param[in] op Action to prepare
*
* \return Standard Pacemaker return code
*/
int
services__systemd_prepare(svc_action_t *op)
{
op->opaque->exec = strdup("systemd-dbus");
if (op->opaque->exec == NULL) {
return ENOMEM;
}
return pcmk_rc_ok;
}
+/*!
+ * \internal
+ * \brief Map a systemd result to a standard OCF result
+ *
+ * \param[in] exit_status Systemd result
+ *
+ * \return Standard OCF result
+ */
+enum ocf_exitcode
+services__systemd2ocf(int exit_status)
+{
+ // This library uses OCF codes for systemd actions
+ return (enum ocf_exitcode) exit_status;
+}
+
static inline DBusMessage *
systemd_new_method(const char *method)
{
crm_trace("Calling: %s on " BUS_NAME_MANAGER, method);
return dbus_message_new_method_call(BUS_NAME, BUS_PATH, BUS_NAME_MANAGER,
method);
}
/*
* Functions to manage a static DBus connection
*/
static DBusConnection* systemd_proxy = NULL;
static inline DBusPendingCall *
systemd_send(DBusMessage *msg,
void(*done)(DBusPendingCall *pending, void *user_data),
void *user_data, int timeout)
{
return pcmk_dbus_send(msg, systemd_proxy, done, user_data, timeout);
}
static inline DBusMessage *
systemd_send_recv(DBusMessage *msg, DBusError *error, int timeout)
{
return pcmk_dbus_send_recv(msg, systemd_proxy, error, timeout);
}
/*!
* \internal
* \brief Send a method to systemd without arguments, and wait for reply
*
* \param[in] method Method to send
*
* \return Systemd reply on success, NULL (and error will be logged) otherwise
*
* \note The caller must call dbus_message_unref() on the reply after
* handling it.
*/
static DBusMessage *
systemd_call_simple_method(const char *method)
{
DBusMessage *msg = systemd_new_method(method);
DBusMessage *reply = NULL;
DBusError error;
/* Don't call systemd_init() here, because that calls this */
CRM_CHECK(systemd_proxy, return NULL);
if (msg == NULL) {
crm_err("Could not create message to send %s to systemd", method);
return NULL;
}
dbus_error_init(&error);
reply = systemd_send_recv(msg, &error, DBUS_TIMEOUT_USE_DEFAULT);
dbus_message_unref(msg);
if (dbus_error_is_set(&error)) {
crm_err("Could not send %s to systemd: %s (%s)",
method, error.message, error.name);
dbus_error_free(&error);
return NULL;
} else if (reply == NULL) {
crm_err("Could not send %s to systemd: no reply received", method);
return NULL;
}
return reply;
}
static gboolean
systemd_init(void)
{
static int need_init = 1;
// https://dbus.freedesktop.org/doc/api/html/group__DBusConnection.html
if (systemd_proxy
&& dbus_connection_get_is_connected(systemd_proxy) == FALSE) {
crm_warn("Connection to System DBus is closed. Reconnecting...");
pcmk_dbus_disconnect(systemd_proxy);
systemd_proxy = NULL;
need_init = 1;
}
if (need_init) {
need_init = 0;
systemd_proxy = pcmk_dbus_connect();
}
if (systemd_proxy == NULL) {
return FALSE;
}
return TRUE;
}
static inline char *
systemd_get_property(const char *unit, const char *name,
void (*callback)(const char *name, const char *value, void *userdata),
void *userdata, DBusPendingCall **pending, int timeout)
{
return systemd_proxy?
pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME_UNIT,
name, callback, userdata, pending, timeout)
: NULL;
}
void
systemd_cleanup(void)
{
if (systemd_proxy) {
pcmk_dbus_disconnect(systemd_proxy);
systemd_proxy = NULL;
}
}
/*
* end of systemd_proxy functions
*/
/*!
* \internal
* \brief Check whether a file name represents a manageable systemd unit
*
* \param[in] name File name to check
*
* \return Pointer to "dot" before filename extension if so, NULL otherwise
*/
static const char *
systemd_unit_extension(const char *name)
{
if (name) {
const char *dot = strrchr(name, '.');
if (dot && (!strcmp(dot, ".service")
|| !strcmp(dot, ".socket")
|| !strcmp(dot, ".mount")
|| !strcmp(dot, ".timer")
|| !strcmp(dot, ".path"))) {
return dot;
}
}
return NULL;
}
static char *
systemd_service_name(const char *name)
{
if (name == NULL) {
return NULL;
}
if (systemd_unit_extension(name)) {
return strdup(name);
}
return crm_strdup_printf("%s.service", name);
}
static void
systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
{
DBusError error;
DBusMessage *reply = NULL;
unsigned int reload_count = GPOINTER_TO_UINT(user_data);
dbus_error_init(&error);
if(pending) {
reply = dbus_pending_call_steal_reply(pending);
}
if (pcmk_dbus_find_error(pending, reply, &error)) {
crm_err("Could not issue systemd reload %d: %s", reload_count, error.message);
dbus_error_free(&error);
} else {
crm_trace("Reload %d complete", reload_count);
}
if(pending) {
dbus_pending_call_unref(pending);
}
if(reply) {
dbus_message_unref(reply);
}
}
static bool
systemd_daemon_reload(int timeout)
{
static unsigned int reload_count = 0;
DBusMessage *msg = systemd_new_method("Reload");
reload_count++;
CRM_ASSERT(msg != NULL);
systemd_send(msg, systemd_daemon_reload_complete,
GUINT_TO_POINTER(reload_count), timeout);
dbus_message_unref(msg);
return TRUE;
}
/*!
* \internal
* \brief Set an action result based on a method error
*
* \param[in] op Action to set result for
* \param[in] error Method error
*/
static void
set_result_from_method_error(svc_action_t *op, const DBusError *error)
{
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Unable to invoke systemd DBus method");
if (strstr(error->name, "org.freedesktop.systemd1.InvalidName")
|| strstr(error->name, "org.freedesktop.systemd1.LoadFailed")
|| strstr(error->name, "org.freedesktop.systemd1.NoSuchUnit")) {
if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) {
crm_trace("Masking systemd stop failure (%s) for %s "
"because unknown service can be considered stopped",
error->name, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
return;
}
services__set_result(op, PCMK_OCF_NOT_INSTALLED,
PCMK_EXEC_NOT_INSTALLED, "systemd unit not found");
}
crm_err("DBus request for %s of systemd unit %s for resource %s failed: %s",
op->action, op->agent, crm_str(op->rsc), error->message);
}
/*!
* \internal
* \brief Extract unit path from LoadUnit reply, and execute action
*
* \param[in] reply LoadUnit reply
* \param[in] op Action to execute (or NULL to just return path)
*
* \return DBus object path for specified unit if successful (only valid for
* lifetime of \p reply), otherwise NULL
*/
static const char *
execute_after_loadunit(DBusMessage *reply, svc_action_t *op)
{
const char *path = NULL;
DBusError error;
/* path here is not used other than as a non-NULL flag to indicate that a
* request was indeed sent
*/
if (pcmk_dbus_find_error((void *) &path, reply, &error)) {
if (op != NULL) {
set_result_from_method_error(op, &error);
}
dbus_error_free(&error);
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
if (op != NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"systemd DBus method had unexpected reply");
crm_err("Could not load systemd unit %s for %s: "
"DBus reply has unexpected type", op->agent, op->id);
} else {
crm_err("Could not load systemd unit: "
"DBus reply has unexpected type");
}
} else {
dbus_message_get_args (reply, NULL,
DBUS_TYPE_OBJECT_PATH, &path,
DBUS_TYPE_INVALID);
}
if (op != NULL) {
if (path != NULL) {
invoke_unit_by_path(op, path);
} else if (!(op->synchronous)) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"No DBus object found for systemd unit");
services__finalize_async_op(op);
}
}
return path;
}
/*!
* \internal
* \brief Execute a systemd action after its LoadUnit completes
*
* \param[in] pending If not NULL, DBus call associated with LoadUnit request
* \param[in] user_data Action to execute
*/
static void
loadunit_completed(DBusPendingCall *pending, void *user_data)
{
DBusMessage *reply = NULL;
svc_action_t *op = user_data;
crm_trace("LoadUnit result for %s arrived", op->id);
// Grab the reply
if (pending != NULL) {
reply = dbus_pending_call_steal_reply(pending);
}
// The call is no longer pending
CRM_LOG_ASSERT(pending == op->opaque->pending);
services_set_op_pending(op, NULL);
// Execute the desired action based on the reply
execute_after_loadunit(reply, user_data);
if (reply != NULL) {
dbus_message_unref(reply);
}
}
/*!
* \internal
* \brief Execute a systemd action, given the unit name
*
* \param[in] arg_name Unit name (possibly shortened, i.e. without ".service")
* \param[in] op Action to execute (if NULL, just get the object path)
* \param[out] path If non-NULL and \p op is NULL or synchronous, where to
* store DBus object path for specified unit
*
* \return Standard Pacemaker return code (for NULL \p op, pcmk_rc_ok means unit
* was found; for synchronous actions, pcmk_rc_ok means unit was
* executed, with the actual result stored in \p op; for asynchronous
* actions, pcmk_rc_ok means action was initiated)
* \note It is the caller's responsibility to free the return value if non-NULL.
*/
static int
invoke_unit_by_name(const char *arg_name, svc_action_t *op, char **path)
{
DBusMessage *msg;
DBusMessage *reply = NULL;
DBusPendingCall *pending = NULL;
char *name = NULL;
if (!systemd_init()) {
if (op != NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"No DBus connection");
}
return ENOTCONN;
}
/* Create a LoadUnit DBus method (equivalent to GetUnit if already loaded),
* which makes the unit usable via further DBus methods.
*
* <method name="LoadUnit">
* <arg name="name" type="s" direction="in"/>
* <arg name="unit" type="o" direction="out"/>
* </method>
*/
msg = systemd_new_method("LoadUnit");
CRM_ASSERT(msg != NULL);
// Add the (expanded) unit name as the argument
name = systemd_service_name(arg_name);
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name,
DBUS_TYPE_INVALID));
free(name);
if ((op == NULL) || op->synchronous) {
// For synchronous ops, wait for a reply and extract the result
const char *unit = NULL;
int rc = pcmk_rc_ok;
reply = systemd_send_recv(msg, NULL,
(op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT));
dbus_message_unref(msg);
unit = execute_after_loadunit(reply, op);
if (unit == NULL) {
rc = ENOENT;
if (path != NULL) {
*path = NULL;
}
} else if (path != NULL) {
*path = strdup(unit);
if (*path == NULL) {
rc = ENOMEM;
}
}
if (reply != NULL) {
dbus_message_unref(reply);
}
return rc;
}
// For asynchronous ops, initiate the LoadUnit call and return
pending = systemd_send(msg, loadunit_completed, op, op->timeout);
if (pending == NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Unable to send DBus message");
dbus_message_unref(msg);
return ECOMM;
}
// LoadUnit was successfully initiated
services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
services_set_op_pending(op, pending);
dbus_message_unref(msg);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Compare two strings alphabetically (case-insensitive)
*
* \param[in] a First string to compare
* \param[in] b Second string to compare
*
* \return 0 if strings are equal, -1 if a < b, 1 if a > b
*
* \note Usable as a GCompareFunc with g_list_sort().
* NULL is considered less than non-NULL.
*/
static gint
sort_str(gconstpointer a, gconstpointer b)
{
if (!a && !b) {
return 0;
} else if (!a) {
return -1;
} else if (!b) {
return 1;
}
return strcasecmp(a, b);
}
GList *
systemd_unit_listall(void)
{
int nfiles = 0;
GList *units = NULL;
DBusMessageIter args;
DBusMessageIter unit;
DBusMessageIter elem;
DBusMessage *reply = NULL;
if (systemd_init() == FALSE) {
return NULL;
}
/*
" <method name=\"ListUnitFiles\">\n" \
" <arg name=\"files\" type=\"a(ss)\" direction=\"out\"/>\n" \
" </method>\n" \
*/
reply = systemd_call_simple_method("ListUnitFiles");
if (reply == NULL) {
return NULL;
}
if (!dbus_message_iter_init(reply, &args)) {
crm_err("Could not list systemd unit files: systemd reply has no arguments");
dbus_message_unref(reply);
return NULL;
}
if (!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY,
__func__, __LINE__)) {
crm_err("Could not list systemd unit files: systemd reply has invalid arguments");
dbus_message_unref(reply);
return NULL;
}
dbus_message_iter_recurse(&args, &unit);
for (; dbus_message_iter_get_arg_type(&unit) != DBUS_TYPE_INVALID;
dbus_message_iter_next(&unit)) {
DBusBasicValue value;
const char *match = NULL;
char *unit_name = NULL;
char *basename = NULL;
if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_STRUCT, __func__, __LINE__)) {
crm_warn("Skipping systemd reply argument with unexpected type");
continue;
}
dbus_message_iter_recurse(&unit, &elem);
if(!pcmk_dbus_type_check(reply, &elem, DBUS_TYPE_STRING, __func__, __LINE__)) {
crm_warn("Skipping systemd reply argument with no string");
continue;
}
dbus_message_iter_get_basic(&elem, &value);
if (value.str == NULL) {
crm_debug("ListUnitFiles reply did not provide a string");
continue;
}
crm_trace("DBus ListUnitFiles listed: %s", value.str);
match = systemd_unit_extension(value.str);
if (match == NULL) {
// This is not a unit file type we know how to manage
crm_debug("ListUnitFiles entry '%s' is not supported as resource",
value.str);
continue;
}
// ListUnitFiles returns full path names, we just want base name
basename = strrchr(value.str, '/');
if (basename) {
basename = basename + 1;
} else {
basename = value.str;
}
if (!strcmp(match, ".service")) {
// Service is the "default" unit type, so strip it
unit_name = strndup(basename, match - basename);
} else {
unit_name = strdup(basename);
}
nfiles++;
units = g_list_prepend(units, unit_name);
}
dbus_message_unref(reply);
crm_trace("Found %d manageable systemd unit files", nfiles);
units = g_list_sort(units, sort_str);
return units;
}
gboolean
systemd_unit_exists(const char *name)
{
char *path = NULL;
char *state = NULL;
/* Note: Makes a blocking dbus calls
* Used by resources_find_service_class() when resource class=service
*/
if ((invoke_unit_by_name(name, NULL, &path) != pcmk_rc_ok)
|| (path == NULL)) {
return FALSE;
}
/* A successful LoadUnit is not sufficient to determine the unit's
* existence; it merely means the LoadUnit request received a reply.
* We must make another blocking call to check the LoadState property.
*/
state = systemd_get_property(path, "LoadState", NULL, NULL, NULL,
DBUS_TIMEOUT_USE_DEFAULT);
if (pcmk__str_any_of(state, "loaded", "masked", NULL)) {
free(state);
return TRUE;
}
free(state);
return FALSE;
}
#define METADATA_FORMAT \
"<?xml version=\"1.0\"?>\n" \
"<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n" \
"<resource-agent name=\"%s\" version=\"" PCMK_DEFAULT_AGENT_VERSION "\">\n" \
" <version>1.1</version>\n" \
" <longdesc lang=\"en\">\n" \
" %s\n" \
" </longdesc>\n" \
" <shortdesc lang=\"en\">systemd unit file for %s</shortdesc>\n" \
" <parameters/>\n" \
" <actions>\n" \
" <action name=\"start\" timeout=\"100\" />\n" \
" <action name=\"stop\" timeout=\"100\" />\n" \
" <action name=\"status\" timeout=\"100\" />\n" \
" <action name=\"monitor\" timeout=\"100\" interval=\"60\"/>\n" \
" <action name=\"meta-data\" timeout=\"5\" />\n" \
" </actions>\n" \
" <special tag=\"systemd\"/>\n" \
"</resource-agent>\n"
static char *
systemd_unit_metadata(const char *name, int timeout)
{
char *meta = NULL;
char *desc = NULL;
char *path = NULL;
if (invoke_unit_by_name(name, NULL, &path) == pcmk_rc_ok) {
/* TODO: Worth a making blocking call for? Probably not. Possibly if cached. */
desc = systemd_get_property(path, "Description", NULL, NULL, NULL,
timeout);
} else {
desc = crm_strdup_printf("Systemd unit file for %s", name);
}
meta = crm_strdup_printf(METADATA_FORMAT, name, desc, name);
free(desc);
free(path);
return meta;
}
/*!
* \internal
* \brief Determine result of method from reply
*
* \param[in] reply Reply to start, stop, or restart request
* \param[in] op Action that was executed
*/
static void
process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
{
DBusError error;
/* The first use of error here is not used other than as a non-NULL flag to
* indicate that a request was indeed sent
*/
if (pcmk_dbus_find_error((void *) &error, reply, &error)) {
set_result_from_method_error(op, &error);
dbus_error_free(&error);
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
crm_warn("DBus request for %s of %s succeeded but "
"return type was unexpected", op->action, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE,
"systemd DBus method had unexpected reply");
} else {
const char *path = NULL;
dbus_message_get_args(reply, NULL,
DBUS_TYPE_OBJECT_PATH, &path,
DBUS_TYPE_INVALID);
crm_debug("DBus request for %s of %s using %s succeeded",
op->action, crm_str(op->rsc), path);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
}
/*!
* \internal
* \brief Process the completion of an asynchronous unit start, stop, or restart
*
* \param[in] pending If not NULL, DBus call associated with request
* \param[in] user_data Action that was executed
*/
static void
unit_method_complete(DBusPendingCall *pending, void *user_data)
{
DBusMessage *reply = NULL;
svc_action_t *op = user_data;
crm_trace("Result for %s arrived", op->id);
// Grab the reply
if (pending != NULL) {
reply = dbus_pending_call_steal_reply(pending);
}
// The call is no longer pending
CRM_LOG_ASSERT(pending == op->opaque->pending);
services_set_op_pending(op, NULL);
// Determine result and finalize action
process_unit_method_reply(reply, op);
services__finalize_async_op(op);
if (reply != NULL) {
dbus_message_unref(reply);
}
}
#define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/"
/* When the cluster manages a systemd resource, we create a unit file override
* to order the service "before" pacemaker. The "before" relationship won't
* actually be used, since systemd won't ever start the resource -- we're
* interested in the reverse shutdown ordering it creates, to ensure that
* systemd doesn't stop the resource at shutdown while pacemaker is still
* running.
*
* @TODO Add start timeout
*/
#define SYSTEMD_OVERRIDE_TEMPLATE \
"[Unit]\n" \
"Description=Cluster Controlled %s\n" \
"Before=pacemaker.service pacemaker_remote.service\n" \
"\n" \
"[Service]\n" \
"Restart=no\n"
// Temporarily use rwxr-xr-x umask when opening a file for writing
static FILE *
create_world_readable(const char *filename)
{
mode_t orig_umask = umask(S_IWGRP | S_IWOTH);
FILE *fp = fopen(filename, "w");
umask(orig_umask);
return fp;
}
static void
create_override_dir(const char *agent)
{
char *override_dir = crm_strdup_printf(SYSTEMD_OVERRIDE_ROOT
"/%s.service.d", agent);
int rc = pcmk__build_path(override_dir, 0755);
if (rc != pcmk_rc_ok) {
crm_warn("Could not create systemd override directory %s: %s",
override_dir, pcmk_rc_str(rc));
}
free(override_dir);
}
static char *
get_override_filename(const char *agent)
{
return crm_strdup_printf(SYSTEMD_OVERRIDE_ROOT
"/%s.service.d/50-pacemaker.conf", agent);
}
static void
systemd_create_override(const char *agent, int timeout)
{
FILE *file_strm = NULL;
char *override_file = get_override_filename(agent);
create_override_dir(agent);
/* Ensure the override file is world-readable. This is not strictly
* necessary, but it avoids a systemd warning in the logs.
*/
file_strm = create_world_readable(override_file);
if (file_strm == NULL) {
crm_err("Cannot open systemd override file %s for writing",
override_file);
} else {
char *override = crm_strdup_printf(SYSTEMD_OVERRIDE_TEMPLATE, agent);
int rc = fprintf(file_strm, "%s\n", override);
free(override);
if (rc < 0) {
crm_perror(LOG_WARNING, "Cannot write to systemd override file %s",
override_file);
}
fflush(file_strm);
fclose(file_strm);
systemd_daemon_reload(timeout);
}
free(override_file);
}
static void
systemd_remove_override(const char *agent, int timeout)
{
char *override_file = get_override_filename(agent);
int rc = unlink(override_file);
if (rc < 0) {
// Stop may be called when already stopped, which is fine
crm_perror(LOG_DEBUG, "Cannot remove systemd override file %s",
override_file);
} else {
systemd_daemon_reload(timeout);
}
free(override_file);
}
/*!
* \internal
* \brief Parse result of systemd status check
*
* Set a status action's exit status and execution status based on a DBus
* property check result, and finalize the action if asynchronous.
*
* \param[in] name DBus interface name for property that was checked
* \param[in] state Property value
* \param[in] userdata Status action that check was done for
*/
static void
parse_status_result(const char *name, const char *state, void *userdata)
{
svc_action_t *op = userdata;
crm_trace("Resource %s has %s='%s'",
crm_str(op->rsc), name, crm_str(state));
if (pcmk__str_eq(state, "active", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else if (pcmk__str_eq(state, "reloading", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else if (pcmk__str_eq(state, "activating", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
} else if (pcmk__str_eq(state, "deactivating", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
} else {
services__set_result(op, PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, state);
}
if (!(op->synchronous)) {
services_set_op_pending(op, NULL);
services__finalize_async_op(op);
}
}
/*!
* \internal
* \brief Invoke a systemd unit, given its DBus object path
*
* \param[in] op Action to execute
* \param[in] unit DBus object path of systemd unit to invoke
*/
static void
invoke_unit_by_path(svc_action_t *op, const char *unit)
{
const char *method = NULL;
DBusMessage *msg = NULL;
DBusMessage *reply = NULL;
if (pcmk__str_any_of(op->action, "monitor", "status", NULL)) {
DBusPendingCall *pending = NULL;
char *state;
state = systemd_get_property(unit, "ActiveState",
(op->synchronous? NULL : parse_status_result),
op, (op->synchronous? NULL : &pending),
op->timeout);
if (op->synchronous) {
parse_status_result("ActiveState", state, op);
free(state);
} else if (pending == NULL) { // Could not get ActiveState property
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Could not get unit state from DBus");
services__finalize_async_op(op);
} else {
services_set_op_pending(op, pending);
}
return;
} else if (pcmk__str_eq(op->action, "start", pcmk__str_none)) {
method = "StartUnit";
systemd_create_override(op->agent, op->timeout);
} else if (pcmk__str_eq(op->action, "stop", pcmk__str_none)) {
method = "StopUnit";
systemd_remove_override(op->agent, op->timeout);
} else if (pcmk__str_eq(op->action, "restart", pcmk__str_none)) {
method = "RestartUnit";
} else {
services__set_result(op, PCMK_OCF_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR,
"Action not implemented for systemd resources");
if (!(op->synchronous)) {
services__finalize_async_op(op);
}
return;
}
crm_trace("Calling %s for unit path %s named %s",
method, unit, crm_str(op->rsc));
msg = systemd_new_method(method);
CRM_ASSERT(msg != NULL);
/* (ss) */
{
const char *replace_s = "replace";
char *name = systemd_service_name(op->agent);
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
free(name);
}
if (op->synchronous) {
reply = systemd_send_recv(msg, NULL, op->timeout);
dbus_message_unref(msg);
process_unit_method_reply(reply, op);
if (reply != NULL) {
dbus_message_unref(reply);
}
} else {
DBusPendingCall *pending = systemd_send(msg, unit_method_complete, op,
op->timeout);
dbus_message_unref(msg);
if (pending == NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Unable to send DBus message");
services__finalize_async_op(op);
} else {
services_set_op_pending(op, pending);
}
}
}
static gboolean
systemd_timeout_callback(gpointer p)
{
svc_action_t * op = p;
op->opaque->timerid = 0;
crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc);
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
"Systemd action did not complete within specified timeout");
services__finalize_async_op(op);
return FALSE;
}
/*!
* \internal
* \brief Execute a systemd action
*
* \param[in] op Action to execute
*
* \return Standard Pacemaker return code
* \retval EBUSY Recurring operation could not be initiated
* \retval pcmk_rc_error Synchronous action failed
* \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action
* should not be freed (because it already was or is
* pending)
*
* \note If the return value for an asynchronous action is not pcmk_rc_ok, the
* caller is responsible for freeing the action.
*/
int
services__execute_systemd(svc_action_t *op)
{
CRM_ASSERT(op != NULL);
if ((op->action == NULL) || (op->agent == NULL)) {
services__set_result(op, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL,
"Bug in action caller");
goto done;
}
if (!systemd_init()) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"No DBus connection");
goto done;
}
crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'",
(op->synchronous? "" : "a"), op->action, op->agent,
crm_str(op->rsc));
if (pcmk__str_eq(op->action, "meta-data", pcmk__str_casei)) {
op->stdout_data = systemd_unit_metadata(op->agent, op->timeout);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
goto done;
}
/* invoke_unit_by_name() should always override these values, which are here
* just as a fail-safe in case there are any code paths that neglect to
*/
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Bug in service library");
if (invoke_unit_by_name(op->agent, op, NULL) == pcmk_rc_ok) {
op->opaque->timerid = g_timeout_add(op->timeout + 5000,
systemd_timeout_callback, op);
services_add_inflight_op(op);
return pcmk_rc_ok;
}
done:
if (op->synchronous) {
return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error;
} else {
return services__finalize_async_op(op);
}
}
diff --git a/lib/services/systemd.h b/lib/services/systemd.h
index 6d5dea9dee..0d3dbe6d7f 100644
--- a/lib/services/systemd.h
+++ b/lib/services/systemd.h
@@ -1,27 +1,30 @@
/*
* Copyright 2012-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef SYSTEMD__H
# define SYSTEMD__H
# include <glib.h>
# include "crm/services.h"
G_GNUC_INTERNAL GList *systemd_unit_listall(void);
G_GNUC_INTERNAL
int services__systemd_prepare(svc_action_t *op);
+G_GNUC_INTERNAL
+enum ocf_exitcode services__systemd2ocf(int exit_status);
+
G_GNUC_INTERNAL
int services__execute_systemd(svc_action_t *op);
G_GNUC_INTERNAL gboolean systemd_unit_exists(const gchar * name);
G_GNUC_INTERNAL void systemd_cleanup(void);
#endif /* SYSTEMD__H */
diff --git a/lib/services/upstart.c b/lib/services/upstart.c
index 3ad4275052..2fdc229adc 100644
--- a/lib/services/upstart.c
+++ b/lib/services/upstart.c
@@ -1,681 +1,696 @@
/*
* Original copyright 2010 Senko Rasic <senko.rasic@dobarkod.hr>
* and Ante Karamatic <ivoks@init.hr>
* Later changes copyright 2012-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <services_private.h>
#include <upstart.h>
#include <dbus/dbus.h>
#include <pcmk-dbus.h>
#include <glib.h>
#include <gio/gio.h>
#define BUS_NAME "com.ubuntu.Upstart"
#define BUS_PATH "/com/ubuntu/Upstart"
#define UPSTART_06_API BUS_NAME"0_6"
#define UPSTART_JOB_IFACE UPSTART_06_API".Job"
#define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties"
/*
http://upstart.ubuntu.com/wiki/DBusInterface
*/
static DBusConnection *upstart_proxy = NULL;
/*!
* \internal
* \brief Prepare an Upstart action
*
* \param[in] op Action to prepare
*
* \return Standard Pacemaker return code
*/
int
services__upstart_prepare(svc_action_t *op)
{
op->opaque->exec = strdup("upstart-dbus");
if (op->opaque->exec == NULL) {
return ENOMEM;
}
return pcmk_rc_ok;
}
+/*!
+ * \internal
+ * \brief Map a Upstart result to a standard OCF result
+ *
+ * \param[in] exit_status Upstart result
+ *
+ * \return Standard OCF result
+ */
+enum ocf_exitcode
+services__upstart2ocf(int exit_status)
+{
+ // This library uses OCF codes for Upstart actions
+ return (enum ocf_exitcode) exit_status;
+}
+
static gboolean
upstart_init(void)
{
static int need_init = 1;
if (need_init) {
need_init = 0;
upstart_proxy = pcmk_dbus_connect();
}
if (upstart_proxy == NULL) {
return FALSE;
}
return TRUE;
}
void
upstart_cleanup(void)
{
if (upstart_proxy) {
pcmk_dbus_disconnect(upstart_proxy);
upstart_proxy = NULL;
}
}
/*!
* \internal
* \brief Get the DBus object path corresponding to a job name
*
* \param[in] arg_name Name of job to get path for
* \param[out] path If not NULL, where to store DBus object path
* \param[in] timeout Give up after this many seconds
*
* \return true if object path was found, false otherwise
* \note The caller is responsible for freeing *path if it is non-NULL.
*/
static bool
object_path_for_job(const gchar *arg_name, char **path, int timeout)
{
/*
com.ubuntu.Upstart0_6.GetJobByName (in String name, out ObjectPath job)
*/
DBusError error;
DBusMessage *msg;
DBusMessage *reply = NULL;
bool rc = false;
if (path != NULL) {
*path = NULL;
}
if (!upstart_init()) {
return false;
}
msg = dbus_message_new_method_call(BUS_NAME, // target for the method call
BUS_PATH, // object to call on
UPSTART_06_API, // interface to call on
"GetJobByName"); // method name
dbus_error_init(&error);
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &arg_name,
DBUS_TYPE_INVALID));
reply = pcmk_dbus_send_recv(msg, upstart_proxy, &error, timeout);
dbus_message_unref(msg);
if (dbus_error_is_set(&error)) {
crm_err("Could not get DBus object path for %s: %s",
arg_name, error.message);
dbus_error_free(&error);
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
crm_err("Could not get DBus object path for %s: Invalid return type",
arg_name);
} else {
if (path != NULL) {
dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, path,
DBUS_TYPE_INVALID);
if (*path != NULL) {
*path = strdup(*path);
}
}
rc = true;
}
if (reply != NULL) {
dbus_message_unref(reply);
}
return rc;
}
static void
fix(char *input, const char *search, char replace)
{
char *match = NULL;
int shuffle = strlen(search) - 1;
while (TRUE) {
int len, lpc;
match = strstr(input, search);
if (match == NULL) {
break;
}
crm_trace("Found: %s", match);
match[0] = replace;
len = strlen(match) - shuffle;
for (lpc = 1; lpc <= len; lpc++) {
match[lpc] = match[lpc + shuffle];
}
}
}
static char *
fix_upstart_name(const char *input)
{
char *output = strdup(input);
fix(output, "_2b", '+');
fix(output, "_2c", ',');
fix(output, "_2d", '-');
fix(output, "_2e", '.');
fix(output, "_40", '@');
fix(output, "_5f", '_');
return output;
}
GList *
upstart_job_listall(void)
{
GList *units = NULL;
DBusMessageIter args;
DBusMessageIter unit;
DBusMessage *msg = NULL;
DBusMessage *reply = NULL;
const char *method = "GetAllJobs";
DBusError error;
int lpc = 0;
if (upstart_init() == FALSE) {
return NULL;
}
/*
com.ubuntu.Upstart0_6.GetAllJobs (out <Array of ObjectPath> jobs)
*/
dbus_error_init(&error);
msg = dbus_message_new_method_call(BUS_NAME, // target for the method call
BUS_PATH, // object to call on
UPSTART_06_API, // interface to call on
method); // method name
CRM_ASSERT(msg != NULL);
reply = pcmk_dbus_send_recv(msg, upstart_proxy, &error, DBUS_TIMEOUT_USE_DEFAULT);
dbus_message_unref(msg);
if (dbus_error_is_set(&error)) {
crm_err("Call to %s failed: %s", method, error.message);
dbus_error_free(&error);
return NULL;
} else if (!dbus_message_iter_init(reply, &args)) {
crm_err("Call to %s failed: Message has no arguments", method);
dbus_message_unref(reply);
return NULL;
}
if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) {
crm_err("Call to %s failed: Message has invalid arguments", method);
dbus_message_unref(reply);
return NULL;
}
dbus_message_iter_recurse(&args, &unit);
while (dbus_message_iter_get_arg_type (&unit) != DBUS_TYPE_INVALID) {
DBusBasicValue value;
const char *job = NULL;
char *path = NULL;
if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) {
crm_warn("Skipping Upstart reply argument with unexpected type");
continue;
}
dbus_message_iter_get_basic(&unit, &value);
if(value.str) {
int llpc = 0;
path = value.str;
job = value.str;
while (path[llpc] != 0) {
if (path[llpc] == '/') {
job = path + llpc + 1;
}
llpc++;
}
lpc++;
crm_trace("%s -> %s", path, job);
units = g_list_append(units, fix_upstart_name(job));
}
dbus_message_iter_next (&unit);
}
dbus_message_unref(reply);
crm_trace("Found %d upstart jobs", lpc);
return units;
}
gboolean
upstart_job_exists(const char *name)
{
return object_path_for_job(name, NULL, DBUS_TIMEOUT_USE_DEFAULT);
}
static char *
get_first_instance(const gchar * job, int timeout)
{
char *instance = NULL;
const char *method = "GetAllInstances";
DBusError error;
DBusMessage *msg;
DBusMessage *reply;
DBusMessageIter args;
DBusMessageIter unit;
dbus_error_init(&error);
msg = dbus_message_new_method_call(BUS_NAME, // target for the method call
job, // object to call on
UPSTART_JOB_IFACE, // interface to call on
method); // method name
CRM_ASSERT(msg != NULL);
dbus_message_append_args(msg, DBUS_TYPE_INVALID);
reply = pcmk_dbus_send_recv(msg, upstart_proxy, &error, timeout);
dbus_message_unref(msg);
if (dbus_error_is_set(&error)) {
crm_err("Call to %s failed: %s", method, error.message);
dbus_error_free(&error);
goto done;
} else if(reply == NULL) {
crm_err("Call to %s failed: no reply", method);
goto done;
} else if (!dbus_message_iter_init(reply, &args)) {
crm_err("Call to %s failed: Message has no arguments", method);
goto done;
}
if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) {
crm_err("Call to %s failed: Message has invalid arguments", method);
goto done;
}
dbus_message_iter_recurse(&args, &unit);
if(pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) {
DBusBasicValue value;
dbus_message_iter_get_basic(&unit, &value);
if(value.str) {
instance = strdup(value.str);
crm_trace("Result: %s", instance);
}
}
done:
if(reply) {
dbus_message_unref(reply);
}
return instance;
}
/*!
* \internal
* \brief Parse result of Upstart status check
*
* \param[in] name DBus interface name for property that was checked
* \param[in] state Property value
* \param[in] userdata Status action that check was done for
*/
static void
parse_status_result(const char *name, const char *state, void *userdata)
{
svc_action_t *op = userdata;
if (pcmk__str_eq(state, "running", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else {
services__set_result(op, PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, state);
}
if (!(op->synchronous)) {
services_set_op_pending(op, NULL);
services__finalize_async_op(op);
}
}
#define METADATA_FORMAT \
"<?xml version=\"1.0\"?>\n" \
"<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n" \
"<resource-agent name=\"%s\" version=\"" PCMK_DEFAULT_AGENT_VERSION "\">\n" \
" <version>1.1</version>\n" \
" <longdesc lang=\"en\">\n" \
" Upstart agent for controlling the system %s service\n" \
" </longdesc>\n" \
" <shortdesc lang=\"en\">Upstart job for %s</shortdesc>\n" \
" <parameters/>\n" \
" <actions>\n" \
" <action name=\"start\" timeout=\"15\" />\n" \
" <action name=\"stop\" timeout=\"15\" />\n" \
" <action name=\"status\" timeout=\"15\" />\n" \
" <action name=\"restart\" timeout=\"15\" />\n" \
" <action name=\"monitor\" timeout=\"15\" interval=\"15\" start-delay=\"15\" />\n" \
" <action name=\"meta-data\" timeout=\"5\" />\n" \
" </actions>\n" \
" <special tag=\"upstart\"/>\n" \
"</resource-agent>\n"
static char *
upstart_job_metadata(const char *name)
{
return crm_strdup_printf(METADATA_FORMAT, name, name, name);
}
/*!
* \internal
* \brief Set an action result based on a method error
*
* \param[in] op Action to set result for
* \param[in] error Method error
*/
static void
set_result_from_method_error(svc_action_t *op, const DBusError *error)
{
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Unable to invoke Upstart DBus method");
if (strstr(error->name, UPSTART_06_API ".Error.UnknownInstance")) {
if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) {
crm_trace("Masking stop failure (%s) for %s "
"because unknown service can be considered stopped",
error->name, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
return;
}
services__set_result(op, PCMK_OCF_NOT_INSTALLED,
PCMK_EXEC_NOT_INSTALLED, "Upstart job not found");
} else if (pcmk__str_eq(op->action, "start", pcmk__str_casei)
&& strstr(error->name, UPSTART_06_API ".Error.AlreadyStarted")) {
crm_trace("Masking start failure (%s) for %s "
"because already started resource is OK",
error->name, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
return;
}
crm_err("DBus request for %s of Upstart job %s for resource %s failed: %s",
op->action, op->agent, crm_str(op->rsc), error->message);
}
/*!
* \internal
* \brief Process the completion of an asynchronous job start, stop, or restart
*
* \param[in] pending If not NULL, DBus call associated with request
* \param[in] user_data Action that was executed
*/
static void
job_method_complete(DBusPendingCall *pending, void *user_data)
{
DBusError error;
DBusMessage *reply = NULL;
svc_action_t *op = user_data;
// Grab the reply
if (pending != NULL) {
reply = dbus_pending_call_steal_reply(pending);
}
// Determine result
dbus_error_init(&error);
if (pcmk_dbus_find_error(pending, reply, &error)) {
set_result_from_method_error(op, &error);
dbus_error_free(&error);
} else if (pcmk__str_eq(op->action, "stop", pcmk__str_none)) {
// Call has no return value
crm_debug("DBus request for stop of %s succeeded", crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
crm_warn("DBus request for %s of %s succeeded but "
"return type was unexpected", op->action, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else {
const char *path = NULL;
dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path,
DBUS_TYPE_INVALID);
crm_debug("DBus request for %s of %s using %s succeeded",
op->action, crm_str(op->rsc), path);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
// The call is no longer pending
CRM_LOG_ASSERT(pending == op->opaque->pending);
services_set_op_pending(op, NULL);
// Finalize action
services__finalize_async_op(op);
if (reply != NULL) {
dbus_message_unref(reply);
}
}
/*!
* \internal
* \brief Execute an Upstart action
*
* \param[in] op Action to execute
*
* \return Standard Pacemaker return code
* \retval EBUSY Recurring operation could not be initiated
* \retval pcmk_rc_error Synchronous action failed
* \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action
* should not be freed (because it already was or is
* pending)
*
* \note If the return value for an asynchronous action is not pcmk_rc_ok, the
* caller is responsible for freeing the action.
*/
int
services__execute_upstart(svc_action_t *op)
{
char *job = NULL;
int arg_wait = TRUE;
const char *arg_env = "pacemaker=1";
const char *action = op->action;
DBusError error;
DBusMessage *msg = NULL;
DBusMessage *reply = NULL;
DBusMessageIter iter, array_iter;
CRM_ASSERT(op != NULL);
if ((op->action == NULL) || (op->agent == NULL)) {
services__set_result(op, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL,
"Bug in action caller");
goto cleanup;
}
if (!upstart_init()) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"No DBus connection");
goto cleanup;
}
if (pcmk__str_eq(op->action, "meta-data", pcmk__str_casei)) {
op->stdout_data = upstart_job_metadata(op->agent);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
goto cleanup;
}
if (!object_path_for_job(op->agent, &job, op->timeout)) {
if (pcmk__str_eq(action, "stop", pcmk__str_none)) {
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else {
services__set_result(op, PCMK_OCF_NOT_INSTALLED,
PCMK_EXEC_NOT_INSTALLED,
"Upstart job not found");
}
goto cleanup;
}
if (job == NULL) {
// Shouldn't normally be possible -- maybe a memory error
op->rc = PCMK_OCF_UNKNOWN_ERROR;
op->status = PCMK_EXEC_ERROR;
goto cleanup;
}
if (pcmk__strcase_any_of(op->action, "monitor", "status", NULL)) {
DBusPendingCall *pending = NULL;
char *state = NULL;
char *path = get_first_instance(job, op->timeout);
services__set_result(op, PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE,
"No Upstart job instances found");
if (path == NULL) {
goto cleanup;
}
state = pcmk_dbus_get_property(upstart_proxy, BUS_NAME, path,
UPSTART_06_API ".Instance", "state",
op->synchronous? NULL : parse_status_result,
op,
op->synchronous? NULL : &pending,
op->timeout);
free(path);
if (op->synchronous) {
parse_status_result("state", state, op);
free(state);
} else if (pending == NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Could not get job state from DBus");
} else { // Successfully initiated async op
free(job);
services_set_op_pending(op, pending);
services_add_inflight_op(op);
return pcmk_rc_ok;
}
goto cleanup;
} else if (pcmk__str_eq(action, "start", pcmk__str_none)) {
action = "Start";
} else if (pcmk__str_eq(action, "stop", pcmk__str_none)) {
action = "Stop";
} else if (pcmk__str_eq(action, "restart", pcmk__str_none)) {
action = "Restart";
} else {
services__set_result(op, PCMK_OCF_UNIMPLEMENT_FEATURE,
PCMK_EXEC_ERROR_HARD,
"Action not implemented for Upstart resources");
goto cleanup;
}
// Initialize rc/status in case called functions don't set them
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_DONE,
"Bug in service library");
crm_debug("Calling %s for %s on %s", action, crm_str(op->rsc), job);
msg = dbus_message_new_method_call(BUS_NAME, // target for the method call
job, // object to call on
UPSTART_JOB_IFACE, // interface to call on
action); // method name
CRM_ASSERT(msg != NULL);
dbus_message_iter_init_append (msg, &iter);
CRM_LOG_ASSERT(dbus_message_iter_open_container(&iter,
DBUS_TYPE_ARRAY,
DBUS_TYPE_STRING_AS_STRING,
&array_iter));
CRM_LOG_ASSERT(dbus_message_iter_append_basic(&array_iter,
DBUS_TYPE_STRING, &arg_env));
CRM_LOG_ASSERT(dbus_message_iter_close_container(&iter, &array_iter));
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_BOOLEAN, &arg_wait,
DBUS_TYPE_INVALID));
if (!(op->synchronous)) {
DBusPendingCall *pending = pcmk_dbus_send(msg, upstart_proxy,
job_method_complete, op,
op->timeout);
if (pending == NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Unable to send DBus message");
goto cleanup;
} else { // Successfully initiated async op
free(job);
services_set_op_pending(op, pending);
services_add_inflight_op(op);
return pcmk_rc_ok;
}
}
// Synchronous call
dbus_error_init(&error);
reply = pcmk_dbus_send_recv(msg, upstart_proxy, &error, op->timeout);
if (dbus_error_is_set(&error)) {
set_result_from_method_error(op, &error);
dbus_error_free(&error);
} else if (pcmk__str_eq(op->action, "stop", pcmk__str_none)) {
// DBus call does not return a value
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
crm_warn("Call to %s passed but return type was unexpected", op->action);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else {
const char *path = NULL;
dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path,
DBUS_TYPE_INVALID);
crm_info("Call to %s passed: %s", op->action, path);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
cleanup:
free(job);
if (msg != NULL) {
dbus_message_unref(msg);
}
if (reply != NULL) {
dbus_message_unref(reply);
}
if (op->synchronous) {
return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error;
} else {
return services__finalize_async_op(op);
}
}
diff --git a/lib/services/upstart.h b/lib/services/upstart.h
index fa68a2efea..b6c4effc07 100644
--- a/lib/services/upstart.h
+++ b/lib/services/upstart.h
@@ -1,28 +1,31 @@
/*
* Copyright 2010 Senko Rasic <senko.rasic@dobarkod.hr>
* Copyright 2010 Ante Karamatic <ivoks@init.hr>
* Later changes copyright 2012-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef UPSTART__H
# define UPSTART__H
# include <glib.h>
# include "crm/services.h"
G_GNUC_INTERNAL GList *upstart_job_listall(void);
G_GNUC_INTERNAL
int services__upstart_prepare(svc_action_t *op);
+G_GNUC_INTERNAL
+enum ocf_exitcode services__upstart2ocf(int exit_status);
+
G_GNUC_INTERNAL
int services__execute_upstart(svc_action_t *op);
G_GNUC_INTERNAL gboolean upstart_job_exists(const gchar * name);
G_GNUC_INTERNAL void upstart_cleanup(void);
#endif /* UPSTART__H */
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Nov 23, 4:24 PM (17 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018893
Default Alt Text
(198 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment