
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
index aaf2976402..685fcc74ec 100644
--- a/daemons/execd/execd_commands.c
+++ b/daemons/execd/execd_commands.c
@@ -1,1896 +1,1905 @@
/*
* Copyright 2012-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
// Check whether we have a high-resolution monotonic clock
#undef PCMK__TIME_USE_CGT
#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC) \
&& !defined(PCMK_TIME_EMERGENCY_CGT)
# define PCMK__TIME_USE_CGT
# include <time.h> /* clock_gettime */
#endif
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/msg_xml.h>
#include "pacemaker-execd.h"
#define EXIT_REASON_MAX_LEN 128
GHashTable *rsc_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
guint interval_ms;
int start_delay;
int timeout_orig;
int call_id;
int exec_rc;
int lrmd_op_status;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
int service_flags;
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *real_action;
char *exit_reason;
char *output;
char *userdata_str;
/* We can track operation queue time and run time, to be saved with the CIB
* resource history (and displayed in cluster status). We need
* high-resolution monotonic time for this purpose, so we use
* clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
* is disabled).
*
* However, we also need epoch timestamps for recording the time the command
* last ran and the time its return value last changed, for use in time
* displays (as opposed to interval calculations). We keep time_t values for
* this purpose.
*
* The last run time is used for both purposes, so we keep redundant
* monotonic and epoch values for this. Technically the two could represent
* different times, but since time_t has only second resolution and the
* values are used for distinct purposes, that is not significant.
*/
#ifdef PCMK__TIME_USE_CGT
/* Recurring and systemd operations may involve more than one executor
* command per operation, so they need info about the original and the most
* recent.
*/
struct timespec t_first_run; // When op first ran
struct timespec t_run; // When op most recently ran
struct timespec t_first_queue; // When op was first queued
struct timespec t_queue; // When op was most recently queued
#endif
time_t epoch_last_run; // Epoch timestamp of when op last ran
time_t epoch_rcchange; // Epoch timestamp of when rc last changed
int first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
#ifdef PCMK__TIME_USE_CGT
/*!
* \internal
* \brief Check whether a struct timespec has been set
*
* \param[in] timespec Time to check
*
* \return true if timespec has been set (i.e. is nonzero), false otherwise
*/
static inline bool
time_is_set(struct timespec *timespec)
{
return (timespec != NULL) &&
((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
}
/*!
* \internal
* \brief Set a timespec (and its original if unset) to the current time
*
* \param[out] t_current Where to store current time
* \param[out] t_orig Where to copy t_current if unset
*/
static void
get_current_time(struct timespec *t_current, struct timespec *t_orig)
{
clock_gettime(CLOCK_MONOTONIC, t_current);
if ((t_orig != NULL) && !time_is_set(t_orig)) {
*t_orig = *t_current;
}
}
/*!
* \internal
* \brief Return difference between two times in milliseconds
*
* \param[in] now More recent time (or NULL to use current time)
* \param[in] old Earlier time
*
* \return milliseconds difference (or 0 if old is NULL or unset)
*
* \note Can overflow on 32-bit machines when the difference is around
* 24 days or more.
*/
static int
time_diff_ms(struct timespec *now, struct timespec *old)
{
int diff_ms = 0;
if (time_is_set(old)) {
struct timespec local_now = { 0, };
if (now == NULL) {
clock_gettime(CLOCK_MONOTONIC, &local_now);
now = &local_now;
}
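/* Convert the seconds difference to milliseconds, then add the
 * nanoseconds difference (which may be negative) converted to
 * milliseconds
 */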
diff_ms = (now->tv_sec - old->tv_sec) * 1000
+ (now->tv_nsec - old->tv_nsec) / 1000000;
}
return diff_ms;
}
/*!
* \internal
* \brief Reset a command's operation times to their original values.
*
* Reset a command's run and queued timestamps to the timestamps of the original
* command, so we report the entire time since then and not just the time since
* the most recent command (for recurring and systemd operations).
*
* \param[in] cmd Executor command object to reset
*
* \note It's not obvious what the queued time should be for a systemd
* start/stop operation, which might go like this:
* initial command queued 5ms, runs 3s
* monitor command queued 10ms, runs 10s
* monitor command queued 10ms, runs 10s
* Is the queued time for that operation 5ms, 10ms or 25ms? The current
* implementation will report 5ms. If it's 25ms, then we need to
* subtract 20ms from the total exec time so as not to count it twice.
* We can implement that later if it matters to anyone ...
*/
static void
cmd_original_times(lrmd_cmd_t * cmd)
{
cmd->t_run = cmd->t_first_run;
cmd->t_queue = cmd->t_first_queue;
}
#endif
static void
log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
{
char pid_str[32] = { 0, };
int log_level = LOG_INFO;
if (cmd->last_pid) {
snprintf(pid_str, 32, "%d", cmd->last_pid);
}
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
#ifdef PCMK__TIME_USE_CGT
do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d"
" (execution time %dms, queue time %dms)",
cmd->rsc_id, cmd->action, cmd->call_id,
(cmd->last_pid? ", PID " : ""), pid_str, cmd->exec_rc,
exec_time, queue_time);
#else
do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d",
cmd->rsc_id, cmd->action, cmd->call_id,
(cmd->last_pid? ", PID " : ""), pid_str, cmd->exec_rc);
#endif
}
static void
log_execute(lrmd_cmd_t * cmd)
{
int log_level = LOG_INFO;
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
cmd->rsc_id, cmd->action, cmd->call_id);
}
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
if (safe_str_eq(action, "monitor") &&
is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
return "status";
}
return action;
}
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running"
return rsc;
}
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
{
int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
cmd->call_opts = call_options;
cmd->client_id = strdup(client->id);
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->timeout_orig = cmd->timeout;
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) {
crm_debug("Setting flag to leave pid group on timeout and "
"only kill action pid for " PCMK__OP_FMT,
cmd->rsc_id, cmd->action, cmd->interval_ms);
cmd->service_flags |= SVC_ACTION_LEAVE_GROUP;
}
return cmd;
}
static void
stop_recurring_timer(lrmd_cmd_t *cmd)
{
if (cmd) {
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
cmd->stonith_recurring_id = 0;
}
}
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
stop_recurring_timer(cmd);
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
free(cmd->origin);
free(cmd->action);
free(cmd->real_action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->output);
free(cmd->exit_reason);
free(cmd->client_id);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc;
cmd->stonith_recurring_id = 0;
if (!cmd->rsc_id) {
return FALSE;
}
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static inline void
start_recurring_timer(lrmd_cmd_t *cmd)
{
if (cmd && (cmd->interval_ms > 0)) {
cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
stonith_recurring_op_helper,
cmd);
}
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
static gboolean
merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
GListPtr gIter = NULL;
lrmd_cmd_t * dup = NULL;
gboolean dup_pending = FALSE;
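/* Only recurring operations can have duplicates to merge */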
if (cmd->interval_ms == 0) {
return FALSE;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
dup = gIter->data;
if (safe_str_eq(cmd->action, dup->action)
&& (cmd->interval_ms == dup->interval_ms)) {
dup_pending = TRUE;
goto merge_dup;
}
}
/* If the duplicate is in the recurring_ops list, it has already executed
 * and is in its interval loop, so we can't just remove it in this case. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
dup = gIter->data;
if (safe_str_eq(cmd->action, dup->action)
&& (cmd->interval_ms == dup->interval_ms)) {
goto merge_dup;
}
}
return FALSE;
merge_dup:
/* This should not occur. If it does, we need to investigate how something
* like this is possible in the controller.
*/
crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
"), merging with previous op entry",
rsc->rsc_id, normalize_action_name(rsc, dup->action),
dup->interval_ms);
/* merge */
dup->first_notify_sent = 0;
free(dup->userdata_str);
dup->userdata_str = cmd->userdata_str;
cmd->userdata_str = NULL;
dup->call_id = cmd->call_id;
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* if we are waiting for the next interval, kick it off now */
if (dup_pending == TRUE) {
stop_recurring_timer(cmd);
stonith_recurring_op_helper(cmd);
}
} else if (dup_pending == FALSE) {
/* if we've already handed this to the service lib, kick off an early execution */
services_action_kick(rsc->rsc_id,
normalize_action_name(rsc, dup->action),
dup->interval_ms);
}
free_lrmd_cmd(cmd);
return TRUE;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
gboolean dup_processed = FALSE;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
dup_processed = merge_recurring_duplicate(rsc, cmd);
if (dup_processed) {
/* duplicate recurring cmd found, cmds merged */
return;
}
/* The controller expects the executor to automatically cancel
* recurring operations before a resource stops.
*/
if (safe_str_eq(cmd->action, "stop")) {
cancel_all_recurring(rsc, NULL);
}
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static xmlNode *
create_lrmd_reply(const char *origin, int rc, int call_id)
{
xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, origin);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
return reply;
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
pcmk__client_t *client = value;
int rc;
int log_level = LOG_WARNING;
const char *msg = NULL;
CRM_CHECK(client != NULL, return);
if (client->name == NULL) {
crm_trace("Skipping notification to client without name");
return;
}
+ if (is_set(client->flags, pcmk__client_to_proxy)) {
+ /* We only want to notify clients of the executor IPC API. If we are
+ * running as Pacemaker Remote, we may have clients proxied to other
+ * IPC services in the cluster, so skip those.
+ */
+ crm_trace("Skipping executor API notification to %s IPC client",
+ client->name);
+ return;
+ }
rc = lrmd_server_send_notify(client, update_msg);
if (rc == pcmk_rc_ok) {
return;
}
switch (rc) {
case ENOTCONN:
case EPIPE: // Client exited without waiting for notification
log_level = LOG_INFO;
msg = "Disconnected";
break;
default:
msg = pcmk_rc_str(rc);
break;
}
do_crm_log(log_level,
"Could not notify client %s/%s: %s " CRM_XS " rc=%d",
client->name, client->id, msg, rc);
}
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
xmlNode *notify = NULL;
#ifdef PCMK__TIME_USE_CGT
int exec_time = time_diff_ms(NULL, &(cmd->t_run));
int queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
log_finished(cmd, exec_time, queue_time);
#else
log_finished(cmd, 0, 0);
#endif
/* If the first notify result for a cmd has already been sent, and the
 * option to send notifies only on result changes is set, check whether the
 * last result is the same as the new one. If so, suppress this update. */
if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
if (cmd->last_notify_rc == cmd->exec_rc &&
cmd->last_notify_op_status == cmd->lrmd_op_status) {
/* only send changes */
return;
}
}
cmd->first_notify_sent = 1;
cmd->last_notify_rc = cmd->exec_rc;
cmd->last_notify_op_status = cmd->lrmd_op_status;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
(long long) cmd->epoch_last_run);
crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
(long long) cmd->epoch_rcchange);
#ifdef PCMK__TIME_USE_CGT
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
if(cmd->real_action) {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
} else {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
}
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason);
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2smartfield((gpointer) key, (gpointer) value, args);
}
}
if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
if (client) {
send_client_notify(client->id, client, notify);
}
} else {
pcmk__foreach_ipc_client(send_client_notify, notify);
}
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode * request)
{
if (pcmk__ipc_client_count() != 0) {
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
pcmk__foreach_ipc_client(send_client_notify, notify);
free_xml(notify);
}
}
static void
cmd_reset(lrmd_cmd_t * cmd)
{
cmd->lrmd_op_status = 0;
cmd->last_pid = 0;
#ifdef PCMK__TIME_USE_CGT
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
#endif
cmd->epoch_last_run = 0;
free(cmd->exit_reason);
cmd->exit_reason = NULL;
free(cmd->output);
cmd->output = NULL;
}
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
rsc ? rsc->active : NULL, cmd);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
/* Restore the original timeout so the client notification has correct information */
cmd->timeout = cmd->timeout_orig;
send_cmd_complete_notify(cmd);
if (cmd->interval_ms && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval_ms == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd_reset(cmd);
}
}
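/*!
 * \internal
 * \brief Map an OCF agent result to a uniform return code
 *
 * \param[in] rc Exit code reported by the agent
 *
 * \return rc if it is within the known OCF exit code range, otherwise
 * PCMK_OCF_UNKNOWN_ERROR
 */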
static int
ocf2uniform_rc(int rc)
{
if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
static int
stonith2uniform_rc(const char *action, int rc)
{
switch (rc) {
case pcmk_ok:
rc = PCMK_OCF_OK;
break;
case -ENODEV:
/* This should be possible only for probes in practice, but
* interpret for all actions to be safe.
*/
if (safe_str_eq(action, "monitor")) {
rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(action, "stop")) {
rc = PCMK_OCF_OK;
} else {
rc = PCMK_OCF_NOT_INSTALLED;
}
break;
case -EOPNOTSUPP:
rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
break;
case -ETIME:
case -ETIMEDOUT:
rc = PCMK_OCF_TIMEOUT;
break;
default:
rc = PCMK_OCF_UNKNOWN_ERROR;
break;
}
return rc;
}
#if SUPPORT_NAGIOS
static int
nagios2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
switch (rc) {
case NAGIOS_STATE_OK:
return PCMK_OCF_OK;
case NAGIOS_INSUFFICIENT_PRIV:
return PCMK_OCF_INSUFFICIENT_PRIV;
case NAGIOS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case NAGIOS_STATE_WARNING:
case NAGIOS_STATE_CRITICAL:
case NAGIOS_STATE_UNKNOWN:
case NAGIOS_STATE_DEPENDENT:
default:
return PCMK_OCF_UNKNOWN_ERROR;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
#endif
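/*!
 * \internal
 * \brief Map an action result to a uniform return code for its standard
 *
 * \param[in] standard Resource class (agent standard)
 * \param[in] action   Action name
 * \param[in] rc       Raw result from the agent or service library
 *
 * \return Uniform (OCF-style) return code
 */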
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) {
return ocf2uniform_rc(rc);
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
return stonith2uniform_rc(action, rc);
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) {
return rc;
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) {
return rc;
#if SUPPORT_NAGIOS
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) {
return nagios2uniform_rc(action, rc);
#endif
} else {
return services_get_ocf_exitcode(action, rc);
}
}
static int
action_get_uniform_rc(svc_action_t * action)
{
lrmd_cmd_t *cmd = action->cb_data;
return get_uniform_rc(action->standard, cmd->action, action->rc);
}
struct notify_new_client_data {
xmlNode *notify;
pcmk__client_t *new_client;
};
static void
notify_one_client(gpointer key, gpointer value, gpointer user_data)
{
pcmk__client_t *client = value;
struct notify_new_client_data *data = user_data;
if (safe_str_neq(client->id, data->new_client->id)) {
send_client_notify(key, (gpointer) client, (gpointer) data->notify);
}
}
void
notify_of_new_client(pcmk__client_t *new_client)
{
struct notify_new_client_data data;
data.new_client = new_client;
data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(data.notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
pcmk__foreach_ipc_client(notify_one_client, &data);
free_xml(data.notify);
}
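/*!
 * \internal
 * \brief Extract an exit reason from agent output
 *
 * Scan output for the last occurrence of PCMK_OCF_REASON_PREFIX, and return
 * the text following it, truncated at the first newline and limited to
 * EXIT_REASON_MAX_LEN bytes.
 *
 * \param[in] output Agent output to scan
 *
 * \return Newly allocated exit reason string, or NULL if none found
 */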
static char *
parse_exit_reason(const char *output)
{
const char *cur = NULL;
const char *last = NULL;
static int cookie_len = 0;
char *eol = NULL;
size_t reason_len = EXIT_REASON_MAX_LEN;
if (output == NULL) {
return NULL;
}
if (!cookie_len) {
cookie_len = strlen(PCMK_OCF_REASON_PREFIX);
}
cur = strstr(output, PCMK_OCF_REASON_PREFIX);
for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) {
/* skip over the cookie delimiter string */
cur += cookie_len;
last = cur;
}
if (last == NULL) {
return NULL;
}
// Truncate everything after a newline, and limit the reason string size
eol = strchr(last, '\n');
if (eol) {
reason_len = QB_MIN(reason_len, eol - last);
}
return strndup(last, reason_len);
}
void
client_disconnect_cleanup(const char *client_id)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (rsc->call_opts & lrmd_opt_drop_recurring) {
/* This client is disconnecting, drop any recurring operations
* it may have initiated on the resource */
cancel_all_recurring(rsc, client_id);
}
}
}
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
const char *rclass = NULL;
#ifdef PCMK__TIME_USE_CGT
bool goagain = false;
#endif
if (!cmd) {
crm_err("Completed executor action (%s) does not match any known operations",
action->id);
return;
}
#ifdef PCMK__TIME_USE_CGT
if (cmd->exec_rc != action->rc) {
cmd->epoch_rcchange = time(NULL);
}
#endif
cmd->last_pid = action->pid;
cmd->exec_rc = action_get_uniform_rc(action);
cmd->lrmd_op_status = action->status;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) {
rclass = resources_find_service_class(rsc->type);
} else if(rsc) {
rclass = rsc->class;
}
#ifdef PCMK__TIME_USE_CGT
if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) {
if ((cmd->exec_rc == PCMK_OCF_OK)
&& pcmk__str_any_of(cmd->action, "start", "stop", NULL)) {
/* systemd returns from start and stop actions after the action
* begins, not after it completes. We have to jump through a few
* hoops so that we don't report 'complete' to the rest of pacemaker
* until it's actually done.
*/
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if (cmd->real_action != NULL) {
// This is follow-up monitor to check whether start/stop completed
if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE)
&& (cmd->exec_rc == PCMK_OCF_PENDING)) {
goagain = true;
} else if ((cmd->exec_rc == PCMK_OCF_OK)
&& safe_str_eq(cmd->real_action, "stop")) {
goagain = true;
} else {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;
crm_debug("%s systemd %s is now complete (elapsed=%dms, "
"remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
services_ocf_exitcode_str(cmd->exec_rc),
cmd->exec_rc);
cmd_original_times(cmd);
// Monitors may return "not running", but start/stop shouldn't
if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE)
&& (cmd->exec_rc == PCMK_OCF_NOT_RUNNING)) {
if (safe_str_eq(cmd->real_action, "start")) {
cmd->exec_rc = PCMK_OCF_UNKNOWN_ERROR;
} else if (safe_str_eq(cmd->real_action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
}
}
}
}
}
#endif
#if SUPPORT_NAGIOS
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) {
if (safe_str_eq(cmd->action, "monitor") &&
(cmd->interval_ms == 0) && cmd->exec_rc == PCMK_OCF_OK) {
/* Successfully executed --version for the nagios plugin */
cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) {
#ifdef PCMK__TIME_USE_CGT
goagain = true;
#endif
}
}
#endif
#ifdef PCMK__TIME_USE_CGT
if (goagain) {
int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
int timeout_left = cmd->timeout_orig - time_sum;
int delay = cmd->timeout_orig / 10;
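/* If a delay of one-tenth of the original timeout would use up the time
 * remaining (with more than 20ms left), use half the remaining time
 * instead; in any case, cap the delay at two seconds.
 */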
if(delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}
delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
if(cmd->exec_rc == PCMK_OCF_OK) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
} else if(cmd->exec_rc == PCMK_OCF_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
} else {
crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay);
}
cmd_reset(cmd);
if(rsc) {
rsc->active = NULL;
}
schedule_lrmd_cmd(rsc, cmd);
/* Don't finalize cmd, we're not done with it yet */
return;
} else {
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left);
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
cmd->exec_rc = PCMK_OCF_TIMEOUT;
cmd_original_times(cmd);
}
}
#endif
if (action->stderr_data) {
cmd->output = strdup(action->stderr_data);
cmd->exit_reason = parse_exit_reason(action->stderr_data);
} else if (action->stdout_data) {
cmd->output = strdup(action->stdout_data);
}
cmd_finalize(cmd, rsc);
}
/*!
* \internal
* \brief Determine operation status of a stonith operation
*
* Non-stonith resource operations get their operation status directly from the
* service library, but the fencer does not have an equivalent, so we must infer
* an operation status from the fencer API's return code.
*
* \param[in] action Name of action performed on stonith resource
* \param[in] interval_ms Action interval
* \param[in] rc Action result from fencer
*
* \return Operation status corresponding to fencer API return code
*/
static int
stonith_rc2status(const char *action, guint interval_ms, int rc)
{
int status = PCMK_LRM_OP_DONE;
switch (rc) {
case pcmk_ok:
break;
case -EOPNOTSUPP:
case -EPROTONOSUPPORT:
status = PCMK_LRM_OP_NOTSUPPORTED;
break;
case -ETIME:
case -ETIMEDOUT:
status = PCMK_LRM_OP_TIMEOUT;
break;
case -ENOTCONN:
case -ECOMM:
// Couldn't talk to fencer
status = PCMK_LRM_OP_ERROR;
break;
case -ENODEV:
// The device is not registered with the fencer
status = PCMK_LRM_OP_ERROR;
break;
default:
break;
}
return status;
}
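/*!
 * \internal
 * \brief Handle the result of a stonith resource action
 *
 * \param[in] cmd Action that completed
 * \param[in] rc  Fencer API return code for the action
 */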
static void
stonith_action_complete(lrmd_cmd_t * cmd, int rc)
{
// This can be NULL if resource was removed before command completed
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
cmd->exec_rc = stonith2uniform_rc(cmd->action, rc);
/* This function may be called with status already set to cancelled, if a
* pending action was aborted. Otherwise, we need to determine status from
* the fencer return code.
*/
if (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED) {
cmd->lrmd_op_status = stonith_rc2status(cmd->action, cmd->interval_ms,
rc);
// Certain successful actions change the known state of the resource
if (rsc && (cmd->exec_rc == PCMK_OCF_OK)) {
if (safe_str_eq(cmd->action, "start")) {
rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
} else if (safe_str_eq(cmd->action, "stop")) {
rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
}
}
}
/* The recurring timer should not be running at this point in any case, but
* as a failsafe, stop it if it is.
*/
stop_recurring_timer(cmd);
/* Reschedule this command if appropriate. If a recurring command is *not*
* rescheduled, its status must be PCMK_LRM_OP_CANCELLED, otherwise it will
* not be removed from recurring_ops by cmd_finalize().
*/
if (rsc && (cmd->interval_ms > 0)
&& (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED)) {
start_recurring_timer(cmd);
}
cmd_finalize(cmd, rsc);
}
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
stonith_action_complete(data->userdata, data->rc);
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* If we registered this fence device, we don't know whether the
* fencer still has the registration or not. Cause future probes to
* return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or
* started successfully. This is especially important if the
* controller also went away (possibly due to a cluster layer
* restart) and won't receive our client notification of any
* monitors finalized below.
*/
if (rsc->st_probe_rc == pcmk_ok) {
rsc->st_probe_rc = pcmk_err_generic;
}
if (rsc->active) {
cmd_list = g_list_append(cmd_list, rsc->active);
}
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
}
rsc->pending_ops = rsc->recurring_ops = NULL;
}
}
if (!cmd_list) {
return;
}
crm_err("Connection to fencer failed, finalizing %d pending operations",
g_list_length(cmd_list));
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
stonith_action_complete(cmd_iter->data, -ENOTCONN);
}
g_list_free(cmd_list);
}
/*!
* \internal
* \brief Execute a stonith resource "start" action
*
* Start a stonith resource by registering it with the fencer.
* (Stonith agents don't have a start command.)
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to start
* \param[in] cmd Start command to execute
*
* \return pcmk_ok on success, -errno otherwise
*/
static int
execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
int rc = pcmk_ok;
// Convert command parameters to stonith API key/values
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
/* The fencer will automatically register devices via CIB notifications
* when the CIB changes, but to avoid a possible race condition between
* the fencer receiving the notification and the executor requesting that
* resource, the executor registers the device as well. The fencer knows how
* to handle duplicate registrations.
*/
rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
cmd->rsc_id, rsc->provider,
rsc->type, device_params);
stonith_key_value_freeall(device_params, 1, 1);
return rc;
}
/*!
* \internal
* \brief Execute a stonith resource "stop" action
*
* Stop a stonith resource by unregistering it with the fencer.
* (Stonith agents don't have a stop command.)
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to stop
*
* \return pcmk_ok on success, -errno otherwise
*/
static inline int
execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
{
/* @TODO Failure would indicate a problem communicating with fencer;
* perhaps we should try reconnecting and retrying a few times?
*/
return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
rsc->rsc_id);
}
/*!
* \internal
* \brief Initiate a stonith resource agent recurring "monitor" action
*
* \param[in] stonith_api Connection to fencer
* \param[in] rsc Stonith resource to monitor
* \param[in] cmd Monitor command being executed
*
* \return pcmk_ok if monitor was successfully initiated, -errno otherwise
*/
static inline int
execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
cmd->timeout / 1000);
rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
"lrmd_stonith_callback",
lrmd_stonith_callback);
if (rc == TRUE) {
rsc->active = cmd;
rc = pcmk_ok;
} else {
rc = -pcmk_err_generic;
}
return rc;
}
static void
lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
int rc = 0;
bool do_monitor = FALSE;
stonith_t *stonith_api = get_stonith_connection();
if (!stonith_api) {
rc = -ENOTCONN;
} else if (safe_str_eq(cmd->action, "start")) {
rc = execd_stonith_start(stonith_api, rsc, cmd);
if (rc == 0) {
do_monitor = TRUE;
}
} else if (safe_str_eq(cmd->action, "stop")) {
rc = execd_stonith_stop(stonith_api, rsc);
} else if (safe_str_eq(cmd->action, "monitor")) {
if (cmd->interval_ms > 0) {
do_monitor = TRUE;
} else {
rc = rsc->st_probe_rc;
}
}
if (do_monitor) {
rc = execd_stonith_monitor(stonith_api, rsc, cmd);
if (rc == pcmk_ok) {
// Don't clean up yet, we will find out result of the monitor later
return;
}
}
stonith_action_complete(cmd, rc);
}
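/*!
 * \internal
 * \brief Execute a resource action via the service library
 *
 * \return TRUE (the command is always either handed off to the service
 * library or finalized here)
 */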
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
CRM_ASSERT(rsc);
CRM_ASSERT(cmd);
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
#if SUPPORT_NAGIOS
/* Recurring operations are cancelled anyway for a stop operation */
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)
&& safe_str_eq(cmd->action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
goto exec_done;
}
#endif
params_copy = crm_str_table_dup(cmd->params);
action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms, cmd->timeout,
params_copy, cmd->service_flags);
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
action->cb_data = cmd;
/* 'cmd' may not be valid after this point if
* services_action_async() returned TRUE
*
* Upstart and systemd both synchronously determine monitor/status
* results and call action_complete (which may free 'cmd') if necessary.
*/
if (services_action_async(action, action_complete)) {
return TRUE;
}
cmd->exec_rc = action->rc;
if(action->status != PCMK_LRM_OP_DONE) {
cmd->lrmd_op_status = action->status;
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
services_action_free(action);
action = NULL;
exec_done:
cmd_finalize(cmd, rsc);
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t * rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace("Command %s %s was asked to run too early, "
"waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef PCMK__TIME_USE_CGT
get_current_time(&(cmd->t_run), &(cmd->t_first_run));
#endif
cmd->epoch_last_run = time(NULL);
}
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
rsc->active = cmd; /* only one op at a time for a rsc */
if (cmd->interval_ms) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
log_execute(cmd);
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH);
gIter = rsc->pending_ops;
while (gIter != NULL) {
GListPtr next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
gIter = rsc->recurring_ops;
while (gIter != NULL) {
GListPtr next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
/* If a stonith command is in-flight, just mark it as cancelled;
* it is not safe to finalize/free the cmd until the stonith api
* says it has either completed or timed out.
*/
if (rsc->active != cmd) {
cmd_finalize(cmd, NULL);
}
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms);
}
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static xmlNode *
process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id)
{
xmlNode *reply = NULL;
int rc = pcmk_ok;
const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
crm_err("Cluster API version must be greater than or equal to %s, not %s",
LRMD_MIN_PROTOCOL_VERSION, protocol_version);
rc = -EPROTO;
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
if (crm_is_true(is_ipc_provider)) {
// This is a remote connection from a cluster node's controller
#ifdef SUPPORT_REMOTE
ipc_proxy_add_provider(client);
#endif
}
return reply;
}
static int
process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
safe_str_eq(rsc->class, dup->class) &&
safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) {
crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Cached agent information for '%s'", rsc->rsc_id);
return rc;
}
static xmlNode *
process_lrmd_get_rsc_info(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
if (rsc_id == NULL) {
rc = -ENODEV;
} else {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Agent information for '%s' not in cache", rsc_id);
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
return reply;
}
static int
process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
xmlNode *request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -ENODEV;
}
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Ignoring unregistration of resource '%s', which is not registered",
rsc_id);
return pcmk_ok;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
crm_trace("Operation (0x%p) still in progress for unregistered resource %s",
rsc->active, rsc_id);
rc = -EINPROGRESS;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
int call_id;
if (!rsc_id) {
return -EINVAL;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return -ENODEV;
}
cmd = create_lrmd_cmd(request, client);
call_id = cmd->call_id;
/* Don't reference cmd after handing it off to be scheduled.
* The cmd could get merged and freed. */
schedule_lrmd_cmd(rsc, cmd);
return call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, guint interval_ms)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action:
 * 1. Check the pending ops list. If the op hasn't been handed off to the
 * service library or the stonith recurring list yet, removing it from
 * the pending list stops it.
 * 2. If it isn't in the pending ops list, it's either a recurring op in
 * the stonith recurring list or in the service library's recurring
 * list. Stop it there.
 * 3. If it isn't found in any list, the operation either has already
 * executed (and is not recurring) or never existed.
 */
if (!rsc) {
return -ENODEV;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action)
&& (cmd->interval_ms == interval_ms)) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return pcmk_ok;
}
}
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith operations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action)
&& (cmd->interval_ms == interval_ms)) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
if (rsc->active != cmd) {
cmd_finalize(cmd, rsc);
}
return pcmk_ok;
}
}
} else if (services_action_cancel(rsc_id,
normalize_action_name(rsc, action),
interval_ms) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return pcmk_ok;
}
return -EOPNOTSUPP;
}
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
/* Note that a copy of each list is created when g_list_concat() is called.
 * This prevents problems later in the iteration: cancel_op() may modify
 * the resource's recurring_ops and pending_ops lists, and without the
 * copies, that would invalidate our cmd_list iteration. */
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
}
if (!cmd_list) {
return;
}
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
lrmd_cmd_t *cmd = cmd_iter->data;
if (cmd->interval_ms == 0) {
continue;
}
if (client_id && safe_str_neq(cmd->client_id, client_id)) {
continue;
}
cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
}
/* frees only the copied list data, not the cmds */
g_list_free(cmd_list);
}
static int
process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
guint interval_ms = 0;
crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
if (!rsc_id || !action) {
return -EINVAL;
}
return cancel_op(rsc_id, action, interval_ms);
}
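/*!
 * \internal
 * \brief Add XML describing a resource's recurring operations to a reply
 *
 * \param[in,out] reply Reply XML to add to
 * \param[in]     rsc   Resource whose recurring operations should be added
 */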
static void
add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
{
xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
lrmd_cmd_t *cmd = item->data;
xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
(cmd->real_action? cmd->real_action : cmd->action));
crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
}
}
static xmlNode *
process_lrmd_get_recurring(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
const char *rsc_id = NULL;
lrmd_rsc_t *rsc = NULL;
xmlNode *reply = NULL;
xmlNode *rsc_xml = NULL;
// Resource ID is optional
rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
if (rsc_xml) {
rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
}
if (rsc_xml) {
rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
}
// If resource ID is specified, resource must exist
if (rsc_id != NULL) {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
// If resource ID is not specified, check all resources
if (rsc_id == NULL) {
GHashTableIter iter;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rsc)) {
add_recurring_op_xml(reply, rsc);
}
} else if (rsc) {
add_recurring_op_xml(reply, rsc);
}
return reply;
}
void
process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
xmlNode *reply = NULL;
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) {
#ifdef SUPPORT_REMOTE
ipc_proxy_forward_client(client, request);
#endif
do_reply = 1;
} else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
reply = process_lrmd_signon(client, request, call_id);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
reply = process_lrmd_get_rsc_info(request, call_id);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* don't notify anyone about failed un-registers */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
rc = process_lrmd_rsc_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
rc = process_lrmd_rsc_cancel(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) {
xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG);
CRM_LOG_ASSERT(data != NULL);
pcmk__valid_sbd_timeout(timeout);
} else if (crm_str_eq(op, LRMD_OP_ALERT_EXEC, TRUE)) {
rc = process_lrmd_alert_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_GET_RECURRING, TRUE)) {
reply = process_lrmd_get_recurring(request, call_id);
do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown IPC request '%s' from %s", op, client->name);
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
op, client->id, rc, do_reply, do_notify);
if (do_reply) {
int send_rc = pcmk_rc_ok;
if (reply == NULL) {
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
}
send_rc = lrmd_server_send_reply(client, id, reply);
free_xml(reply);
if (send_rc != pcmk_rc_ok) {
crm_warn("Reply to client %s failed: %s " CRM_XS " %d",
client->name, pcmk_rc_str(send_rc), send_rc);
}
}
if (do_notify) {
send_generic_notify(rc, request);
}
}
diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c
index dda7eeda14..5c58de44f0 100644
--- a/daemons/execd/remoted_proxy.c
+++ b/daemons/execd/remoted_proxy.c
@@ -1,446 +1,451 @@
/*
* Copyright 2012-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include "pacemaker-execd.h"
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cib/internal.h>
#include <crm/fencing/internal.h>
static qb_ipcs_service_t *cib_ro = NULL;
static qb_ipcs_service_t *cib_rw = NULL;
static qb_ipcs_service_t *cib_shm = NULL;
static qb_ipcs_service_t *attrd_ipcs = NULL;
static qb_ipcs_service_t *crmd_ipcs = NULL;
static qb_ipcs_service_t *stonith_ipcs = NULL;
// An IPC provider is a cluster node controller connecting as a client
static GList *ipc_providers = NULL;
// IPC clients are local clients, such as cibadmin or crm_resource, connecting locally
static GHashTable *ipc_clients = NULL;
/*!
* \internal
* \brief Get an IPC proxy provider
*
* \return Pointer to a provider if one exists, NULL otherwise
*
* \note Grab the first provider, which is the most recent connection. That way,
* if we haven't yet timed out an old, failed connection, we don't try to
* use it.
*/
pcmk__client_t *
ipc_proxy_get_provider()
{
return ipc_providers? (pcmk__client_t *) (ipc_providers->data) : NULL;
}
/*!
* \internal
* \brief Accept a client connection on a proxy IPC server
*
* \param[in] c Client's IPC connection
* \param[in] uid Client's user ID
* \param[in] gid Client's group ID
* \param[in] ipc_channel Name of IPC server to proxy
*
* \return pcmk_ok on success, -errno on error
*/
static int32_t
ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel)
{
pcmk__client_t *client;
pcmk__client_t *ipc_proxy = ipc_proxy_get_provider();
xmlNode *msg;
if (ipc_proxy == NULL) {
crm_warn("Cannot proxy IPC connection from uid %d gid %d to %s "
"because not connected to cluster", uid, gid, ipc_channel);
return -EREMOTEIO;
}
/* This new client is a local IPC client on a Pacemaker Remote controlled
* node, needing to access cluster node IPC services.
*/
client = pcmk__new_client(c, uid, gid);
if (client == NULL) {
return -EREMOTEIO;
}
/* This ipc client is bound to a single ipc provider. If the
* provider goes away, this client is disconnected */
client->userdata = strdup(ipc_proxy->id);
client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel, client->pid, client->id);
+ /* Allow remote executor to distinguish between proxied local clients and
+ * actual executor API clients
+ */
+ set_bit(client->flags, pcmk__client_to_proxy);
+
g_hash_table_insert(ipc_clients, client->id, client);
msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_NEW);
crm_xml_add(msg, F_LRMD_IPC_IPC_SERVER, ipc_channel);
crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
lrmd_server_send_notify(ipc_proxy, msg);
free_xml(msg);
crm_debug("Accepted IPC proxy connection (session ID %s) "
"from uid %d gid %d on channel %s",
client->id, uid, gid, ipc_channel);
return 0;
}
static int32_t
crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD);
}
static int32_t
attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
return ipc_proxy_accept(c, uid, gid, T_ATTRD);
}
static int32_t
stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
return ipc_proxy_accept(c, uid, gid, "stonith-ng");
}
static int32_t
cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RW);
}
static int32_t
cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RO);
}
void
ipc_proxy_forward_client(pcmk__client_t *ipc_proxy, xmlNode *xml)
{
const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION);
const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP);
xmlNode *msg = get_message_xml(xml, F_LRMD_IPC_MSG);
pcmk__client_t *ipc_client;
int rc = pcmk_rc_ok;
/* If the IPC provider is acknowledging our shutdown request,
* defuse the short exit timer to give the cluster time to
* stop any resources we're running.
*/
if (safe_str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_ACK)) {
handle_shutdown_ack();
return;
}
if (safe_str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK)) {
handle_shutdown_nack();
return;
}
ipc_client = pcmk__find_client_by_id(session);
if (ipc_client == NULL) {
xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY);
crm_xml_add(msg, F_LRMD_IPC_SESSION, session);
lrmd_server_send_notify(ipc_proxy, msg);
free_xml(msg);
return;
}
/* This is an event or response from the ipc provider
* going to the local ipc client.
*
* Consider the chain of events:
*
* -----remote node----------------|---- cluster node ------
* ipc_client <--1--> this code
* <--2--> pacemaker-controld:remote_proxy_cb/remote_proxy_relay_event()
* <--3--> ipc server
*
* This function is receiving a msg from connection 2
* and forwarding it to connection 1.
*/
if (safe_str_eq(msg_type, LRMD_IPC_OP_EVENT)) {
crm_trace("Sending event to %s", ipc_client->id);
rc = pcmk__ipc_send_xml(ipc_client, 0, msg, crm_ipc_server_event);
} else if (safe_str_eq(msg_type, LRMD_IPC_OP_RESPONSE)) {
int msg_id = 0;
crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id);
crm_trace("Sending response to %d - %s", ipc_client->request_id, ipc_client->id);
rc = pcmk__ipc_send_xml(ipc_client, msg_id, msg, FALSE);
CRM_LOG_ASSERT(msg_id == ipc_client->request_id);
ipc_client->request_id = 0;
} else if (safe_str_eq(msg_type, LRMD_IPC_OP_DESTROY)) {
qb_ipcs_disconnect(ipc_client->ipcs);
} else {
crm_err("Unknown ipc proxy msg type %s" , msg_type);
}
if (rc != pcmk_rc_ok) {
crm_warn("Could not proxy IPC to client %s: %s " CRM_XS " rc=%d",
ipc_client->id, pcmk_rc_str(rc), rc);
}
}
static int32_t
ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
pcmk__client_t *ipc_proxy = pcmk__find_client_by_id(client->userdata);
xmlNode *request = NULL;
xmlNode *msg = NULL;
if (!ipc_proxy) {
qb_ipcs_disconnect(client->ipcs);
return 0;
}
/* This is a request from the local ipc client going
* to the ipc provider.
*
* Consider the chain of events:
*
* -----remote node----------------|---- cluster node ------
* ipc_client <--1--> this code
* <--2--> pacemaker-controld:remote_proxy_dispatch_internal()
* <--3--> ipc server
*
* This function is receiving a request from connection
* 1 and forwarding it to connection 2.
*/
request = pcmk__client_data2xml(client, data, &id, &flags);
if (!request) {
return 0;
}
CRM_CHECK(client != NULL, crm_err("Invalid client");
free_xml(request); return FALSE);
CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client);
free_xml(request); return FALSE);
    /* This ensures that synchronous request/response pairs happen over the
     * event channel in the controller, allowing the controller to process
     * the messages asynchronously.
     */
set_bit(flags, crm_ipc_proxied);
client->request_id = id;
msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_REQUEST);
crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
crm_xml_add(msg, F_LRMD_IPC_CLIENT, pcmk__client_name(client));
crm_xml_add(msg, F_LRMD_IPC_USER, client->user);
crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id);
crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags);
add_message_xml(msg, F_LRMD_IPC_MSG, request);
lrmd_server_send_notify(ipc_proxy, msg);
free_xml(request);
free_xml(msg);
return 0;
}
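/* Illustrative sketch (hypothetical, mirroring the chain described above):
 * the provider side would unwrap a proxied request roughly like this before
 * relaying it to the target IPC server. example_unwrap_request() is not a
 * real function in this tree.
 */
static xmlNode *
example_unwrap_request(xmlNode *wrapper, const char **session)
{
    *session = crm_element_value(wrapper, F_LRMD_IPC_SESSION);
    return get_message_xml(wrapper, F_LRMD_IPC_MSG);
}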
/*!
* \internal
* \brief Notify a proxy provider that we wish to shut down
*
* \return 0 on success, -1 on error
*/
int
ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy)
{
xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
int rc;
crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_REQ);
/* We don't really have a session, but the controller needs this attribute
* to recognize this as proxy communication.
*/
crm_xml_add(msg, F_LRMD_IPC_SESSION, "0");
rc = (lrmd_server_send_notify(ipc_proxy, msg) != pcmk_rc_ok)? -1 : 0;
free_xml(msg);
return rc;
}
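/* Illustrative usage (hypothetical caller): a graceful shutdown would send
 * the request via a connected provider and fall back to stopping immediately
 * if it cannot be delivered; otherwise the caller waits for the provider to
 * answer with LRMD_IPC_OP_SHUTDOWN_ACK or LRMD_IPC_OP_SHUTDOWN_NACK (handled
 * in ipc_proxy_forward_client() above). For example:
 *
 *     if ((provider == NULL) || (ipc_proxy_shutdown_req(provider) < 0)) {
 *         // No provider, or request undeliverable: stop without waiting
 *     }
 */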
static int32_t
ipc_proxy_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
pcmk__client_t *ipc_proxy;
if (client == NULL) {
return 0;
}
ipc_proxy = pcmk__find_client_by_id(client->userdata);
crm_trace("Connection %p", c);
if (ipc_proxy) {
xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY);
crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id);
lrmd_server_send_notify(ipc_proxy, msg);
free_xml(msg);
}
g_hash_table_remove(ipc_clients, client->id);
free(client->userdata);
client->userdata = NULL;
pcmk__free_client(client);
return 0;
}
static void
ipc_proxy_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
ipc_proxy_closed(c);
}
static struct qb_ipcs_service_handlers crmd_proxy_callbacks = {
.connection_accept = crmd_proxy_accept,
.connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
static struct qb_ipcs_service_handlers attrd_proxy_callbacks = {
.connection_accept = attrd_proxy_accept,
.connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
static struct qb_ipcs_service_handlers stonith_proxy_callbacks = {
.connection_accept = stonith_proxy_accept,
.connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = {
.connection_accept = cib_proxy_accept_ro,
.connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = {
.connection_accept = cib_proxy_accept_rw,
.connection_created = NULL,
.msg_process = ipc_proxy_dispatch,
.connection_closed = ipc_proxy_closed,
.connection_destroyed = ipc_proxy_destroy
};
void
ipc_proxy_add_provider(pcmk__client_t *ipc_proxy)
{
// Prepending ensures the most recent connection is always first
ipc_providers = g_list_prepend(ipc_providers, ipc_proxy);
}
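/* Because new providers are prepended, the most recent connection is always
 * at the head of the list; a lookup could be as simple as this illustrative
 * (hypothetical) helper.
 */
static pcmk__client_t *
example_newest_provider(void)
{
    return (ipc_providers != NULL)? (pcmk__client_t *) ipc_providers->data : NULL;
}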
void
ipc_proxy_remove_provider(pcmk__client_t *ipc_proxy)
{
GHashTableIter iter;
pcmk__client_t *ipc_client = NULL;
char *key = NULL;
GList *remove_these = NULL;
    GList *gIter = NULL;
ipc_providers = g_list_remove(ipc_providers, ipc_proxy);
g_hash_table_iter_init(&iter, ipc_clients);
    while (g_hash_table_iter_next(&iter, (gpointer *) &key,
                                  (gpointer *) &ipc_client)) {
const char *proxy_id = ipc_client->userdata;
if (safe_str_eq(proxy_id, ipc_proxy->id)) {
crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.",
ipc_client->id, ipc_client->pid);
/* we can't remove during the iteration, so copy items
* to a list we can destroy later */
remove_these = g_list_append(remove_these, ipc_client);
}
}
for (gIter = remove_these; gIter != NULL; gIter = gIter->next) {
ipc_client = gIter->data;
// Disconnection callback will free the client here
qb_ipcs_disconnect(ipc_client->ipcs);
}
/* just frees the list, not the elements in the list */
g_list_free(remove_these);
}
void
ipc_proxy_init(void)
{
ipc_clients = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);
pcmk__serve_based_ipc(&cib_ro, &cib_rw, &cib_shm, &cib_proxy_callbacks_ro,
&cib_proxy_callbacks_rw);
pcmk__serve_attrd_ipc(&attrd_ipcs, &attrd_proxy_callbacks);
pcmk__serve_fenced_ipc(&stonith_ipcs, &stonith_proxy_callbacks);
crmd_ipcs = pcmk__serve_controld_ipc(&crmd_proxy_callbacks);
if (crmd_ipcs == NULL) {
crm_err("Failed to create controller: exiting and inhibiting respawn");
crm_warn("Verify pacemaker and pacemaker_remote are not both enabled");
crm_exit(CRM_EX_FATAL);
}
}
void
ipc_proxy_cleanup(void)
{
if (ipc_providers) {
g_list_free(ipc_providers);
ipc_providers = NULL;
}
if (ipc_clients) {
g_hash_table_destroy(ipc_clients);
ipc_clients = NULL;
}
pcmk__stop_based_ipc(cib_ro, cib_rw, cib_shm);
qb_ipcs_destroy(attrd_ipcs);
qb_ipcs_destroy(stonith_ipcs);
qb_ipcs_destroy(crmd_ipcs);
cib_ro = NULL;
cib_rw = NULL;
cib_shm = NULL;
}
diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h
index 6a1fcf39e2..91b34359d5 100644
--- a/include/crm/common/ipc_internal.h
+++ b/include/crm/common/ipc_internal.h
@@ -1,215 +1,216 @@
/*
* Copyright 2013-2020 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__IPC_INTERNAL_H
#define PCMK__IPC_INTERNAL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h> // bool
#include <stdint.h> // uint32_t
#include <sys/uio.h> // struct iovec
#include <sys/types.h> // uid_t, gid_t, pid_t, size_t
#ifdef HAVE_GNUTLS_GNUTLS_H
# include <gnutls/gnutls.h> // gnutls_session_t
#endif
#include <glib.h> // guint, gpointer, GQueue, ...
#include <libxml/tree.h> // xmlNode
#include <qb/qbipcs.h> // qb_ipcs_connection_t, ...
#include <crm_config.h> // US_AUTH_GETPEEREID
#include <crm/common/ipc.h>
#include <crm/common/mainloop.h> // mainloop_io_t
/* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that
require a delicate handling anyway (socket-based activation with systemd);
we can be reasonably sure that this PID is never possessed by the actual
child daemon, as it gets taken either by the proper init, or by pacemakerd
itself (i.e. this precludes anything else); note that value of zero
is meant to carry "unset" meaning, and better not to bet on/conditionalize
over signedness of pid_t */
#define PCMK__SPECIAL_PID 1
#if defined(US_AUTH_GETPEEREID)
/* On FreeBSD, we don't want to expose the "non-yieldable PID" (meaning "IPC
   liveness check only") under its nominal value, which could cause
   confusion; this is unambiguous as long as there's no socket-based
   activation as with systemd (very improbable there) */
#define PCMK__SPECIAL_PID_AS_0(p) (((p) == PCMK__SPECIAL_PID) ? 0 : (p))
#else
#define PCMK__SPECIAL_PID_AS_0(p) (p)
#endif
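/* Illustrative only: callers that log or compare a detected PID would
 * typically filter it through PCMK__SPECIAL_PID_AS_0(), so that FreeBSD's
 * "liveness check only" marker is reported as 0 (unknown) rather than as
 * PID 1, for example:
 *
 *     crm_info("Detected daemon at PID %lld",
 *              (long long) PCMK__SPECIAL_PID_AS_0(found_pid));
 */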
/*!
* \internal
* \brief Check the authenticity and liveness of the process via IPC end-point
*
 * When an IPC daemon is detected at the given IPC endpoint (name), its
 * authenticity is verified by comparing its credentials against the provided
 * referential UID and GID, and the result of this check can be deduced from
 * the return value. As an exception, a referential UID of 0 (root) matches
 * any detected daemon's credentials.
*
* \param[in] name IPC name to base the search on
* \param[in] refuid referential UID to check against
* \param[in] refgid referential GID to check against
 * \param[out] gotpid Where to optionally store the PID of the detected
 *                    process (not available on FreeBSD, where the special
 *                    value of 1, see PCMK__SPECIAL_PID, is used instead, and
 *                    the caller is required to handle that value specially)
*
* \return Standard Pacemaker return code
*
* \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
* indicating that no trace of IPC liveness was detected, and
* pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
* an unauthorized process.
 * \note This function emits a log message for return codes other than
 *       pcmk_rc_ok and pcmk_rc_ipc_unresponsive, and when there isn't a
 *       perfect match with respect to \p refuid and/or \p refgid, to flag a
 *       possible violation of the least-privilege principle.
*
* \see crm_ipc_is_authentic_process
*/
int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
gid_t refgid, pid_t *gotpid);
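/* Illustrative call (hypothetical endpoint name and reference credentials):
 *
 *     pid_t found = 0;
 *     int rc = pcmk__ipc_is_authentic_process_active("some-ipc-endpoint",
 *                                                    refuid, refgid, &found);
 *
 *     if (rc == pcmk_rc_ok) {
 *         // Daemon detected with acceptable credentials; found may be
 *         // PCMK__SPECIAL_PID on FreeBSD (see above)
 *     } else if (rc == pcmk_rc_ipc_unresponsive) {
 *         // No trace of IPC liveness at that endpoint
 *     }
 */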
/*
* Server-related
*/
typedef struct pcmk__client_s pcmk__client_t;
enum pcmk__client_type {
PCMK__CLIENT_IPC = 1,
PCMK__CLIENT_TCP = 2,
# ifdef HAVE_GNUTLS_GNUTLS_H
PCMK__CLIENT_TLS = 3,
# endif
};
struct pcmk__remote_s {
/* Shared */
char *buffer;
size_t buffer_size;
size_t buffer_offset;
int auth_timeout;
int tcp_socket;
mainloop_io_t *source;
/* CIB-only */
bool authenticated;
char *token;
/* TLS only */
# ifdef HAVE_GNUTLS_GNUTLS_H
gnutls_session_t *tls_session;
bool tls_handshake_complete;
# endif
};
enum pcmk__client_flags {
- pcmk__client_proxied = 0x00001, /* ipc_proxy code only */
- pcmk__client_privileged = 0x00002, /* root or cluster user */
+ pcmk__client_proxied = (1 << 0), // Remote client behind proxy
+ pcmk__client_privileged = (1 << 1), // root or cluster user
+ pcmk__client_to_proxy = (1 << 2), // Local client to be proxied
};
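/* Because these values are distinct bits, servers can combine and test them
 * with plain bitwise operations, for example (illustrative only):
 *
 *     client->flags |= pcmk__client_to_proxy;
 *     if (client->flags & pcmk__client_privileged) {
 *         // Client is root or the cluster user
 *     }
 */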
struct pcmk__client_s {
unsigned int pid;
uid_t uid;
gid_t gid;
char *id;
char *name;
char *user;
/* Provided for server use (not used by library) */
/* @TODO merge options, flags, and kind (reserving lower bits for server) */
long long options;
int request_id;
uint32_t flags;
void *userdata;
int event_timer;
GQueue *event_queue;
/* Depending on the value of kind, only some of the following
* will be populated/valid
*/
enum pcmk__client_type kind;
qb_ipcs_connection_t *ipcs; /* IPC */
struct pcmk__remote_s *remote; /* TCP/TLS */
unsigned int queue_backlog; /* IPC queue length after last flush */
unsigned int queue_max; /* Evict client whose queue grows this big */
};
guint pcmk__ipc_client_count(void);
void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data);
void pcmk__foreach_ipc_client_remove(GHRFunc func, gpointer user_data);
void pcmk__client_cleanup(void);
pcmk__client_t *pcmk__find_client(qb_ipcs_connection_t *c);
pcmk__client_t *pcmk__find_client_by_id(const char *id);
const char *pcmk__client_name(pcmk__client_t *c);
const char *pcmk__client_type_str(enum pcmk__client_type client_type);
pcmk__client_t *pcmk__new_unauth_client(void *key);
pcmk__client_t *pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid);
void pcmk__free_client(pcmk__client_t *c);
void pcmk__drop_all_clients(qb_ipcs_service_t *s);
bool pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax);
void pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c,
uint32_t request, uint32_t flags, const char *tag);
#define pcmk__ipc_send_ack(c, req, flags, tag) \
pcmk__ipc_send_ack_as(__FUNCTION__, __LINE__, (c), (req), (flags), (tag))
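/* Illustrative usage: a server handler that has accepted a request for
 * asynchronous processing would typically acknowledge it, with the tag
 * naming the reply element (the "ack" tag here is an arbitrary example):
 *
 *     pcmk__ipc_send_ack(client, id, flags, "ack");
 */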
int pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message,
uint32_t max_send_size,
struct iovec **result, ssize_t *bytes);
int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message,
uint32_t flags);
int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags);
xmlNode *pcmk__client_data2xml(pcmk__client_t *c, void *data,
uint32_t *id, uint32_t *flags);
int pcmk__client_pid(qb_ipcs_connection_t *c);
void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs,
struct qb_ipcs_service_handlers *cb);
void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs,
struct qb_ipcs_service_handlers *cb);
qb_ipcs_service_t *pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb);
void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro,
qb_ipcs_service_t **ipcs_rw,
qb_ipcs_service_t **ipcs_shm,
struct qb_ipcs_service_handlers *ro_cb,
struct qb_ipcs_service_handlers *rw_cb);
void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro,
qb_ipcs_service_t *ipcs_rw,
qb_ipcs_service_t *ipcs_shm);
#ifdef __cplusplus
}
#endif
#endif
