diff --git a/daemons/execd/Makefile.am b/daemons/execd/Makefile.am index e177dbb1a8..bcf96edec0 100644 --- a/daemons/execd/Makefile.am +++ b/daemons/execd/Makefile.am @@ -1,66 +1,67 @@ # -# Copyright 2012-2024 the Pacemaker project contributors +# Copyright 2012-2025 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU Lesser General Public License # version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk include $(top_srcdir)/mk/man.mk halibdir = $(CRM_DAEMON_DIR) halib_PROGRAMS = pacemaker-execd \ cts-exec-helper EXTRA_DIST = pacemaker-remoted.8.inc pacemaker_execd_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_execd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_execd_LDADD = $(top_builddir)/lib/fencing/libstonithd.la pacemaker_execd_LDADD += $(top_builddir)/lib/services/libcrmservice.la pacemaker_execd_LDADD += $(top_builddir)/lib/common/libcrmcommon.la pacemaker_execd_SOURCES = pacemaker-execd.c \ execd_commands.c \ - execd_alerts.c + execd_alerts.c \ + execd_messages.c sbin_PROGRAMS = pacemaker-remoted if BUILD_SYSTEMD systemdsystemunit_DATA = pacemaker_remote.service else if BUILD_LSB initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote endif endif pacemaker_remoted_CPPFLAGS = -DPCMK__COMPILE_REMOTE \ $(AM_CPPFLAGS) pacemaker_remoted_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_remoted_LDADD = $(top_builddir)/lib/fencing/libstonithd.la pacemaker_remoted_LDADD += $(top_builddir)/lib/services/libcrmservice.la pacemaker_remoted_LDADD += $(top_builddir)/lib/cib/libcib.la pacemaker_remoted_LDADD += $(top_builddir)/lib/lrmd/liblrmd.la pacemaker_remoted_LDADD += $(top_builddir)/lib/common/libcrmcommon.la pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) \ remoted_tls.c \ remoted_pidone.c \ remoted_proxy.c \ remoted_schemas.c cts_exec_helper_LDADD = $(top_builddir)/lib/pengine/libpe_status.la cts_exec_helper_LDADD += $(top_builddir)/lib/cib/libcib.la cts_exec_helper_LDADD += $(top_builddir)/lib/lrmd/liblrmd.la cts_exec_helper_LDADD += $(top_builddir)/lib/services/libcrmservice.la cts_exec_helper_LDADD += $(top_builddir)/lib/common/libcrmcommon.la cts_exec_helper_SOURCES = cts-exec-helper.c noinst_HEADERS = pacemaker-execd.h CLEANFILES = $(man8_MANS) diff --git a/daemons/execd/execd_alerts.c b/daemons/execd/execd_alerts.c index bdf75f0477..19f8c17a41 100644 --- a/daemons/execd/execd_alerts.c +++ b/daemons/execd/execd_alerts.c @@ -1,199 +1,199 @@ /* * Copyright 2016-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // xmlNode #include #include #include #include #include #include #include #include "pacemaker-execd.h" /* Track in-flight alerts so we can wait for them at shutdown */ static GHashTable *inflight_alerts; /* key = call_id, value = timeout */ static gboolean draining_alerts = FALSE; static inline void add_inflight_alert(int call_id, int timeout) { if (inflight_alerts == NULL) { inflight_alerts = pcmk__intkey_table(NULL); } pcmk__intkey_table_insert(inflight_alerts, call_id, GINT_TO_POINTER(timeout)); } static inline void remove_inflight_alert(int call_id) { if (inflight_alerts != NULL) { pcmk__intkey_table_remove(inflight_alerts, call_id); } } static int max_inflight_timeout(void) { GHashTableIter iter; gpointer timeout; int max_timeout = 0; if (inflight_alerts) { g_hash_table_iter_init(&iter, inflight_alerts); while (g_hash_table_iter_next(&iter, NULL, &timeout)) { if (GPOINTER_TO_INT(timeout) > max_timeout) { max_timeout = GPOINTER_TO_INT(timeout); } } } return max_timeout; } struct alert_cb_s { char *client_id; int call_id; }; static void alert_complete(svc_action_t *action) { struct alert_cb_s *cb_data = (struct alert_cb_s *) (action->cb_data); CRM_CHECK(cb_data != NULL, return); remove_inflight_alert(cb_data->call_id); if (action->status != PCMK_EXEC_DONE) { const char *reason = services__exit_reason(action); crm_notice("Could not send alert: %s%s%s%s " QB_XS " client=%s", pcmk_exec_status_str(action->status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")", cb_data->client_id); } else if (action->rc != 0) { crm_notice("Alert [%d] completed but exited with status %d " QB_XS " client=%s", action->pid, action->rc, cb_data->client_id); } else { crm_debug("Alert [%d] completed " QB_XS " client=%s", action->pid, cb_data->client_id); } free(cb_data->client_id); free(action->cb_data); action->cb_data = NULL; } int -process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) +lrmd_process_alert_exec(pcmk__client_t *client, xmlNode *request) { static int alert_sequence_no = 0; xmlNode *alert_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_ALERT, LOG_ERR); const char *alert_id = crm_element_value(alert_xml, PCMK__XA_LRMD_ALERT_ID); const char *alert_path = crm_element_value(alert_xml, PCMK__XA_LRMD_ALERT_PATH); svc_action_t *action = NULL; int alert_timeout = 0; - int rc = pcmk_ok; + int rc = pcmk_rc_ok; GHashTable *params = NULL; struct alert_cb_s *cb_data = NULL; if ((alert_id == NULL) || (alert_path == NULL) || (client == NULL) || (client->id == NULL)) { /* hint static analyzer */ - rc = -EINVAL; + rc = EINVAL; goto err; } if (draining_alerts) { - return pcmk_ok; + return pcmk_rc_ok; } crm_element_value_int(alert_xml, PCMK__XA_LRMD_TIMEOUT, &alert_timeout); crm_info("Executing alert %s for %s", alert_id, client->id); params = xml2list(alert_xml); pcmk__add_alert_key_int(params, PCMK__alert_key_node_sequence, ++alert_sequence_no); cb_data = pcmk__assert_alloc(1, sizeof(struct alert_cb_s)); cb_data->client_id = pcmk__str_copy(client->id); crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &(cb_data->call_id)); action = services_alert_create(alert_id, alert_path, alert_timeout, params, alert_sequence_no, cb_data); if (action->rc != PCMK_OCF_UNKNOWN) { - rc = -E2BIG; + rc = E2BIG; goto err; } rc = services_action_user(action, CRM_DAEMON_USER); if (rc < 0) { goto err; } add_inflight_alert(cb_data->call_id, alert_timeout); if (services_alert_async(action, alert_complete) == FALSE) { services_action_free(action); } - return pcmk_ok; + return pcmk_rc_ok; err: if (cb_data) { free(cb_data->client_id); free(cb_data); } services_action_free(action); return rc; } static bool drain_check(guint remaining_timeout_ms) { if (inflight_alerts != NULL) { guint count = g_hash_table_size(inflight_alerts); if (count > 0) { crm_trace("%d alerts pending (%.3fs timeout remaining)", count, remaining_timeout_ms / 1000.0); return TRUE; } } return FALSE; } void lrmd_drain_alerts(GMainLoop *mloop) { if (inflight_alerts != NULL) { guint timer_ms = max_inflight_timeout() + 5000; crm_trace("Draining in-flight alerts (timeout %.3fs)", timer_ms / 1000.0); draining_alerts = TRUE; pcmk_drain_main_loop(mloop, timer_ms, drain_check); g_hash_table_destroy(inflight_alerts); inflight_alerts = NULL; } } diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index a59dee2356..07d5467fa2 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1,2010 +1,1869 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include // xmlNode // Check whether we have a high-resolution monotonic clock #undef PCMK__TIME_USE_CGT #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC) # define PCMK__TIME_USE_CGT # include /* clock_gettime */ #endif #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; guint interval_ms; int start_delay; int timeout_orig; int call_id; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *userdata_str; pcmk__action_result_t result; /* We can track operation queue time and run time, to be saved with the CIB * resource history (and displayed in cluster status). We need * high-resolution monotonic time for this purpose, so we use * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature * is disabled). * * However, we also need epoch timestamps for recording the time the command * last ran and the time its return value last changed, for use in time * displays (as opposed to interval calculations). We keep time_t values for * this purpose. * * The last run time is used for both purposes, so we keep redundant * monotonic and epoch values for this. Technically the two could represent * different times, but since time_t has only second resolution and the * values are used for distinct purposes, that is not significant. */ #ifdef PCMK__TIME_USE_CGT /* Recurring and systemd operations may involve more than one executor * command per operation, so they need info about the original and the most * recent. */ struct timespec t_first_run; // When op first ran struct timespec t_run; // When op most recently ran struct timespec t_first_queue; // When op was first queued struct timespec t_queue; // When op was most recently queued #endif time_t epoch_last_run; // Epoch timestamp of when op last ran time_t epoch_rcchange; // Epoch timestamp of when rc last changed bool first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean execute_resource_action(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); #ifdef PCMK__TIME_USE_CGT /*! * \internal * \brief Check whether a struct timespec has been set * * \param[in] timespec Time to check * * \return true if timespec has been set (i.e. is nonzero), false otherwise */ static inline bool time_is_set(const struct timespec *timespec) { return (timespec != NULL) && ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0)); } /* * \internal * \brief Set a timespec (and its original if unset) to the current time * * \param[out] t_current Where to store current time * \param[out] t_orig Where to copy t_current if unset */ static void get_current_time(struct timespec *t_current, struct timespec *t_orig) { clock_gettime(CLOCK_MONOTONIC, t_current); if ((t_orig != NULL) && !time_is_set(t_orig)) { *t_orig = *t_current; } } /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or unset) * * \note Can overflow on 32bit machines when the differences is around * 24 days or more. */ static int time_diff_ms(const struct timespec *now, const struct timespec *old) { int diff_ms = 0; if (time_is_set(old)) { struct timespec local_now = { 0, }; if (now == NULL) { clock_gettime(CLOCK_MONOTONIC, &local_now); now = &local_now; } diff_ms = (now->tv_sec - old->tv_sec) * 1000 + (now->tv_nsec - old->tv_nsec) / 1000000; } return diff_ms; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * \param[in,out] cmd Executor command object to reset * * \note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static inline bool action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms) { return (cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, action, pcmk__str_casei); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command to log result for * \param[in] exec_time_ms Execution time in milliseconds, if known * \param[in] queue_time_ms Queue time in milliseconds, if known */ static void log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms) { int log_level = LOG_INFO; GString *str = g_string_sized_new(100); // reasonable starting size if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { log_level = LOG_DEBUG; } g_string_append_printf(str, "%s %s (call %d", cmd->rsc_id, cmd->action, cmd->call_id); if (cmd->last_pid != 0) { g_string_append_printf(str, ", PID %d", cmd->last_pid); } switch (cmd->result.execution_status) { case PCMK_EXEC_DONE: g_string_append_printf(str, ") exited with status %d", cmd->result.exit_status); break; case PCMK_EXEC_CANCELLED: g_string_append_printf(str, ") cancelled"); break; default: pcmk__g_strcat(str, ") could not be executed: ", pcmk_exec_status_str(cmd->result.execution_status), NULL); break; } if (cmd->result.exit_reason != NULL) { pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL); } #ifdef PCMK__TIME_USE_CGT pcmk__g_strcat(str, " (execution time ", pcmk__readable_interval(exec_time_ms), NULL); if (queue_time_ms > 0) { pcmk__g_strcat(str, " after being queued ", pcmk__readable_interval(queue_time_ms), NULL); } g_string_append_c(str, ')'); #endif do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei) && pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { return PCMK_ACTION_STATUS; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = pcmk__assert_alloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action, rsc); // Initialize fence device probes (to return "not running") pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client) { int call_options = 0; xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &call_options); cmd->call_opts = call_options; cmd->client_id = pcmk__str_copy(client->id); crm_element_value_int(msg, PCMK__XA_LRMD_CALLID, &cmd->call_id); crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &cmd->interval_ms); crm_element_value_int(rsc_xml, PCMK__XA_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, PCMK__XA_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), PCMK_VALUE_BLOCK, pcmk__str_casei)) { crm_debug("Setting flag to leave pid group on timeout and " "only kill action pid for " PCMK__OP_FMT, cmd->rsc_id, cmd->action, cmd->interval_ms); cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", cmd->action, 0, SVC_ACTION_LEAVE_GROUP, "SVC_ACTION_LEAVE_GROUP"); } return cmd; } static void stop_recurring_timer(lrmd_cmd_t *cmd) { if (cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = 0; } } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { stop_recurring_timer(cmd); if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } pcmk__reset_result(&(cmd->result)); free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); pcmk__assert(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); return FALSE; } static inline void start_recurring_timer(lrmd_cmd_t *cmd) { if (!cmd || (cmd->interval_ms <= 0)) { return; } cmd->stonith_recurring_id = pcmk__create_timer(cmd->interval_ms, stonith_recurring_op_helper, cmd); } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } /*! * \internal * \brief Check whether a list already contains the equivalent of a given action * * \param[in] action_list List to search * \param[in] cmd Action to search for */ static lrmd_cmd_t * find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd) { for (const GList *item = action_list; item != NULL; item = item->next) { lrmd_cmd_t *dup = item->data; if (action_matches(cmd, dup->action, dup->interval_ms)) { return dup; } } return NULL; } static bool merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { lrmd_cmd_t * dup = NULL; bool dup_pending = true; if (cmd->interval_ms == 0) { return false; } // Search for a duplicate of this action (in-flight or not) dup = find_duplicate_action(rsc->pending_ops, cmd); if (dup == NULL) { dup_pending = false; dup = find_duplicate_action(rsc->recurring_ops, cmd); if (dup == NULL) { return false; } } /* Do not merge fencing monitors marked for cancellation, so we can reply to * the cancellation separately. */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei) && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) { return false; } /* This should not occur. If it does, we need to investigate how something * like this is possible in the controller. */ crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT "), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); // Merge new action's call ID and user data into existing action dup->first_notify_sent = false; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; free_lrmd_cmd(cmd); cmd = NULL; /* If dup is not pending, that means it has already executed at least once * and is waiting in the interval. In that case, stop waiting and initiate * a new instance now. */ if (!dup_pending) { if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stop_recurring_timer(dup); stonith_recurring_op_helper(dup); } else { services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); } } return true; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); if (merge_recurring_duplicate(rsc, cmd)) { // Equivalent of cmd has already been scheduled return; } /* The controller expects the executor to automatically cancel * recurring operations before a resource stops. */ if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = pcmk__create_timer(cmd->start_delay, start_delay_helper, cmd); } } -static xmlNode * -create_lrmd_reply(const char *origin, int rc, int call_id) +xmlNode * +lrmd_create_reply(const char *origin, int rc, int call_id) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_LRMD_REPLY); crm_xml_add(reply, PCMK__XA_LRMD_ORIGIN, origin); crm_xml_add_int(reply, PCMK__XA_LRMD_RC, rc); crm_xml_add_int(reply, PCMK__XA_LRMD_CALLID, call_id); return reply; } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; int rc; int log_level = LOG_WARNING; const char *msg = NULL; CRM_CHECK(client != NULL, return); if (client->name == NULL) { crm_trace("Skipping notification to client without name"); return; } if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) { /* We only want to notify clients of the executor IPC API. If we are * running as Pacemaker Remote, we may have clients proxied to other * IPC services in the cluster, so skip those. */ crm_trace("Skipping executor API notification to client %s", pcmk__client_name(client)); return; } rc = lrmd_server_send_notify(client, update_msg); if (rc == pcmk_rc_ok) { return; } switch (rc) { case ENOTCONN: case EPIPE: // Client exited without waiting for notification log_level = LOG_INFO; msg = "Disconnected"; break; default: msg = pcmk_rc_str(rc); break; } do_crm_log(log_level, "Could not notify client %s: %s " QB_XS " rc=%d", pcmk__client_name(client), msg, rc); } static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { xmlNode *notify = NULL; int exec_time = 0; int queue_time = 0; #ifdef PCMK__TIME_USE_CGT exec_time = time_diff_ms(NULL, &(cmd->t_run)); queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue)); #endif log_finished(cmd, exec_time, queue_time); /* If the originator requested to be notified only for changes in recurring * operation results, skip the notification if the result hasn't changed. */ if (cmd->first_notify_sent && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only) && (cmd->last_notify_rc == cmd->result.exit_status) && (cmd->last_notify_op_status == cmd->result.execution_status)) { return; } cmd->first_notify_sent = true; cmd->last_notify_rc = cmd->result.exit_status; cmd->last_notify_op_status = cmd->result.execution_status; notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY); crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, PCMK__XA_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_ms(notify, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status); crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_OP_STATUS, cmd->result.execution_status); crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted); crm_xml_add_ll(notify, PCMK__XA_LRMD_RUN_TIME, (long long) cmd->epoch_last_run); crm_xml_add_ll(notify, PCMK__XA_LRMD_RCCHANGE_TIME, (long long) cmd->epoch_rcchange); #ifdef PCMK__TIME_USE_CGT crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_TIME, exec_time); crm_xml_add_int(notify, PCMK__XA_LRMD_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, PCMK__XA_LRMD_OP, LRMD_OP_RSC_EXEC); crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, PCMK__XA_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, PCMK__XA_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason); if (cmd->result.action_stderr != NULL) { crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT, cmd->result.action_stderr); } else if (cmd->result.action_stdout != NULL) { crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT, cmd->result.action_stdout); } if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = pcmk__xe_create(notify, PCMK__XE_ATTRIBUTES); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if ((cmd->client_id != NULL) && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) { pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id); if (client != NULL) { send_client_notify(client->id, client, notify); } } else { pcmk__foreach_ipc_client(send_client_notify, notify); } pcmk__xml_free(notify); } -static void -send_generic_notify(int rc, xmlNode * request) +void +lrmd_send_generic_notify(int rc, xmlNode * request) { if (pcmk__ipc_client_count() != 0) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID); const char *op = crm_element_value(request, PCMK__XA_LRMD_OP); crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id); notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY); crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, PCMK__XA_LRMD_RC, rc); crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, call_id); crm_xml_add(notify, PCMK__XA_LRMD_OP, op); crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, rsc_id); pcmk__foreach_ipc_client(send_client_notify, notify); pcmk__xml_free(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->last_pid = 0; #ifdef PCMK__TIME_USE_CGT memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); #endif cmd->epoch_last_run = 0; pcmk__reset_result(&(cmd->result)); cmd->result.execution_status = PCMK_EXEC_DONE; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if ((cmd->interval_ms != 0) && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval_ms == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } struct notify_new_client_data { xmlNode *notify; pcmk__client_t *new_client; }; static void notify_one_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *client = value; struct notify_new_client_data *data = user_data; if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) { send_client_notify(key, (gpointer) client, (gpointer) data->notify); } } void notify_of_new_client(pcmk__client_t *new_client) { struct notify_new_client_data data; data.new_client = new_client; data.notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY); crm_xml_add(data.notify, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data.notify, PCMK__XA_LRMD_OP, LRMD_OP_NEW_CLIENT); pcmk__foreach_ipc_client(notify_one_client, &data); pcmk__xml_free(data.notify); } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; enum ocf_exitcode code; #ifdef PCMK__TIME_USE_CGT const char *rclass = NULL; bool goagain = false; int time_sum = 0; int timeout_left = 0; int delay = 0; #endif if (!cmd) { crm_err("Completed executor action (%s) does not match any known operations", action->id); return; } #ifdef PCMK__TIME_USE_CGT if (cmd->result.exit_status != action->rc) { cmd->epoch_rcchange = time(NULL); } #endif cmd->last_pid = action->pid; // Cast variable instead of function return to keep compilers happy code = services_result2ocf(action->standard, cmd->action, action->rc); pcmk__set_result(&(cmd->result), (int) code, action->status, services__exit_reason(action)); rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; #ifdef PCMK__TIME_USE_CGT if (rsc != NULL) { rclass = rsc->class; #if PCMK__ENABLE_SERVICE if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { rclass = resources_find_service_class(rsc->type); } #endif } if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { goto finalize; } if (pcmk__result_ok(&(cmd->result)) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, PCMK_ACTION_STOP, NULL)) { /* Getting results for when a start or stop action completes is now * handled by watching for JobRemoved() signals from systemd and * reacting to them. So, we can bypass the rest of the code in this * function for those actions, and simply finalize cmd. * * @TODO When monitors are handled in the same way, this function * can either be drastically simplified or done away with entirely. */ services__copy_result(action, &(cmd->result)); goto finalize; } else if (cmd->result.execution_status == PCMK_EXEC_PENDING && pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) && cmd->interval_ms == 0 && cmd->real_action == NULL) { /* If the state is Pending at the time of probe, execute follow-up monitor. */ goagain = true; cmd->real_action = cmd->action; cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR); } else if (cmd->real_action != NULL) { // This is follow-up monitor to check whether start/stop/probe(monitor) completed if (cmd->result.execution_status == PCMK_EXEC_PENDING) { goagain = true; } else if (pcmk__result_ok(&(cmd->result)) && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, pcmk__str_casei)) { goagain = true; } else { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s systemd %s is now complete (elapsed=%dms, " "remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, crm_exit_str(cmd->result.exit_status), cmd->result.exit_status); cmd_original_times(cmd); // Monitors may return "not running", but start/stop shouldn't if ((cmd->result.execution_status == PCMK_EXEC_DONE) && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START, pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; } } } } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) && (cmd->interval_ms > 0)) { /* For monitors, excluding follow-up monitors, */ /* if the pending state persists from the first notification until its timeout, */ /* it will be treated as a timeout. */ if ((cmd->result.execution_status == PCMK_EXEC_PENDING) && (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) { int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000)); if (time_left >= 0) { crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout " "(first pending notification=%s timeout=%ds)", cmd->rsc_id, cmd->action, cmd->result.exit_status, pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Investigate reason for timeout, and adjust " "configured operation timeout if necessary"); cmd_original_times(cmd); } } } if (!goagain) { goto finalize; } time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); timeout_left = cmd->timeout_orig - time_sum; delay = cmd->timeout_orig / 10; if (delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if (pcmk__result_ok(&(cmd->result))) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed: %s: Re-scheduling (remaining " "timeout %s) " QB_XS " exitstatus=%d elapsed=%dms start_delay=%dms)", cmd->rsc_id, cmd->action, crm_exit_str(cmd->result.exit_status), pcmk__readable_interval(timeout_left), cmd->result.exit_status, time_sum, delay); } cmd_reset(cmd); if (rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, (cmd->real_action? cmd->real_action : cmd->action), cmd->result.exit_status, time_sum, timeout_left); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Investigate reason for timeout, and adjust " "configured operation timeout if necessary"); cmd_original_times(cmd); } #endif finalize: pcmk__set_result_output(&(cmd->result), services__grab_stdout(action), services__grab_stderr(action)); cmd_finalize(cmd, rsc); } /*! * \internal * \brief Process the result of a fence device action (start, stop, or monitor) * * \param[in,out] cmd Fence device action that completed * \param[in] exit_status Fencer API exit status for action * \param[in] execution_status Fencer API execution status for action * \param[in] exit_reason Human-friendly detail, if action failed */ static void stonith_action_complete(lrmd_cmd_t *cmd, int exit_status, enum pcmk_exec_status execution_status, const char *exit_reason) { // This can be NULL if resource was removed before command completed lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); // Simplify fencer exit status to uniform exit status if (exit_status != CRM_EX_OK) { exit_status = PCMK_OCF_UNKNOWN_ERROR; } if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) { /* An in-flight fence action was cancelled. The execution status is * already correct, so don't overwrite it. */ execution_status = PCMK_EXEC_CANCELLED; } else { /* Some execution status codes have specific meanings for the fencer * that executor clients may not expect, so map them to a simple error * status. */ switch (execution_status) { case PCMK_EXEC_NOT_CONNECTED: case PCMK_EXEC_INVALID: execution_status = PCMK_EXEC_ERROR; break; case PCMK_EXEC_NO_FENCE_DEVICE: /* This should be possible only for probes in practice, but * interpret for all actions to be safe. */ if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_none)) { exit_status = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { exit_status = PCMK_OCF_OK; } else { exit_status = PCMK_OCF_NOT_INSTALLED; } execution_status = PCMK_EXEC_ERROR; break; case PCMK_EXEC_NOT_SUPPORTED: exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE; break; default: break; } } pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason); // Certain successful actions change the known state of the resource if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); // "running" } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running" } } /* The recurring timer should not be running at this point in any case, but * as a failsafe, stop it if it is. */ stop_recurring_timer(cmd); /* Reschedule this command if appropriate. If a recurring command is *not* * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will * not be removed from recurring_ops by cmd_finalize(). */ if (rsc && (cmd->interval_ms > 0) && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) { start_recurring_timer(cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence action result: " "Invalid callback arguments (bug?)"); } else { stonith_action_complete((lrmd_cmd_t *) data->userdata, stonith__exit_status(data), stonith__execution_status(data), stonith__exit_reason(data)); } } void stonith_connection_failed(void) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; crm_warn("Connection to fencer lost (any pending operations for " "fence devices will be considered failed)"); g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) { if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) { continue; } /* If we registered this fence device, we don't know whether the * fencer still has the registration or not. Cause future probes to * return an error until the resource is stopped or started * successfully. This is especially important if the controller also * went away (possibly due to a cluster layer restart) and won't * receive our client notification of any monitors finalized below. */ if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } // Consider any active, pending, or recurring operations as failed for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) { lrmd_cmd_t *cmd = op->data; /* This won't free a recurring op but instead restart its timer. * If cmd is rsc->active, this will set rsc->active to NULL, so we * don't have to worry about finalizing it a second time below. */ stonith_action_complete(cmd, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } if (rsc->active != NULL) { rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active); } while (rsc->pending_ops != NULL) { // This will free the op and remove it from rsc->pending_ops stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } } } /*! * \internal * \brief Execute a stonith resource "start" action * * Start a stonith resource by registering it with the fencer. * (Stonith agents don't have a start command.) * * \param[in,out] stonith_api Connection to fencer * \param[in] rsc Stonith resource to start * \param[in] cmd Start command to execute * * \return pcmk_ok on success, -errno otherwise */ static int execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc, const lrmd_cmd_t *cmd) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; int rc = pcmk_ok; // Convert command parameters to stonith API key/values if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith__key_value_add(device_params, key, value); } } /* The fencer will automatically register devices via CIB notifications * when the CIB changes, but to avoid a possible race condition between * the fencer receiving the notification and the executor requesting that * resource, the executor registers the device as well. The fencer knows how * to handle duplicate registrations. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith__key_value_freeall(device_params, true, true); return rc; } /*! * \internal * \brief Execute a stonith resource "stop" action * * Stop a stonith resource by unregistering it with the fencer. * (Stonith agents don't have a stop command.) * * \param[in,out] stonith_api Connection to fencer * \param[in] rsc Stonith resource to stop * * \return pcmk_ok on success, -errno otherwise */ static inline int execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc) { /* @TODO Failure would indicate a problem communicating with fencer; * perhaps we should try reconnecting and retrying a few times? */ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, rsc->rsc_id); } /*! * \internal * \brief Initiate a stonith resource agent recurring "monitor" action * * \param[in,out] stonith_api Connection to fencer * \param[in,out] rsc Stonith resource to monitor * \param[in] cmd Monitor command being executed * * \return pcmk_ok if monitor was successfully initiated, -errno otherwise */ static inline int execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, pcmk__timeout_ms2s(cmd->timeout)); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); if (rc == TRUE) { rsc->active = cmd; rc = pcmk_ok; } else { rc = -pcmk_err_generic; } return rc; } static void execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = pcmk_ok; const char *rc_s = NULL; bool do_monitor = false; // Don't free; belongs to pacemaker-execd.c stonith_t *stonith_api = get_stonith_connection(); if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei) && (cmd->interval_ms == 0)) { // Probes don't require a fencer connection stonith_action_complete(cmd, rsc->fence_probe_result.exit_status, rsc->fence_probe_result.execution_status, rsc->fence_probe_result.exit_reason); return; } if (stonith_api == NULL) { stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED, "No connection to fencer"); return; } if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) { rc = execd_stonith_start(stonith_api, rsc, cmd); if (rc == pcmk_ok) { do_monitor = true; } } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { rc = execd_stonith_stop(stonith_api, rsc); } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { do_monitor = true; } else { stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, "Invalid fence device action (bug?)"); return; } if (do_monitor) { rc = execd_stonith_monitor(stonith_api, rsc, cmd); if (rc == pcmk_ok) { // Don't clean up yet. We will get the result of the monitor later. return; } } if (rc != -pcmk_err_generic) { rc_s = pcmk_strerror(rc); } stonith_action_complete(cmd, ((rc == pcmk_rc_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), rc_s); } static void execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; pcmk__assert((rsc != NULL) && (cmd != NULL)); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); params_copy = pcmk__str_table_dup(cmd->params); action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval_ms, cmd->timeout, params_copy, cmd->service_flags); if (action == NULL) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, strerror(ENOMEM)); cmd_finalize(cmd, rsc); return; } if (action->rc != PCMK_OCF_UNKNOWN) { services__copy_result(action, &(cmd->result)); services_action_free(action); cmd_finalize(cmd, rsc); return; } action->cb_data = cmd; if (services_action_async(action, action_complete)) { /* The services library has taken responsibility for the action. It * could be pending, blocked, or merged into a duplicate recurring * action, in which case the action callback (action_complete()) * will be called when the action completes, otherwise the callback has * already been called. * * action_complete() calls cmd_finalize() which can free cmd, so cmd * cannot be used here. */ } else { /* This is a recurring action that is not being cancelled and could not * be initiated. It has been rescheduled, and the action callback * (action_complete()) has been called, which in this case has already * called cmd_finalize(), which in this case should only reset (not * free) cmd. */ services__copy_result(action, &(cmd->result)); services_action_free(action); } } static gboolean execute_resource_action(gpointer user_data) { lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data; lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_run), &(cmd->t_first_run)); #endif cmd->epoch_last_run = time(NULL); } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval_ms) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { execute_stonith_action(rsc, cmd); } else { execute_nonstonith_action(rsc, cmd); } return TRUE; } void free_rsc(gpointer data) { GList *gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei); gIter = rsc->pending_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval_ms); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } -static int -process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, +int +lrmd_process_signon(pcmk__client_t *client, xmlNode *request, int call_id, xmlNode **reply) { - int rc = pcmk_ok; + int rc = pcmk_rc_ok; time_t now = time(NULL); const char *protocol_version = crm_element_value(request, PCMK__XA_LRMD_PROTOCOL_VERSION); const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); if (compare_version(protocol_version, LRMD_COMPATIBLE_PROTOCOL) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_COMPATIBLE_PROTOCOL, protocol_version); - rc = -EPROTO; + rc = EPROTO; } if (pcmk__xe_attr_is_true(request, PCMK__XA_LRMD_IS_IPC_PROVIDER)) { #ifdef PCMK__COMPILE_REMOTE if ((client->remote != NULL) && pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { const char *op = crm_element_value(request, PCMK__XA_LRMD_OP); // This is a remote connection from a cluster node's controller ipc_proxy_add_provider(client); /* @TODO Allowing multiple proxies makes no sense given that clients * have no way to choose between them. Maybe always use the most * recent one and switch any existing IPC connections to use it, * by iterating over ipc_clients here, and if client->id doesn't * match the client's userdata, replace the userdata with the new * ID. After the iteration, call lrmd_remote_client_destroy() on any * of the replaced values in ipc_providers. */ /* If this was a register operation, also ask for new schema files but * only if it's supported by the protocol version. */ if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) && LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) { remoted_request_cib_schema_files(); } } else { - rc = -EACCES; + rc = EACCES; } #else - rc = -EPROTONOSUPPORT; + rc = EPROTONOSUPPORT; #endif } - *reply = create_lrmd_reply(__func__, rc, call_id); + *reply = lrmd_create_reply(__func__, pcmk_rc2legacy(rc), call_id); crm_xml_add(*reply, PCMK__XA_LRMD_OP, CRM_OP_REGISTER); crm_xml_add(*reply, PCMK__XA_LRMD_CLIENTID, client->id); crm_xml_add(*reply, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time); if (start_state) { crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state); } return rc; } -static int -process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request) +void +lrmd_process_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request) { - int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) && - pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) { + pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && + pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) { crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id); free_rsc(rsc); - return rc; + return; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Cached agent information for '%s'", rsc->rsc_id); - return rc; } -static xmlNode * -process_lrmd_get_rsc_info(xmlNode *request, int call_id) +int +lrmd_process_get_rsc_info(xmlNode *request, int call_id, xmlNode **reply) { - int rc = pcmk_ok; + int rc = pcmk_rc_ok; xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID); - xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; if (rsc_id == NULL) { - rc = -ENODEV; + rc = ENODEV; } else { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Agent information for '%s' not in cache", rsc_id); - rc = -ENODEV; + rc = ENODEV; } } - reply = create_lrmd_reply(__func__, rc, call_id); + *reply = lrmd_create_reply(__func__, rc, call_id); if (rsc) { - crm_xml_add(reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id); - crm_xml_add(reply, PCMK__XA_LRMD_CLASS, rsc->class); - crm_xml_add(reply, PCMK__XA_LRMD_PROVIDER, rsc->provider); - crm_xml_add(reply, PCMK__XA_LRMD_TYPE, rsc->type); + crm_xml_add(*reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id); + crm_xml_add(*reply, PCMK__XA_LRMD_CLASS, rsc->class); + crm_xml_add(*reply, PCMK__XA_LRMD_PROVIDER, rsc->provider); + crm_xml_add(*reply, PCMK__XA_LRMD_TYPE, rsc->type); } - return reply; + + return rc; } -static int -process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id, - xmlNode *request) +int +lrmd_process_rsc_unregister(pcmk__client_t *client, xmlNode *request) { - int rc = pcmk_ok; + int rc = pcmk_rc_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID); if (!rsc_id) { - return -ENODEV; + return ENODEV; } rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Ignoring unregistration of resource '%s', which is not registered", rsc_id); - return pcmk_ok; + return pcmk_rc_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation (%p) still in progress for unregistered resource %s", rsc->active, rsc_id); - rc = -EINPROGRESS; + rc = EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } -static int -process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) +int +lrmd_process_rsc_exec(pcmk__client_t *client, xmlNode *request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID); - int call_id; + int call_id = 0; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, guint interval_ms) { GList *gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { - return -ENODEV; + return ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, rsc); - return pcmk_ok; + return pcmk_rc_ok; } } if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } - return pcmk_ok; + return pcmk_rc_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval_ms) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ - return pcmk_ok; + return pcmk_rc_ok; } - return -EOPNOTSUPP; + return EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval_ms == 0) { continue; } if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } -static int -process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request) +int +lrmd_process_rsc_cancel(pcmk__client_t *client, xmlNode *request) { xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ACTION); guint interval_ms = 0; crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &interval_ms); if (!rsc_id || !action) { - return -EINVAL; + return EINVAL; } return cancel_op(rsc_id, action, interval_ms); } static void add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc) { xmlNode *rsc_xml = pcmk__xe_create(reply, PCMK__XE_LRMD_RSC); crm_xml_add(rsc_xml, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id); for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) { lrmd_cmd_t *cmd = item->data; xmlNode *op_xml = pcmk__xe_create(rsc_xml, PCMK__XE_LRMD_RSC_OP); crm_xml_add(op_xml, PCMK__XA_LRMD_RSC_ACTION, pcmk__s(cmd->real_action, cmd->action)); crm_xml_add_ms(op_xml, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig); } } -static xmlNode * -process_lrmd_get_recurring(xmlNode *request, int call_id) +int +lrmd_process_get_recurring(xmlNode *request, int call_id, xmlNode **reply) { - int rc = pcmk_ok; + int rc = pcmk_rc_ok; const char *rsc_id = NULL; lrmd_rsc_t *rsc = NULL; - xmlNode *reply = NULL; xmlNode *rsc_xml = NULL; // Resource ID is optional rsc_xml = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA, NULL, NULL); if (rsc_xml) { rsc_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC, NULL, NULL); } if (rsc_xml) { rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID); } // If resource ID is specified, resource must exist if (rsc_id != NULL) { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); - rc = -ENODEV; + rc = ENODEV; } } - reply = create_lrmd_reply(__func__, rc, call_id); + *reply = lrmd_create_reply(__func__, pcmk_rc2legacy(rc), call_id); // If resource ID is not specified, check all resources if (rsc_id == NULL) { GHashTableIter iter; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rsc)) { - add_recurring_op_xml(reply, rsc); + add_recurring_op_xml(*reply, rsc); } } else if (rsc) { - add_recurring_op_xml(reply, rsc); + add_recurring_op_xml(*reply, rsc); } - return reply; -} -void -process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request) -{ - int rc = pcmk_ok; - int call_id = 0; - const char *op = crm_element_value(request, PCMK__XA_LRMD_OP); - int do_reply = 0; - int do_notify = 0; - xmlNode *reply = NULL; - - /* Certain IPC commands may be done only by privileged users (i.e. root or - * hacluster), because they would otherwise provide a means of bypassing - * ACLs. - */ - bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged); - - crm_trace("Processing %s operation from %s", op, client->id); - crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id); - - if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) { -#ifdef PCMK__COMPILE_REMOTE - if (allowed) { - ipc_proxy_forward_client(client, request); - } else { - rc = -EACCES; - } -#else - rc = -EPROTONOSUPPORT; -#endif - do_reply = 1; - } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { - rc = process_lrmd_signon(client, request, call_id, &reply); - do_reply = 1; - } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) { - if (allowed) { - rc = process_lrmd_rsc_register(client, id, request); - do_notify = 1; - } else { - rc = -EACCES; - } - do_reply = 1; - } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) { - if (allowed) { - reply = process_lrmd_get_rsc_info(request, call_id); - } else { - rc = -EACCES; - } - do_reply = 1; - } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) { - if (allowed) { - rc = process_lrmd_rsc_unregister(client, id, request); - /* don't notify anyone about failed un-registers */ - if (rc == pcmk_ok || rc == -EINPROGRESS) { - do_notify = 1; - } - } else { - rc = -EACCES; - } - do_reply = 1; - } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) { - if (allowed) { - rc = process_lrmd_rsc_exec(client, id, request); - } else { - rc = -EACCES; - } - do_reply = 1; - } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) { - if (allowed) { - rc = process_lrmd_rsc_cancel(client, id, request); - } else { - rc = -EACCES; - } - do_reply = 1; - } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) { - do_notify = 1; - do_reply = 1; - } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) { - if (allowed) { - xmlNode *wrapper = pcmk__xe_first_child(request, - PCMK__XE_LRMD_CALLDATA, - NULL, NULL); - xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); - - const char *timeout = NULL; - - CRM_LOG_ASSERT(data != NULL); - timeout = crm_element_value(data, PCMK__XA_LRMD_WATCHDOG); - pcmk__valid_stonith_watchdog_timeout(timeout); - } else { - rc = -EACCES; - } - } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) { - if (allowed) { - rc = process_lrmd_alert_exec(client, id, request); - } else { - rc = -EACCES; - } - do_reply = 1; - } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) { - if (allowed) { - reply = process_lrmd_get_recurring(request, call_id); - } else { - rc = -EACCES; - } - do_reply = 1; - } else { - rc = -EOPNOTSUPP; - do_reply = 1; - crm_err("Unknown IPC request '%s' from client %s", - op, pcmk__client_name(client)); - } - - if (rc == -EACCES) { - crm_warn("Rejecting IPC request '%s' from unprivileged client %s", - op, pcmk__client_name(client)); - } - - crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", - op, client->id, rc, do_reply, do_notify); - - if (do_reply) { - int send_rc = pcmk_rc_ok; - - if (reply == NULL) { - reply = create_lrmd_reply(__func__, rc, call_id); - } - send_rc = lrmd_server_send_reply(client, id, reply); - pcmk__xml_free(reply); - if (send_rc != pcmk_rc_ok) { - crm_warn("Reply to client %s failed: %s " QB_XS " rc=%d", - pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc); - } - } - - if (do_notify) { - send_generic_notify(rc, request); - } + return rc; } diff --git a/daemons/execd/execd_messages.c b/daemons/execd/execd_messages.c new file mode 100644 index 0000000000..828f8dd3a1 --- /dev/null +++ b/daemons/execd/execd_messages.c @@ -0,0 +1,616 @@ +/* + * Copyright 2012-2025 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "pacemaker-execd.h" + +static GHashTable *execd_handlers = NULL; +static int lrmd_call_id = 0; + +static xmlNode * +handle_ipc_fwd_request(pcmk__request_t *request) +{ + int call_id = 0; + int rc = pcmk_rc_ok; + xmlNode *reply = NULL; + +#ifdef PCMK__COMPILE_REMOTE + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + ipc_proxy_forward_client(request->ipc_client, request->xml); +#else + rc = EPROTONOSUPPORT; +#endif + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + /* Create a generic reply since forwarding doesn't create a more specific one */ + reply = lrmd_create_reply(__func__, pcmk_rc2legacy(rc), call_id); + return reply; +} + +static xmlNode * +handle_register_request(pcmk__request_t *request) +{ + int call_id = 0; + int rc = pcmk_rc_ok; + xmlNode *reply = NULL; + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + rc = lrmd_process_signon(request->ipc_client, request->xml, call_id, &reply); + + if (rc != pcmk_rc_ok) { + pcmk__set_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + pcmk_rc_str(rc)); + return NULL; + } + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return reply; +} + +static xmlNode * +handle_alert_exec_request(pcmk__request_t *request) +{ + int call_id = 0; + int rc = pcmk_rc_ok; + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + xmlNode *reply = NULL; + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + rc = lrmd_process_alert_exec(request->ipc_client, request->xml); + + if (rc == pcmk_rc_ok) { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } else { + pcmk__set_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + pcmk_rc_str(rc)); + } + + /* Create a generic reply since executing an alert doesn't create a + * more specific one. + */ + reply = lrmd_create_reply(__func__, pcmk_rc2legacy(rc), call_id); + return reply; +} + +static xmlNode * +handle_check_request(pcmk__request_t *request) +{ + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + xmlNode *wrapper = NULL; + xmlNode *data = NULL; + const char *timeout = NULL; + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + wrapper = pcmk__xe_first_child(request->xml, + PCMK__XE_LRMD_CALLDATA, + NULL, NULL); + data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); + + if (data == NULL) { + pcmk__set_result(&request->result, CRM_EX_CONFIG, PCMK_EXEC_INVALID, + NULL); + return NULL; + } + + timeout = crm_element_value(data, PCMK__XA_LRMD_WATCHDOG); + /* FIXME: This just exits on certain conditions, which seems like a pretty + * extreme reaction for a daemon to take. + */ + pcmk__valid_stonith_watchdog_timeout(timeout); + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +} + +static xmlNode * +handle_get_recurring_request(pcmk__request_t *request) +{ + int call_id = 0; + int rc = pcmk_rc_ok; + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + xmlNode *reply = NULL; + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + rc = lrmd_process_get_recurring(request->xml, call_id, &reply); + + if (rc == pcmk_rc_ok) { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } else { + pcmk__set_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, NULL); + } + + return reply; +} + +static xmlNode * +handle_poke_request(pcmk__request_t *request) +{ + int call_id = 0; + xmlNode *reply = NULL; + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + /* Create a generic reply since this doesn't create a more specific one */ + reply = lrmd_create_reply(__func__, pcmk_rc_ok, call_id); + return reply; +} + +static xmlNode * +handle_rsc_cancel_request(pcmk__request_t *request) +{ + int call_id = 0; + int rc = pcmk_rc_ok; + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + xmlNode *reply = NULL; + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + rc = lrmd_process_rsc_cancel(request->ipc_client, request->xml); + + if (rc == pcmk_rc_ok) { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } else { + pcmk__set_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + pcmk_rc_str(rc)); + } + + /* Create a generic reply since canceling a resource doesn't create a + * more specific one. + */ + reply = lrmd_create_reply(__func__, pcmk_rc2legacy(rc), call_id); + return reply; +} + +static xmlNode * +handle_rsc_exec_request(pcmk__request_t *request) +{ + int call_id = 0; + int rc = pcmk_rc_ok; + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + xmlNode *reply = NULL; + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + /* Positive return value is the call ID, negative return value is an error */ + rc = lrmd_process_rsc_exec(request->ipc_client, request->xml); + + if (rc >= 0) { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } else { + pcmk__set_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, + pcmk_rc_str(rc)); + } + + /* Create a generic reply since executing a resource doesn't create a + * more specific one. + */ + reply = lrmd_create_reply(__func__, rc, call_id); + return reply; +} + +static xmlNode * +handle_rsc_info_request(pcmk__request_t *request) +{ + int call_id = 0; + int rc = pcmk_rc_ok; + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + xmlNode *reply = NULL; + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + /* This returns ENODEV if the device isn't in the cache which will be + * logged as an error. However, this isn't fatal to the client - it + * may be querying to see if the device exists before deciding to + * register it. + */ + rc = lrmd_process_get_rsc_info(request->xml, call_id, &reply); + + if (rc == pcmk_rc_ok) { + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + } else { + pcmk__set_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, NULL); + } + + return reply; +} + +static xmlNode * +handle_rsc_reg_request(pcmk__request_t *request) +{ + int call_id = 0; + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + xmlNode *reply = NULL; + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + lrmd_process_rsc_register(request->ipc_client, request->ipc_id, request->xml); + + /* Create a generic reply since registering a resource doesn't create + * a more specific one. + */ + reply = lrmd_create_reply(__func__, pcmk_ok, call_id); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return reply; +} + +static xmlNode * +handle_rsc_unreg_request(pcmk__request_t *request) +{ + int call_id = 0; + int rc = pcmk_rc_ok; + bool allowed = pcmk_is_set(request->ipc_client->flags, + pcmk__client_privileged); + xmlNode *reply = NULL; + + if (!allowed) { + pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, + PCMK_EXEC_INVALID, NULL); + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + request->op, pcmk__client_name(request->ipc_client)); + return NULL; + } + + crm_element_value_int(request->xml, PCMK__XA_LRMD_CALLID, &call_id); + + rc = lrmd_process_rsc_unregister(request->ipc_client, request->xml); + + /* Create a generic reply since unregistering a resource doesn't create + * a more specific one. + */ + reply = lrmd_create_reply(__func__, pcmk_rc2legacy(rc), call_id); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return reply; +} + +static bool +requires_notify(const char *command, int rc) +{ + if (pcmk__str_eq(command, LRMD_OP_RSC_UNREG, pcmk__str_none)) { + /* Don't notify about failed unregisters */ + return (rc == pcmk_rc_ok) || (rc == EINPROGRESS); + } else { + return pcmk__str_any_of(command, LRMD_OP_POKE, LRMD_OP_RSC_REG, NULL); + } +} + + +static xmlNode * +handle_unknown_request(pcmk__request_t *request) +{ + pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, + PCMK__XE_ACK, NULL, CRM_EX_INVALID_PARAM); + + pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, + "Unknown IPC request type '%s' (bug?)", + pcmk__client_name(request->ipc_client)); + return NULL; +} + +static void +execd_register_handlers(void) +{ + pcmk__server_command_t handlers[] = { + { CRM_OP_IPC_FWD, handle_ipc_fwd_request }, + { CRM_OP_REGISTER, handle_register_request }, + { LRMD_OP_ALERT_EXEC, handle_alert_exec_request }, + { LRMD_OP_CHECK, handle_check_request }, + { LRMD_OP_GET_RECURRING, handle_get_recurring_request }, + { LRMD_OP_POKE, handle_poke_request }, + { LRMD_OP_RSC_CANCEL, handle_rsc_cancel_request }, + { LRMD_OP_RSC_EXEC, handle_rsc_exec_request }, + { LRMD_OP_RSC_INFO, handle_rsc_info_request }, + { LRMD_OP_RSC_REG, handle_rsc_reg_request }, + { LRMD_OP_RSC_UNREG, handle_rsc_unreg_request }, + { NULL, handle_unknown_request }, + }; + + execd_handlers = pcmk__register_handlers(handlers); +} + +static int32_t +lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + crm_trace("Connection %p", c); + if (pcmk__new_client(c, uid, gid) == NULL) { + return -ENOMEM; + } + return 0; +} + +static void +lrmd_ipc_created(qb_ipcs_connection_t * c) +{ + pcmk__client_t *new_client = pcmk__find_client(c); + + crm_trace("Connection %p", c); + pcmk__assert(new_client != NULL); + /* Now that the connection is offically established, alert + * the other clients a new connection exists. */ + + notify_of_new_client(new_client); +} + +static int32_t +lrmd_ipc_dispatch(qb_ipcs_connection_t *qbc, void *data, size_t size) +{ + int rc = pcmk_rc_ok; + uint32_t id = 0; + uint32_t flags = 0; + pcmk__client_t *c = pcmk__find_client(qbc); + xmlNode *msg = NULL; + + CRM_CHECK(c != NULL, crm_err("Invalid client"); + return FALSE); + CRM_CHECK(c->id != NULL, crm_err("Invalid client: %p", c); + return FALSE); + + rc = pcmk__ipc_msg_append(&c->buffer, data); + + if (rc == pcmk_rc_ipc_more) { + /* We haven't read the complete message yet, so just return. */ + return 0; + + } else if (rc == pcmk_rc_ok) { + /* We've read the complete message and there's already a header on + * the front. Pass it off for processing. + */ + msg = pcmk__client_data2xml(c, &id, &flags); + g_byte_array_free(c->buffer, TRUE); + c->buffer = NULL; + + } else { + /* Some sort of error occurred reassembling the message. All we can + * do is clean up, log an error and return. + */ + crm_err("Error when reading IPC message: %s", pcmk_rc_str(rc)); + + if (c->buffer != NULL) { + g_byte_array_free(c->buffer, TRUE); + c->buffer = NULL; + } + + return 0; + } + + CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", c); + return FALSE); + + if (!msg) { + return 0; + } + + lrmd_process_message(c, id, flags, msg); + pcmk__xml_free(msg); + return 0; +} + +static int32_t +lrmd_ipc_closed(qb_ipcs_connection_t * c) +{ + pcmk__client_t *client = pcmk__find_client(c); + + if (client == NULL) { + return 0; + } + + crm_trace("Connection %p", c); + client_disconnect_cleanup(client->id); +#ifdef PCMK__COMPILE_REMOTE + ipc_proxy_remove_provider(client); +#endif + lrmd_client_destroy(client); + return 0; +} + +static void +lrmd_ipc_destroy(qb_ipcs_connection_t * c) +{ + lrmd_ipc_closed(c); + crm_trace("Connection %p", c); +} + +struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { + .connection_accept = lrmd_ipc_accept, + .connection_created = lrmd_ipc_created, + .msg_process = lrmd_ipc_dispatch, + .connection_closed = lrmd_ipc_closed, + .connection_destroyed = lrmd_ipc_destroy +}; + +static bool +invalid_msg(xmlNode *msg) +{ + const char *to = crm_element_value(msg, PCMK__XA_T); + + /* IPC proxy messages do not get a t="" attribute set on them. */ + bool invalid = (!pcmk__str_eq(to, CRM_SYSTEM_LRMD, pcmk__str_none)) && + (!pcmk__xe_is(msg, PCMK__XE_LRMD_IPC_PROXY)); + + if (invalid) { + crm_info("Ignoring invalid IPC message: to '%s' not " CRM_SYSTEM_LRMD, + pcmk__s(to, "")); + crm_log_xml_info(msg, "[Invalid]"); + } + + return invalid; +} + +void +lrmd_process_message(pcmk__client_t *c, uint32_t id, uint32_t flags, xmlNode *msg) +{ + int rc = pcmk_rc_ok; + + if (execd_handlers == NULL) { + execd_register_handlers(); + } + + if (!c->name) { + c->name = crm_element_value_copy(msg, PCMK__XA_LRMD_CLIENTNAME); + } + + lrmd_call_id++; + if (lrmd_call_id < 1) { + lrmd_call_id = 1; + } + + crm_xml_add(msg, PCMK__XA_LRMD_CLIENTID, c->id); + crm_xml_add(msg, PCMK__XA_LRMD_CLIENTNAME, c->name); + crm_xml_add_int(msg, PCMK__XA_LRMD_CALLID, lrmd_call_id); + + if (invalid_msg(msg)) { + pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, + CRM_EX_INDETERMINATE); + } else { + char *log_msg = NULL; + const char *reason = NULL; + xmlNode *reply = NULL; + + pcmk__request_t request = { + .ipc_client = c, + .ipc_id = id, + .ipc_flags = flags, + .peer = NULL, + .xml = msg, + .call_options = 0, + .result = PCMK__UNKNOWN_RESULT, + }; + + request.op = crm_element_value_copy(request.xml, PCMK__XA_LRMD_OP); + CRM_CHECK(request.op != NULL, return); + + crm_trace("Processing %s operation from %s", request.op, c->id); + + reply = pcmk__process_request(&request, execd_handlers); + + if (reply != NULL) { + int reply_rc = pcmk_rc_ok; + + rc = lrmd_server_send_reply(c, id, reply); + if (rc != pcmk_rc_ok) { + crm_warn("Reply to client %s failed: %s " QB_XS " rc=%d", + pcmk__client_name(c), pcmk_rc_str(rc), rc); + } + + crm_element_value_int(reply, PCMK__XA_LRMD_RC, &reply_rc); + if (requires_notify(request.op, reply_rc)) { + lrmd_send_generic_notify(reply_rc, request.xml); + } + + pcmk__xml_free(reply); + } + + reason = request.result.exit_reason; + + log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", + request.op, pcmk__request_origin_type(&request), + pcmk__request_origin(&request), + pcmk_exec_status_str(request.result.execution_status), + (reason == NULL)? "" : " (", + (reason == NULL)? "" : reason, + (reason == NULL)? "" : ")"); + + if (!pcmk__result_ok(&request.result)) { + crm_warn("%s", log_msg); + } else { + crm_debug("%s", log_msg); + } + + free(log_msg); + pcmk__reset_request(&request); + } +} diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index 3229487e4e..a26ffbb6e1 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -1,606 +1,470 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include // stonith__api_new() #include #include "pacemaker-execd.h" #ifdef PCMK__COMPILE_REMOTE # define EXECD_TYPE "remote" # define EXECD_NAME PCMK__SERVER_REMOTED # define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes" #else # define EXECD_TYPE "local" # define EXECD_NAME PCMK__SERVER_EXECD # define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes" #endif static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static stonith_t *stonith_api = NULL; -int lrmd_call_id = 0; time_t start_time; static struct { gchar **log_files; #ifdef PCMK__COMPILE_REMOTE gchar *port; #endif // PCMK__COMPILE_REMOTE } options; #ifdef PCMK__COMPILE_REMOTE /* whether shutdown request has been sent */ static gboolean shutting_down = FALSE; #endif static void exit_executor(void); static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { stonith__api_free(stonith_api); stonith_api = NULL; } if (stonith_api == NULL) { int rc = pcmk_ok; stonith_api = stonith__api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return NULL; } rc = stonith__api_connect_retry(stonith_api, crm_system_name, 10); if (rc != pcmk_rc_ok) { crm_err("Could not connect to fencer in 10 attempts: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); stonith__api_free(stonith_api); stonith_api = NULL; } else { stonith_api_operations_t *cmds = stonith_api->cmds; cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); } } return stonith_api; } -static int32_t -lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -{ - crm_trace("Connection %p", c); - if (pcmk__new_client(c, uid, gid) == NULL) { - return -ENOMEM; - } - return 0; -} - -static void -lrmd_ipc_created(qb_ipcs_connection_t * c) -{ - pcmk__client_t *new_client = pcmk__find_client(c); - - crm_trace("Connection %p", c); - pcmk__assert(new_client != NULL); - /* Now that the connection is offically established, alert - * the other clients a new connection exists. */ - - notify_of_new_client(new_client); -} - -static int32_t -lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) -{ - int rc = pcmk_rc_ok; - uint32_t id = 0; - uint32_t flags = 0; - pcmk__client_t *client = pcmk__find_client(c); - xmlNode *request = NULL; - - CRM_CHECK(client != NULL, crm_err("Invalid client"); - return FALSE); - CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); - return FALSE); - - rc = pcmk__ipc_msg_append(&client->buffer, data); - - if (rc == pcmk_rc_ipc_more) { - /* We haven't read the complete message yet, so just return. */ - return 0; - - } else if (rc == pcmk_rc_ok) { - /* We've read the complete message and there's already a header on - * the front. Pass it off for processing. - */ - request = pcmk__client_data2xml(client, &id, &flags); - g_byte_array_free(client->buffer, TRUE); - client->buffer = NULL; - - } else { - /* Some sort of error occurred reassembling the message. All we can - * do is clean up, log an error and return. - */ - crm_err("Error when reading IPC message: %s", pcmk_rc_str(rc)); - - if (client->buffer != NULL) { - g_byte_array_free(client->buffer, TRUE); - client->buffer = NULL; - } - - return 0; - } - - CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); - return FALSE); - - if (!request) { - return 0; - } - - /* @TODO functionize some of this to reduce duplication with - * lrmd_remote_client_msg() - */ - - if (!client->name) { - const char *value = crm_element_value(request, - PCMK__XA_LRMD_CLIENTNAME); - - if (value == NULL) { - client->name = pcmk__itoa(pcmk__client_pid(c)); - } else { - client->name = pcmk__str_copy(value); - } - } - - lrmd_call_id++; - if (lrmd_call_id < 1) { - lrmd_call_id = 1; - } - - crm_xml_add(request, PCMK__XA_LRMD_CLIENTID, client->id); - crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, client->name); - crm_xml_add_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); - - process_lrmd_message(client, id, request); - - pcmk__xml_free(request); - return 0; -} - /*! * \internal * \brief Free a client connection, and exit if appropriate * * \param[in,out] client Client connection to free */ void lrmd_client_destroy(pcmk__client_t *client) { pcmk__free_client(client); #ifdef PCMK__COMPILE_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { exit_executor(); } #endif } -static int32_t -lrmd_ipc_closed(qb_ipcs_connection_t * c) -{ - pcmk__client_t *client = pcmk__find_client(c); - - if (client == NULL) { - return 0; - } - - crm_trace("Connection %p", c); - client_disconnect_cleanup(client->id); -#ifdef PCMK__COMPILE_REMOTE - ipc_proxy_remove_provider(client); -#endif - lrmd_client_destroy(client); - return 0; -} - -static void -lrmd_ipc_destroy(qb_ipcs_connection_t * c) -{ - lrmd_ipc_closed(c); - crm_trace("Connection %p", c); -} - -static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { - .connection_accept = lrmd_ipc_accept, - .connection_created = lrmd_ipc_created, - .msg_process = lrmd_ipc_dispatch, - .connection_closed = lrmd_ipc_closed, - .connection_destroyed = lrmd_ipc_destroy -}; - // \return Standard Pacemaker return code int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply) { crm_trace("Sending reply (%d) to client (%s)", id, client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: return pcmk__ipc_send_xml(client, id, reply, crm_ipc_flags_none); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: return lrmd__remote_send_xml(client->remote, reply, id, "reply"); #endif default: crm_err("Could not send reply: unknown type for client %s " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } // \return Standard Pacemaker return code int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg) { crm_trace("Sending notification to client (%s)", client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: if (client->ipcs == NULL) { crm_trace("Could not notify local client: disconnected"); return ENOTCONN; } return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: if (client->remote == NULL) { crm_trace("Could not notify remote client: disconnected"); return ENOTCONN; } else { return lrmd__remote_send_xml(client->remote, msg, 0, "notify"); } #endif default: crm_err("Could not notify client %s with unknown transport " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } /*! * \internal * \brief Clean up and exit immediately */ static void exit_executor(void) { const guint nclients = pcmk__ipc_client_count(); crm_info("Terminating with %d client%s", nclients, pcmk__plural_s(nclients)); stonith__api_free(stonith_api); if (ipcs) { mainloop_del_ipc_server(ipcs); } #ifdef PCMK__COMPILE_REMOTE execd_stop_tls_server(); ipc_proxy_cleanup(); #endif pcmk__client_cleanup(); if (mainloop) { lrmd_drain_alerts(mainloop); } g_hash_table_destroy(rsc_list); // @TODO End mainloop instead so all cleanup is done crm_exit(CRM_EX_OK); } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef PCMK__COMPILE_REMOTE pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host, then wait for all proxy * hosts to disconnect (which ensures that all resources have been * stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ execd_stop_tls_server(); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif exit_executor(); } /*! * \internal * \brief Log a shutdown acknowledgment */ void handle_shutdown_ack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("IPC proxy provider acknowledged shutdown request"); return; } #endif crm_debug("Ignoring unexpected shutdown acknowledgment " "from IPC proxy provider"); } /*! * \internal * \brief Handle rejection of shutdown request */ void handle_shutdown_nack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("Exiting immediately after IPC proxy provider " "indicated no resources will be stopped"); exit_executor(); return; } #endif crm_debug("Ignoring unexpected shutdown rejection from IPC proxy provider"); } static GOptionEntry entries[] = { { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, "Send logs to the additional named logfile", NULL }, #ifdef PCMK__COMPILE_REMOTE { "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port, "Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL }, #endif // PCMK__COMPILE_REMOTE { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; const char *option = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = NULL; gchar **processed_args = NULL; GOptionContext *context = NULL; #ifdef PCMK__COMPILE_REMOTE // If necessary, create PID 1 now before any file descriptors are opened remoted_spawn_pidone(argc, argv); #endif args = pcmk__new_common_args(SUMMARY); #ifdef PCMK__COMPILE_REMOTE processed_args = pcmk__cmdline_preproc(argv, "lp"); #else processed_args = pcmk__cmdline_preproc(argv, "l"); #endif // PCMK__COMPILE_REMOTE context = build_arg_context(args, &output_group); crm_log_preinit(EXECD_NAME, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } // Open additional log files if (options.log_files != NULL) { for (gchar **fname = options.log_files; *fname != NULL; fname++) { rc = pcmk__add_logfile(*fname); if (rc != pcmk_rc_ok) { out->err(out, "Logging to %s is disabled: %s", *fname, pcmk_rc_str(rc)); } } } pcmk__cli_init_logging(EXECD_NAME, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); // ocf_log() (in resource-agents) uses the capitalized env options below option = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches) && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) { pcmk__set_env_option("LOGFACILITY", option, true); } option = pcmk__env_option(PCMK__ENV_LOGFILE); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__set_env_option("LOGFILE", option, true); if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { pcmk__set_env_option("DEBUGLOG", option, true); } } #ifdef PCMK__COMPILE_REMOTE if (options.port != NULL) { pcmk__set_env_option(PCMK__ENV_REMOTE_PORT, options.port, false); } #endif // PCMK__COMPILE_REMOTE start_time = time(NULL); crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); { // Temporary directory for resource agent use (leave owned by root) int rc = pcmk__build_path(PCMK__OCF_TMP_DIR, 0755); if (rc != pcmk_rc_ok) { crm_warn("Could not create resource agent temporary directory " PCMK__OCF_TMP_DIR ": %s", pcmk_rc_str(rc)); } } rsc_list = pcmk__strkey_table(NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); exit_code = CRM_EX_FATAL; goto done; } #ifdef PCMK__COMPILE_REMOTE if (lrmd_init_remote_tls_server() < 0) { crm_err("Failed to create TLS listener: shutting down and staying down"); exit_code = CRM_EX_FATAL; goto done; } ipc_proxy_init(); #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); crm_notice("OCF resource agent search path is %s", PCMK__OCF_RA_PATH); g_main_loop_run(mainloop); /* should never get here */ exit_executor(); done: g_strfreev(options.log_files); #ifdef PCMK__COMPILE_REMOTE g_free(options.port); #endif // PCMK__COMPILE_REMOTE g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h index a66f5ef888..1beb455660 100644 --- a/daemons/execd/pacemaker-execd.h +++ b/daemons/execd/pacemaker-execd.h @@ -1,107 +1,120 @@ /* - * Copyright 2012-2024 the Pacemaker project contributors + * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PACEMAKER_EXECD__H # define PACEMAKER_EXECD__H # include # include # include # include # include extern GHashTable *rsc_list; extern time_t start_time; +extern struct qb_ipcs_service_handlers lrmd_ipc_callbacks; + typedef struct lrmd_rsc_s { char *rsc_id; char *class; char *provider; char *type; int call_opts; /* NEVER dereference this pointer, * It simply exists as a switch to let us know * when the currently active operation has completed */ void *active; /* Operations in this list * have not been executed yet. */ GList *pending_ops; /* Operations in this list are recurring operations * that have been handed off from the pending ops list. */ GList *recurring_ops; /* If this resource is a fence device, probes are handled internally by the * executor, and this value indicates the result that should currently be * returned for probes. It should be one of: * PCMK_EXEC_DONE (to indicate "running"), * PCMK_EXEC_NO_FENCE_DEVICE ("not running"), or * PCMK_EXEC_NOT_CONNECTED ("unknown because fencer connection was lost"). */ pcmk__action_result_t fence_probe_result; crm_trigger_t *work; } lrmd_rsc_t; // in remoted_tls.c int lrmd_init_remote_tls_server(void); void execd_stop_tls_server(void); int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply); int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg); void notify_of_new_client(pcmk__client_t *new_client); -void process_lrmd_message(pcmk__client_t *client, uint32_t id, - xmlNode *request); - void free_rsc(gpointer data); void handle_shutdown_ack(void); void handle_shutdown_nack(void); void lrmd_client_destroy(pcmk__client_t *client); void client_disconnect_cleanup(const char *client_id); /*! * \brief Don't worry about freeing this connection. It is * taken care of after mainloop exits by the main() function. */ stonith_t *get_stonith_connection(void); /*! * \brief This is a callback that tells the lrmd * the current stonith connection has gone away. This allows * us to timeout any pending stonith commands */ void stonith_connection_failed(void); #ifdef PCMK__COMPILE_REMOTE void ipc_proxy_init(void); void ipc_proxy_cleanup(void); void ipc_proxy_add_provider(pcmk__client_t *client); void ipc_proxy_remove_provider(pcmk__client_t *client); void ipc_proxy_forward_client(pcmk__client_t *client, xmlNode *xml); pcmk__client_t *ipc_proxy_get_provider(void); int ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy); void remoted_spawn_pidone(int argc, char **argv); void remoted_request_cib_schema_files(void); #endif -int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, - xmlNode *request); void lrmd_drain_alerts(GMainLoop *mloop); +void lrmd_process_message(pcmk__client_t *c, uint32_t id, uint32_t flags, + xmlNode *msg); + +xmlNode *lrmd_create_reply(const char *origin, int rc, int call_id); +void lrmd_send_generic_notify(int rc, xmlNode * request); + +int lrmd_process_alert_exec(pcmk__client_t *client, xmlNode *request); +int lrmd_process_get_recurring(xmlNode *request, int call_id, xmlNode **reply); +int lrmd_process_get_rsc_info(xmlNode *request, int call_id, xmlNode **reply); +int lrmd_process_rsc_cancel(pcmk__client_t *client, xmlNode *request); +int lrmd_process_rsc_exec(pcmk__client_t *client, xmlNode *request); +void lrmd_process_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request); +int lrmd_process_rsc_unregister(pcmk__client_t *client, xmlNode *request); +int lrmd_process_signon(pcmk__client_t *client, xmlNode *request, int call_id, + xmlNode **reply); + #endif // PACEMAKER_EXECD__H diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c index 13c867f3e6..6cfafb44d9 100644 --- a/daemons/execd/remoted_tls.c +++ b/daemons/execd/remoted_tls.c @@ -1,439 +1,425 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #include #define LRMD_REMOTE_AUTH_TIMEOUT 10000 static pcmk__tls_t *tls = NULL; static int ssock = -1; -extern int lrmd_call_id; /*! * \internal * \brief Read (more) TLS handshake data from client * * \param[in,out] client IPC client doing handshake * * \return 0 on success or more data needed, -1 on error */ static int remoted__read_handshake_data(pcmk__client_t *client) { int rc = pcmk__read_handshake_data(client); if (rc == EAGAIN) { /* No more data is available at the moment. Just return for now; * we'll get invoked again once the client sends more. */ return 0; } else if (rc != pcmk_rc_ok) { return -1; } if (client->remote->auth_timeout) { g_source_remove(client->remote->auth_timeout); } client->remote->auth_timeout = 0; pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete); crm_notice("Remote client connection accepted"); /* Now that the handshake is done, see if any client TLS certificate is * close to its expiration date and log if so. If a TLS certificate is not * in use, this function will just return so we don't need to check for the * session type here. */ pcmk__tls_check_cert_expiration(client->remote->tls_session); /* Only a client with access to the TLS key can connect, so we can treat * it as privileged. */ pcmk__set_client_flags(client, pcmk__client_privileged); // Alert other clients of the new connection notify_of_new_client(client); return 0; } static int lrmd_remote_client_msg(gpointer data) { int id = 0; int rc = pcmk_rc_ok; xmlNode *request = NULL; pcmk__client_t *client = data; if (!pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { return remoted__read_handshake_data(client); } rc = pcmk__remote_ready(client->remote, 0); switch (rc) { case pcmk_rc_ok: break; case ETIME: /* No message available to read */ return 0; default: /* Error */ crm_info("Error polling remote client: %s", pcmk_rc_str(rc)); return -1; } rc = pcmk__read_available_remote_data(client->remote); switch (rc) { case pcmk_rc_ok: break; case EAGAIN: /* We haven't read the whole message yet */ return 0; default: /* Error */ crm_info("Error reading from remote client: %s", pcmk_rc_str(rc)); return -1; } request = pcmk__remote_message_xml(client->remote); if (request == NULL) { return 0; } crm_element_value_int(request, PCMK__XA_LRMD_REMOTE_MSG_ID, &id); crm_trace("Processing remote client request %d", id); - if (!client->name) { - client->name = crm_element_value_copy(request, - PCMK__XA_LRMD_CLIENTNAME); - } - - lrmd_call_id++; - if (lrmd_call_id < 1) { - lrmd_call_id = 1; - } - - crm_xml_add(request, PCMK__XA_LRMD_CLIENTID, client->id); - crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, client->name); - crm_xml_add_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); - process_lrmd_message(client, id, request); + lrmd_process_message(client, id, client->flags, request); pcmk__xml_free(request); return 0; } static void lrmd_remote_client_destroy(gpointer user_data) { pcmk__client_t *client = user_data; if (client == NULL) { return; } crm_notice("Cleaning up after remote client %s disconnected", pcmk__client_name(client)); ipc_proxy_remove_provider(client); /* if this is the last remote connection, stop recurring * operations */ if (pcmk__ipc_client_count() == 1) { client_disconnect_cleanup(NULL); } if (client->remote->tls_session) { int csock = pcmk__tls_get_client_sock(client->remote); gnutls_bye(client->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(client->remote->tls_session); client->remote->tls_session = NULL; close(csock); } lrmd_client_destroy(client); return; } static gboolean lrmd_auth_timeout_cb(gpointer data) { pcmk__client_t *client = data; client->remote->auth_timeout = 0; if (pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { return FALSE; } mainloop_del_fd(client->remote->source); client->remote->source = NULL; crm_err("Remote client authentication timed out"); return FALSE; } // Dispatch callback for remote server socket static int lrmd_remote_listen(gpointer data) { int csock = -1; gnutls_session_t session = NULL; pcmk__client_t *new_client = NULL; // For client socket static struct mainloop_fd_callbacks lrmd_remote_fd_cb = { .dispatch = lrmd_remote_client_msg, .destroy = lrmd_remote_client_destroy, }; CRM_CHECK(ssock >= 0, return TRUE); if (pcmk__accept_remote_connection(ssock, &csock) != pcmk_rc_ok) { return TRUE; } session = pcmk__new_tls_session(tls, csock); if (session == NULL) { close(csock); return TRUE; } new_client = pcmk__new_unauth_client(NULL); new_client->remote = pcmk__assert_alloc(1, sizeof(pcmk__remote_t)); pcmk__set_client_flags(new_client, pcmk__client_tls); new_client->remote->tls_session = session; // Require the client to authenticate within this time new_client->remote->auth_timeout = pcmk__create_timer(LRMD_REMOTE_AUTH_TIMEOUT, lrmd_auth_timeout_cb, new_client); crm_info("Remote client pending authentication " QB_XS " %p id: %s", new_client, new_client->id); new_client->remote->source = mainloop_add_fd("pacemaker-remote-client", G_PRIORITY_DEFAULT, csock, new_client, &lrmd_remote_fd_cb); return TRUE; } static void tls_server_dropped(gpointer user_data) { crm_notice("TLS server session ended"); return; } // \return 0 on success, -1 on error (gnutls_psk_server_credentials_function) static int lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key) { return (lrmd__init_remote_key(key) == pcmk_rc_ok)? 0 : -1; } static int bind_and_listen(struct addrinfo *addr) { int optval; int fd; int rc; char buffer[INET6_ADDRSTRLEN] = { 0, }; pcmk__sockaddr2str(addr->ai_addr, buffer); crm_trace("Attempting to bind to address %s", buffer); fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol); if (fd < 0) { rc = errno; crm_err("Listener socket creation failed: %", pcmk_rc_str(rc)); return -rc; } /* reuse address */ optval = 1; rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (rc < 0) { rc = errno; crm_err("Local address reuse not allowed on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); return -rc; } if (addr->ai_family == AF_INET6) { optval = 0; rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &optval, sizeof(optval)); if (rc < 0) { rc = errno; crm_err("Couldn't disable IPV6-only on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); return -rc; } } if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0) { rc = errno; crm_err("Cannot bind to %s: %s", buffer, pcmk_rc_str(rc)); close(fd); return -rc; } if (listen(fd, 10) == -1) { rc = errno; crm_err("Cannot listen on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); return -rc; } return fd; } static int get_address_info(const char *bind_name, int port, struct addrinfo **res) { int rc = pcmk_rc_ok; char *port_s = pcmk__itoa(port); struct addrinfo hints; memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_flags = AI_PASSIVE; hints.ai_family = AF_UNSPEC; // IPv6 or IPv4 hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; rc = getaddrinfo(bind_name, port_s, &hints, res); rc = pcmk__gaierror2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Unable to get IP address(es) for %s: %s", (bind_name? bind_name : "local node"), pcmk_rc_str(rc)); } free(port_s); return rc; } int lrmd_init_remote_tls_server(void) { int rc = pcmk_rc_ok; int filter; int port = crm_default_remote_port(); struct addrinfo *res = NULL, *iter; const char *bind_name = pcmk__env_option(PCMK__ENV_REMOTE_ADDRESS); bool use_cert = pcmk__x509_enabled(); static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { .dispatch = lrmd_remote_listen, .destroy = tls_server_dropped, }; CRM_CHECK(ssock == -1, return ssock); crm_debug("Starting TLS listener on %s port %d", (bind_name? bind_name : "all addresses on"), port); rc = pcmk__init_tls(&tls, true, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_PSK); if (rc != pcmk_rc_ok) { return -1; } if (!use_cert) { gnutls_datum_t psk_key = { NULL, 0 }; pcmk__tls_add_psk_callback(tls, lrmd_tls_server_key_cb); /* The key callback won't get called until the first client connection * attempt. Do it once here, so we can warn the user at start-up if we can't * read the key. We don't error out, though, because it's fine if the key is * going to be added later. */ if (lrmd__init_remote_key(&psk_key) != pcmk_rc_ok) { crm_warn("A cluster connection will not be possible until the key is available"); } gnutls_free(psk_key.data); } if (get_address_info(bind_name, port, &res) != pcmk_rc_ok) { return -1; } /* Currently we listen on only one address from the resulting list (the * first IPv6 address we can bind to if possible, otherwise the first IPv4 * address we can bind to). When bind_name is NULL, this should be the * respective wildcard address. * * @TODO If there is demand for specifying more than one address, allow * bind_name to be a space-separated list, call getaddrinfo() for each, * and create a socket for each result (set IPV6_V6ONLY on IPv6 sockets * since IPv4 listeners will have their own sockets). */ iter = res; filter = AF_INET6; while (iter) { if (iter->ai_family == filter) { ssock = bind_and_listen(iter); } if (ssock >= 0) { break; } iter = iter->ai_next; if (iter == NULL && filter == AF_INET6) { iter = res; filter = AF_INET; } } if (ssock >= 0) { mainloop_add_fd("pacemaker-remote-server", G_PRIORITY_DEFAULT, ssock, NULL, &remote_listen_fd_callbacks); crm_debug("Started TLS listener on %s port %d", (bind_name? bind_name : "all addresses on"), port); } freeaddrinfo(res); return ssock; } void execd_stop_tls_server(void) { if (tls != NULL) { pcmk__free_tls(tls); tls = NULL; } if (ssock >= 0) { close(ssock); ssock = -1; } }