diff --git a/daemons/execd/execd_alerts.c b/daemons/execd/execd_alerts.c index baace91470..8f9bfb356c 100644 --- a/daemons/execd/execd_alerts.c +++ b/daemons/execd/execd_alerts.c @@ -1,177 +1,182 @@ /* * Copyright 2016-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" /* Track in-flight alerts so we can wait for them at shutdown */ static GHashTable *inflight_alerts; /* key = call_id, value = timeout */ static gboolean draining_alerts = FALSE; static inline void add_inflight_alert(int call_id, int timeout) { if (inflight_alerts == NULL) { inflight_alerts = pcmk__intkey_table(NULL); } pcmk__intkey_table_insert(inflight_alerts, call_id, GINT_TO_POINTER(timeout)); } static inline void remove_inflight_alert(int call_id) { if (inflight_alerts != NULL) { pcmk__intkey_table_remove(inflight_alerts, call_id); } } static int max_inflight_timeout(void) { GHashTableIter iter; gpointer timeout; int max_timeout = 0; if (inflight_alerts) { g_hash_table_iter_init(&iter, inflight_alerts); while (g_hash_table_iter_next(&iter, NULL, &timeout)) { if (GPOINTER_TO_INT(timeout) > max_timeout) { max_timeout = GPOINTER_TO_INT(timeout); } } } return max_timeout; } struct alert_cb_s { char *client_id; int call_id; }; static void alert_complete(svc_action_t *action) { struct alert_cb_s *cb_data = (struct alert_cb_s *) (action->cb_data); remove_inflight_alert(cb_data->call_id); crm_debug("Alert pid %d for %s completed with rc=%d", action->pid, cb_data->client_id, action->rc); free(cb_data->client_id); free(action->cb_data); action->cb_data = NULL; } int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) { static int alert_sequence_no = 0; xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, request, LOG_ERR); const char *alert_id = crm_element_value(alert_xml, F_LRMD_ALERT_ID); const char *alert_path = crm_element_value(alert_xml, F_LRMD_ALERT_PATH); svc_action_t *action = NULL; int alert_timeout = 0; int rc = pcmk_ok; GHashTable *params = NULL; struct alert_cb_s *cb_data = NULL; if ((alert_id == NULL) || (alert_path == NULL) || (client == NULL) || (client->id == NULL)) { /* hint static analyzer */ return -EINVAL; } if (draining_alerts) { return pcmk_ok; } crm_element_value_int(alert_xml, F_LRMD_TIMEOUT, &alert_timeout); crm_info("Executing alert %s for %s", alert_id, client->id); params = xml2list(alert_xml); pcmk__add_alert_key_int(params, PCMK__alert_key_node_sequence, ++alert_sequence_no); cb_data = calloc(1, sizeof(struct alert_cb_s)); CRM_CHECK(cb_data != NULL, rc = -ENOMEM; goto err); /* coverity[deref_ptr] False Positive */ cb_data->client_id = strdup(client->id); CRM_CHECK(cb_data->client_id != NULL, rc = -ENOMEM; goto err); crm_element_value_int(request, F_LRMD_CALLID, &(cb_data->call_id)); action = services_alert_create(alert_id, alert_path, alert_timeout, params, alert_sequence_no, cb_data); + if (action->rc != PCMK_OCF_UNKNOWN) { + rc = -E2BIG; + goto err; + } + rc = services_action_user(action, CRM_DAEMON_USER); if (rc < 0) { goto err; } add_inflight_alert(cb_data->call_id, alert_timeout); if (services_alert_async(action, alert_complete) == FALSE) { services_action_free(action); } return pcmk_ok; err: if (cb_data) { if (cb_data->client_id) { free(cb_data->client_id); } free(cb_data); } if (action) { services_action_free(action); } return rc; } static bool drain_check(guint remaining_timeout_ms) { if (inflight_alerts != NULL) { guint count = g_hash_table_size(inflight_alerts); if (count > 0) { crm_trace("%d alerts pending (%.3fs timeout remaining)", count, remaining_timeout_ms / 1000.0); return TRUE; } } return FALSE; } void lrmd_drain_alerts(GMainLoop *mloop) { if (inflight_alerts != NULL) { guint timer_ms = max_inflight_timeout() + 5000; crm_trace("Draining in-flight alerts (timeout %.3fs)", timer_ms / 1000.0); draining_alerts = TRUE; pcmk_drain_main_loop(mloop, timer_ms, drain_check); g_hash_table_destroy(inflight_alerts); inflight_alerts = NULL; } } diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 0966c29424..d4df9f290c 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1,1997 +1,1956 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // Check whether we have a high-resolution monotonic clock #undef PCMK__TIME_USE_CGT #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC) # define PCMK__TIME_USE_CGT # include /* clock_gettime */ #endif #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" -#define EXIT_REASON_MAX_LEN 128 - GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; guint interval_ms; int start_delay; int timeout_orig; int call_id; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *userdata_str; pcmk__action_result_t result; /* We can track operation queue time and run time, to be saved with the CIB * resource history (and displayed in cluster status). We need * high-resolution monotonic time for this purpose, so we use * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature * is disabled). * * However, we also need epoch timestamps for recording the time the command * last ran and the time its return value last changed, for use in time * displays (as opposed to interval calculations). We keep time_t values for * this purpose. * * The last run time is used for both purposes, so we keep redundant * monotonic and epoch values for this. Technically the two could represent * different times, but since time_t has only second resolution and the * values are used for distinct purposes, that is not significant. */ #ifdef PCMK__TIME_USE_CGT /* Recurring and systemd operations may involve more than one executor * command per operation, so they need info about the original and the most * recent. */ struct timespec t_first_run; // When op first ran struct timespec t_run; // When op most recently ran struct timespec t_first_queue; // When op was first queued struct timespec t_queue; // When op was most recently queued #endif time_t epoch_last_run; // Epoch timestamp of when op last ran time_t epoch_rcchange; // Epoch timestamp of when rc last changed bool first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); #ifdef PCMK__TIME_USE_CGT /*! * \internal * \brief Check whether a struct timespec has been set * * \param[in] timespec Time to check * * \return true if timespec has been set (i.e. is nonzero), false otherwise */ static inline bool time_is_set(struct timespec *timespec) { return (timespec != NULL) && ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0)); } /* * \internal * \brief Set a timespec (and its original if unset) to the current time * * \param[out] t_current Where to store current time * \param[out] t_orig Where to copy t_current if unset */ static void get_current_time(struct timespec *t_current, struct timespec *t_orig) { clock_gettime(CLOCK_MONOTONIC, t_current); if ((t_orig != NULL) && !time_is_set(t_orig)) { *t_orig = *t_current; } } /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or unset) * * \note Can overflow on 32bit machines when the differences is around * 24 days or more. */ static int time_diff_ms(struct timespec *now, struct timespec *old) { int diff_ms = 0; if (time_is_set(old)) { struct timespec local_now = { 0, }; if (now == NULL) { clock_gettime(CLOCK_MONOTONIC, &local_now); now = &local_now; } diff_ms = (now->tv_sec - old->tv_sec) * 1000 + (now->tv_nsec - old->tv_nsec) / 1000000; } return diff_ms; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * \param[in] cmd Executor command object to reset * * \note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static inline bool action_matches(lrmd_cmd_t *cmd, const char *action, guint interval_ms) { return (cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, action, pcmk__str_casei); } static void log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time) { char pid_str[32] = { 0, }; int log_level = LOG_INFO; if (cmd->last_pid) { snprintf(pid_str, 32, "%d", cmd->last_pid); } if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { log_level = LOG_DEBUG; } #ifdef PCMK__TIME_USE_CGT do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d" " (execution time %dms, queue time %dms)", cmd->rsc_id, cmd->action, cmd->call_id, (cmd->last_pid? ", PID " : ""), pid_str, cmd->result.exit_status, exec_time, queue_time); #else do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d", cmd->rsc_id, cmd->action, cmd->call_id, (cmd->last_pid? ", PID " : ""), pid_str, cmd->result.exit_status); #endif } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (pcmk__str_eq(action, "monitor", pcmk__str_casei) && pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running" return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) { crm_debug("Setting flag to leave pid group on timeout and " "only kill action pid for " PCMK__OP_FMT, cmd->rsc_id, cmd->action, cmd->interval_ms); cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", cmd->action, 0, SVC_ACTION_LEAVE_GROUP, "SVC_ACTION_LEAVE_GROUP"); } return cmd; } static void stop_recurring_timer(lrmd_cmd_t *cmd) { if (cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = 0; } } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { stop_recurring_timer(cmd); if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } pcmk__reset_result(&(cmd->result)); free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); return FALSE; } static inline void start_recurring_timer(lrmd_cmd_t *cmd) { if (cmd && (cmd->interval_ms > 0)) { cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms, stonith_recurring_op_helper, cmd); } } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } /*! * \internal * \brief Check whether a list already contains the equivalent of a given action */ static lrmd_cmd_t * find_duplicate_action(GList *action_list, lrmd_cmd_t *cmd) { for (GList *item = action_list; item != NULL; item = item->next) { lrmd_cmd_t *dup = item->data; if (action_matches(cmd, dup->action, dup->interval_ms)) { return dup; } } return NULL; } static bool merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { lrmd_cmd_t * dup = NULL; bool dup_pending = true; if (cmd->interval_ms == 0) { return false; } // Search for a duplicate of this action (in-flight or not) dup = find_duplicate_action(rsc->pending_ops, cmd); if (dup == NULL) { dup_pending = false; dup = find_duplicate_action(rsc->recurring_ops, cmd); if (dup == NULL) { return false; } } /* Do not merge fencing monitors marked for cancellation, so we can reply to * the cancellation separately. */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei) && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) { return false; } /* This should not occur. If it does, we need to investigate how something * like this is possible in the controller. */ crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT "), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); // Merge new action's call ID and user data into existing action dup->first_notify_sent = false; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; free_lrmd_cmd(cmd); cmd = NULL; /* If dup is not pending, that means it has already executed at least once * and is waiting in the interval. In that case, stop waiting and initiate * a new instance now. */ if (!dup_pending) { if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stop_recurring_timer(dup); stonith_recurring_op_helper(dup); } else { services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); } } return true; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); if (merge_recurring_duplicate(rsc, cmd)) { // Equivalent of cmd has already been scheduled return; } /* The controller expects the executor to automatically cancel * recurring operations before a resource stops. */ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static xmlNode * create_lrmd_reply(const char *origin, int rc, int call_id) { xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, origin); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); return reply; } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; int rc; int log_level = LOG_WARNING; const char *msg = NULL; CRM_CHECK(client != NULL, return); if (client->name == NULL) { crm_trace("Skipping notification to client without name"); return; } if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) { /* We only want to notify clients of the executor IPC API. If we are * running as Pacemaker Remote, we may have clients proxied to other * IPC services in the cluster, so skip those. */ crm_trace("Skipping executor API notification to client %s", pcmk__client_name(client)); return; } rc = lrmd_server_send_notify(client, update_msg); if (rc == pcmk_rc_ok) { return; } switch (rc) { case ENOTCONN: case EPIPE: // Client exited without waiting for notification log_level = LOG_INFO; msg = "Disconnected"; break; default: msg = pcmk_rc_str(rc); break; } do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d", pcmk__client_name(client), msg, rc); } static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { xmlNode *notify = NULL; #ifdef PCMK__TIME_USE_CGT int exec_time = time_diff_ms(NULL, &(cmd->t_run)); int queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue)); log_finished(cmd, exec_time, queue_time); #else log_finished(cmd, 0, 0); #endif /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if ((cmd->last_notify_rc == cmd->result.exit_status) && (cmd->last_notify_op_status == cmd->result.execution_status)) { /* only send changes */ return; } } cmd->first_notify_sent = true; cmd->last_notify_rc = cmd->result.exit_status; cmd->last_notify_op_status = cmd->result.execution_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME, (long long) cmd->epoch_last_run); crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME, (long long) cmd->epoch_rcchange); #ifdef PCMK__TIME_USE_CGT crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason); if (cmd->result.action_stderr != NULL) { crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr); } else if (cmd->result.action_stdout != NULL) { crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout); } if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else { pcmk__foreach_ipc_client(send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (pcmk__ipc_client_count() != 0) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); pcmk__foreach_ipc_client(send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->last_pid = 0; #ifdef PCMK__TIME_USE_CGT memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); #endif cmd->epoch_last_run = 0; pcmk__reset_result(&(cmd->result)); cmd->result.execution_status = PCMK_EXEC_DONE; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if ((cmd->interval_ms != 0) && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval_ms == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } static int ocf2uniform_rc(int rc) { switch (rc) { case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: break; default: if (rc < 0 || rc > PCMK_OCF_FAILED_PROMOTED) { return PCMK_OCF_UNKNOWN_ERROR; } } return rc; } static int stonith2uniform_rc(const char *action, int rc) { switch (rc) { case pcmk_ok: rc = PCMK_OCF_OK; break; case -ENODEV: /* This should be possible only for probes in practice, but * interpret for all actions to be safe. */ if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) { rc = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(action, "stop", pcmk__str_casei)) { rc = PCMK_OCF_OK; } else { rc = PCMK_OCF_NOT_INSTALLED; } break; case -EOPNOTSUPP: rc = PCMK_OCF_UNIMPLEMENT_FEATURE; break; default: rc = PCMK_OCF_UNKNOWN_ERROR; break; } return rc; } #if SUPPORT_NAGIOS static int nagios2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } switch (rc) { case NAGIOS_STATE_OK: return PCMK_OCF_OK; case NAGIOS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case NAGIOS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case NAGIOS_STATE_WARNING: case NAGIOS_STATE_CRITICAL: case NAGIOS_STATE_UNKNOWN: case NAGIOS_STATE_DEPENDENT: default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } #endif static int get_uniform_rc(const char *standard, const char *action, int rc) { if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) { return ocf2uniform_rc(rc); } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { return stonith2uniform_rc(action, rc); } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { return rc; } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_casei)) { return rc; #if SUPPORT_NAGIOS } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return nagios2uniform_rc(action, rc); #endif } else { return services_get_ocf_exitcode(action, rc); } } static int action_get_uniform_rc(svc_action_t * action) { lrmd_cmd_t *cmd = action->cb_data; return get_uniform_rc(action->standard, cmd->action, action->rc); } struct notify_new_client_data { xmlNode *notify; pcmk__client_t *new_client; }; static void notify_one_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *client = value; struct notify_new_client_data *data = user_data; if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) { send_client_notify(key, (gpointer) client, (gpointer) data->notify); } } void notify_of_new_client(pcmk__client_t *new_client) { struct notify_new_client_data data; data.new_client = new_client; data.notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__); crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); pcmk__foreach_ipc_client(notify_one_client, &data); free_xml(data.notify); } -static char * -parse_exit_reason(const char *output) -{ - const char *cur = NULL; - const char *last = NULL; - static int cookie_len = 0; - char *eol = NULL; - size_t reason_len = EXIT_REASON_MAX_LEN; - - if (output == NULL) { - return NULL; - } - - if (!cookie_len) { - cookie_len = strlen(PCMK_OCF_REASON_PREFIX); - } - - cur = strstr(output, PCMK_OCF_REASON_PREFIX); - for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { - /* skip over the cookie delimiter string */ - cur += cookie_len; - last = cur; - } - if (last == NULL) { - return NULL; - } - - // Truncate everything after a new line, and limit reason string size - eol = strchr(last, '\n'); - if (eol) { - reason_len = QB_MIN(reason_len, eol - last); - } - return strndup(last, reason_len); -} - void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; #ifdef PCMK__TIME_USE_CGT const char *rclass = NULL; bool goagain = false; #endif if (!cmd) { crm_err("Completed executor action (%s) does not match any known operations", action->id); return; } #ifdef PCMK__TIME_USE_CGT if (cmd->result.exit_status != action->rc) { cmd->epoch_rcchange = time(NULL); } #endif cmd->last_pid = action->pid; pcmk__set_result(&(cmd->result), action_get_uniform_rc(action), - action->status, NULL); + action->status, services__exit_reason(action)); rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; #ifdef PCMK__TIME_USE_CGT if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { rclass = resources_find_service_class(rsc->type); } else if(rsc) { rclass = rsc->class; } if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { if ((cmd->result.exit_status == PCMK_OCF_OK) && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) { /* systemd returns from start and stop actions after the action * begins, not after it completes. We have to jump through a few * hoops so that we don't report 'complete' to the rest of pacemaker * until it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if (cmd->real_action != NULL) { // This is follow-up monitor to check whether start/stop completed if (cmd->result.execution_status == PCMK_EXEC_PENDING) { goagain = true; } else if ((cmd->result.exit_status == PCMK_OCF_OK) && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { goagain = true; } else { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s systemd %s is now complete (elapsed=%dms, " "remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->result.exit_status), cmd->result.exit_status); cmd_original_times(cmd); // Monitors may return "not running", but start/stop shouldn't if ((cmd->result.execution_status == PCMK_EXEC_DONE) && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; } } } } } #endif #if SUPPORT_NAGIOS if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { if (action_matches(cmd, "monitor", 0) && (cmd->result.exit_status == PCMK_OCF_OK)) { /* Successfully executed --version for the nagios plugin */ cmd->result.exit_status = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) && (cmd->result.exit_status != PCMK_OCF_OK)) { #ifdef PCMK__TIME_USE_CGT goagain = true; #endif } } #endif #ifdef PCMK__TIME_USE_CGT if (goagain) { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if (cmd->result.exit_status == PCMK_OCF_OK) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->result.exit_status), cmd->result.exit_status, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, (cmd->real_action? cmd->real_action : cmd->action), cmd->result.exit_status, time_sum, timeout_left); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Investigate reason for timeout, and adjust " "configured operation timeout if necessary"); cmd_original_times(cmd); } } #endif pcmk__set_result_output(&(cmd->result), action->stdout_data, action->stderr_data); - if (action->stderr_data) { - cmd->result.exit_reason = parse_exit_reason(action->stderr_data); - } - cmd_finalize(cmd, rsc); } /*! * \internal * \brief Determine operation status of a stonith operation * * Non-stonith resource operations get their operation status directly from the * service library, but the fencer does not have an equivalent, so we must infer * an operation status from the fencer API's return code. * * \param[in] action Name of action performed on stonith resource * \param[in] interval_ms Action interval * \param[in] rc Action result from fencer * * \return Operation status corresponding to fencer API return code */ static int stonith_rc2status(const char *action, guint interval_ms, int rc) { int status = PCMK_EXEC_DONE; switch (rc) { case pcmk_ok: break; case -EOPNOTSUPP: case -EPROTONOSUPPORT: status = PCMK_EXEC_NOT_SUPPORTED; break; case -ETIME: case -ETIMEDOUT: status = PCMK_EXEC_TIMEOUT; break; case -ENOTCONN: case -ECOMM: // Couldn't talk to fencer status = PCMK_EXEC_ERROR; break; case -ENODEV: // The device is not registered with the fencer status = PCMK_EXEC_ERROR; break; default: break; } return status; } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { // This can be NULL if resource was removed before command completed lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); cmd->result.exit_status = stonith2uniform_rc(cmd->action, rc); /* This function may be called with status already set to cancelled, if a * pending action was aborted. Otherwise, we need to determine status from * the fencer return code. */ if (cmd->result.execution_status != PCMK_EXEC_CANCELLED) { cmd->result.execution_status = stonith_rc2status(cmd->action, cmd->interval_ms, rc); // Certain successful actions change the known state of the resource if ((rsc != NULL) && (cmd->result.exit_status == PCMK_OCF_OK)) { if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING } } } // Give the user more detail than an OCF code if (rc != -pcmk_err_generic) { cmd->result.exit_reason = strdup(pcmk_strerror(rc)); } /* The recurring timer should not be running at this point in any case, but * as a failsafe, stop it if it is. */ stop_recurring_timer(cmd); /* Reschedule this command if appropriate. If a recurring command is *not* * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will * not be removed from recurring_ops by cmd_finalize(). */ if (rsc && (cmd->interval_ms > 0) && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) { start_recurring_timer(cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* If we registered this fence device, we don't know whether the * fencer still has the registration or not. Cause future probes to * return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or * started successfully. This is especially important if the * controller also went away (possibly due to a cluster layer * restart) and won't receive our client notification of any * monitors finalized below. */ if (rsc->st_probe_rc == pcmk_ok) { rsc->st_probe_rc = pcmk_err_generic; } if (rsc->active) { cmd_list = g_list_append(cmd_list, rsc->active); } if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("Connection to fencer failed, finalizing %d pending operations", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } /*! * \internal * \brief Execute a stonith resource "start" action * * Start a stonith resource by registering it with the fencer. * (Stonith agents don't have a start command.) * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to start * \param[in] cmd Start command to execute * * \return pcmk_ok on success, -errno otherwise */ static int execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; int rc = pcmk_ok; // Convert command parameters to stonith API key/values if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* The fencer will automatically register devices via CIB notifications * when the CIB changes, but to avoid a possible race condition between * the fencer receiving the notification and the executor requesting that * resource, the executor registers the device as well. The fencer knows how * to handle duplicate registrations. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); return rc; } /*! * \internal * \brief Execute a stonith resource "stop" action * * Stop a stonith resource by unregistering it with the fencer. * (Stonith agents don't have a stop command.) * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to stop * * \return pcmk_ok on success, -errno otherwise */ static inline int execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc) { /* @TODO Failure would indicate a problem communicating with fencer; * perhaps we should try reconnecting and retrying a few times? */ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, rsc->rsc_id); } /*! * \internal * \brief Initiate a stonith resource agent recurring "monitor" action * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to monitor * \param[in] cmd Monitor command being executed * * \return pcmk_ok if monitor was successfully initiated, -errno otherwise */ static inline int execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); if (rc == TRUE) { rsc->active = cmd; rc = pcmk_ok; } else { rc = -pcmk_err_generic; } return rc; } static void lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; bool do_monitor = FALSE; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { rc = -ENOTCONN; } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { rc = execd_stonith_start(stonith_api, rsc, cmd); if (rc == 0) { do_monitor = TRUE; } } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { rc = execd_stonith_stop(stonith_api, rsc); } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { if (cmd->interval_ms > 0) { do_monitor = TRUE; } else { rc = rsc->st_probe_rc; } } if (do_monitor) { rc = execd_stonith_monitor(stonith_api, rsc, cmd); if (rc == pcmk_ok) { // Don't clean up yet, we will find out result of the monitor later return; } } stonith_action_complete(cmd, rc); } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei) && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; goto exec_done; } #endif params_copy = pcmk__str_table_dup(cmd->params); action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval_ms, cmd->timeout, params_copy, cmd->service_flags); - if (!action) { - // Invalid arguments (which would be a bug) or out-of-memory - crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); + if (action == NULL) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, - PCMK_EXEC_ERROR, "Internal Pacemaker error"); + PCMK_EXEC_ERROR, strerror(ENOMEM)); goto exec_done; } if (action->rc != PCMK_OCF_UNKNOWN) { - pcmk__set_result(&(cmd->result), action->rc, action->status, NULL); + pcmk__set_result(&(cmd->result), action->rc, action->status, + services__exit_reason(action)); services_action_free(action); goto exec_done; } action->cb_data = cmd; if (services_action_async(action, action_complete)) { /* When services_action_async() returns TRUE, the callback might have * been called -- in this case action_complete(), which might free cmd, * so cmd cannot be used here. */ return TRUE; } - pcmk__set_result(&(cmd->result), action->rc, action->status, NULL); + pcmk__set_result(&(cmd->result), action->rc, action->status, + services__exit_reason(action)); services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_run), &(cmd->t_first_run)); #endif cmd->epoch_last_run = time(NULL); } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval_ms) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GList *gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei); gIter = rsc->pending_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval_ms); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, xmlNode **reply) { int rc = pcmk_ok; const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_MIN_PROTOCOL_VERSION, protocol_version); rc = -EPROTO; } if (crm_is_true(is_ipc_provider)) { #ifdef PCMK__COMPILE_REMOTE if ((client->remote != NULL) && client->remote->tls_handshake_complete) { // This is a remote connection from a cluster node's controller ipc_proxy_add_provider(client); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif } *reply = create_lrmd_reply(__func__, rc, call_id); crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(*reply, F_LRMD_CLIENTID, client->id); crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); return rc; } static int process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) && pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) { crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Cached agent information for '%s'", rsc->rsc_id); return rc; } static xmlNode * process_lrmd_get_rsc_info(xmlNode *request, int call_id) { int rc = pcmk_ok; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; if (rsc_id == NULL) { rc = -ENODEV; } else { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Agent information for '%s' not in cache", rsc_id); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } return reply; } static int process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Ignoring unregistration of resource '%s', which is not registered", rsc_id); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation (0x%p) still in progress for unregistered resource %s", rsc->active, rsc_id); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, guint interval_ms) { GList *gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval_ms) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval_ms == 0) { continue; } if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); guint interval_ms = 0; crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval_ms); } static void add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc) { xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC); crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id); for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) { lrmd_cmd_t *cmd = item->data; xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP); crm_xml_add(op_xml, F_LRMD_RSC_ACTION, (cmd->real_action? cmd->real_action : cmd->action)); crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig); } } static xmlNode * process_lrmd_get_recurring(xmlNode *request, int call_id) { int rc = pcmk_ok; const char *rsc_id = NULL; lrmd_rsc_t *rsc = NULL; xmlNode *reply = NULL; xmlNode *rsc_xml = NULL; // Resource ID is optional rsc_xml = first_named_child(request, F_LRMD_CALLDATA); if (rsc_xml) { rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC); } if (rsc_xml) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); } // If resource ID is specified, resource must exist if (rsc_id != NULL) { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); // If resource ID is not specified, check all resources if (rsc_id == NULL) { GHashTableIter iter; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rsc)) { add_recurring_op_xml(reply, rsc); } } else if (rsc) { add_recurring_op_xml(reply, rsc); } return reply; } void process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; xmlNode *reply = NULL; /* Certain IPC commands may be done only by privileged users (i.e. root or * hacluster), because they would otherwise provide a means of bypassing * ACLs. */ bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged); crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) { #ifdef PCMK__COMPILE_REMOTE if (allowed) { ipc_proxy_forward_client(client, request); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif do_reply = 1; } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { rc = process_lrmd_signon(client, request, call_id, &reply); do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_rsc_info(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_cancel(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) { do_notify = 1; do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) { if (allowed) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); CRM_LOG_ASSERT(data != NULL); pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG)); } else { rc = -EACCES; } } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_alert_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_recurring(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown IPC request '%s' from client %s", op, pcmk__client_name(client)); } if (rc == -EACCES) { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", op, pcmk__client_name(client)); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { int send_rc = pcmk_rc_ok; if (reply == NULL) { reply = create_lrmd_reply(__func__, rc, call_id); } send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc != pcmk_rc_ok) { crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d", pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc); } } if (do_notify) { send_generic_notify(rc, request); } } diff --git a/include/crm/services_internal.h b/include/crm/services_internal.h index b99de53f1a..c7d572b16c 100644 --- a/include/crm/services_internal.h +++ b/include/crm/services_internal.h @@ -1,46 +1,48 @@ /* * Copyright 2010-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__SERVICES_INTERNAL__H # define PCMK__SERVICES_INTERNAL__H #ifdef __cplusplus extern "C" { #endif /** * \brief Create a new resource action * * \param[in] name Name of resource * \param[in] standard Resource agent standard (ocf, lsb, etc.) * \param[in] provider Resource agent provider * \param[in] agent Resource agent name * \param[in] action action (start, stop, monitor, etc.) * \param[in] interval_ms How often to repeat this action (if 0, execute once) * \param[in] timeout Consider action failed if it does not complete in this many milliseconds * \param[in] params Action parameters * * \return NULL if not enough memory, otherwise newly allocated action instance * (if its rc member is not PCMK_OCF_UNKNOWN, the action is invalid) * * \post After the call, 'params' is owned, and later free'd by the svc_action_t result * \note The caller is responsible for freeing the return value using * services_action_free(). */ svc_action_t *services__create_resource_action(const char *name, const char *standard, const char *provider, const char *agent, const char *action, guint interval_ms, int timeout /* ms */, GHashTable *params, enum svc_action_flags flags); +const char *services__exit_reason(svc_action_t *action); + # ifdef __cplusplus } # endif #endif /* PCMK__SERVICES_INTERNAL__H */ diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index d92c024bcc..86613ab418 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2771 +1,2777 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(stonith); struct stonith_action_s { /*! user defined data */ char *agent; char *action; char *victim; GHashTable *args; int timeout; int async; void *userdata; void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); void (*fork_cb) (GPid pid, gpointer user_data); svc_action_t *svc_action; /*! internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; int max_retries; /* device output data */ GPid pid; int rc; char *output; char *error; }; typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; int notify_refcnt; bool notify_deletes; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); bool delete; } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); bool stonith_dispatch(stonith_t * st); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static int stonith_send_command(stonith_t *stonith, const char *op, xmlNode *data, xmlNode **output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int internal_stonith_action_execute(stonith_action_t * action); static void log_action(stonith_action_t *action, pid_t pid); /*! * \brief Get agent namespace by name * * \param[in] namespace_s Name of namespace as string * * \return Namespace as enum value */ enum stonith_namespace stonith_text2namespace(const char *namespace_s) { if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) { return st_namespace_any; } else if (!strcmp(namespace_s, "redhat") || !strcmp(namespace_s, "stonith-ng")) { return st_namespace_rhcs; } else if (!strcmp(namespace_s, "internal")) { return st_namespace_internal; } else if (!strcmp(namespace_s, "heartbeat")) { return st_namespace_lha; } return st_namespace_invalid; } /*! * \brief Get agent namespace name * * \param[in] namespace Namespace as enum value * * \return Namespace name as string */ const char * stonith_namespace2text(enum stonith_namespace st_namespace) { switch (st_namespace) { case st_namespace_any: return "any"; case st_namespace_rhcs: return "stonith-ng"; case st_namespace_internal: return "internal"; case st_namespace_lha: return "heartbeat"; default: break; } return "unsupported"; } /*! * \brief Determine namespace of a fence agent * * \param[in] agent Fence agent type * \param[in] namespace_s Name of agent namespace as string, if known * * \return Namespace of specified agent, as enum value */ enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s) { if (pcmk__str_eq(namespace_s, "internal", pcmk__str_casei)) { return st_namespace_internal; } if (stonith__agent_is_rhcs(agent)) { return st_namespace_rhcs; } #if HAVE_STONITH_STONITH_H if (stonith__agent_is_lha(agent)) { return st_namespace_lha; } #endif crm_err("Unknown fence agent: %s", agent); return st_namespace_invalid; } gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) { gboolean rv = FALSE; stonith_t *stonith_api = st?st:stonith_api_new(); char *list = NULL; if(stonith_api) { if (stonith_api->state == stonith_disconnected) { int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL); if (rc != pcmk_ok) { crm_err("Failed connecting to Stonith-API for watchdog-fencing-query."); } } if (stonith_api->state != stonith_disconnected) { /* caveat!!! * this might fail when when stonithd is just updating the device-list * probably something we should fix as well for other api-calls */ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0); if ((rc != pcmk_ok) || (list == NULL)) { /* due to the race described above it can happen that * we drop in here - so as not to make remote nodes * panic on that answer */ crm_warn("watchdog-fencing-query failed"); } else if (list[0] == '\0') { rv = TRUE; } else { GList *targets = stonith__parse_targets(list); rv = pcmk__str_in_list(node, targets, pcmk__str_casei); g_list_free_full(targets, free); } free(list); if (!st) { /* if we're provided the api we still might have done the * connection - but let's assume the caller won't bother */ stonith_api->cmds->disconnect(stonith_api); } } if (!st) { stonith_api_delete(stonith_api); } } else { crm_err("Stonith-API for watchdog-fencing-query couldn't be created."); } crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.", node, rv?"":"not "); return rv; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node) { return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); } static void log_action(stonith_action_t *action, pid_t pid) { if (action->output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid); crm_log_output(LOG_TRACE, prefix, action->output); free(prefix); } if (action->error) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); crm_log_output(LOG_WARNING, prefix, action->error); free(prefix); } } /* when cycling through the list we don't want to delete items so just mark them and when we know nobody is using the list loop over it to remove the marked items */ static void foreach_notify_entry (stonith_private_t *private, GFunc func, gpointer user_data) { private->notify_refcnt++; g_list_foreach(private->notify_list, func, user_data); private->notify_refcnt--; if ((private->notify_refcnt == 0) && private->notify_deletes) { GList *list_item = private->notify_list; private->notify_deletes = FALSE; while (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; GList *next = g_list_next(list_item); if (list_client->delete) { free(list_client); private->notify_list = g_list_delete_link(private->notify_list, list_item); } list_item = next; } } } static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = create_xml_node(NULL, "notify"); native = stonith->st_private; native->ipc = NULL; native->source = NULL; free(native->token); native->token = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT); foreach_notify_entry(native, stonith_send_notification, &blob); free_xml(blob.xml); } xmlNode * create_device_registration_xml(const char *id, enum stonith_namespace namespace, const char *agent, stonith_key_value_t *params, const char *rsc_provides) { xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H if (namespace == st_namespace_any) { namespace = stonith_get_namespace(agent, NULL); } if (namespace == st_namespace_lha) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, XML_ATTR_ID, id); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, "agent", agent); if ((namespace != st_namespace_any) && (namespace != st_namespace_invalid)) { crm_xml_add(data, "namespace", stonith_namespace2text(namespace)); } if (rsc_provides) { crm_xml_add(data, "rsc_provides", rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t * st, int call_options, const char *id, const char *namespace, const char *agent, stonith_key_value_t * params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, stonith_text2namespace(namespace), agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, XML_ATTR_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level) { int rc = 0; xmlNode *data = NULL; CRM_CHECK(node || pattern || (attr && value), return -EINVAL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { return stonith_api_remove_level_full(st, options, node, NULL, NULL, NULL, level); } /*! * \internal * \brief Create XML for fence topology level registration request * * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to register * \param[in] device_list List of devices in level * * \return Newly allocated XML tree on success, NULL otherwise * * \note The caller should set only one of node, pattern or attr/value. */ xmlNode * create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { size_t len = 0; char *list = NULL; xmlNode *data; CRM_CHECK(node || pattern || (attr && value), return NULL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); CRM_CHECK(data, return NULL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } // cppcheck seems not to understand the abort logic behind pcmk__realloc // cppcheck-suppress memleak for (; device_list; device_list = device_list->next) { pcmk__add_separated_word(&list, &len, device_list->value, ","); } crm_xml_add(data, XML_ATTR_STONITH_DEVICES, list); free(list); return data; } static int stonith_api_register_level_full(stonith_t * st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, pattern, attr, value, level, device_list); CRM_CHECK(data != NULL, return -EINVAL); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, stonith_key_value_t * device_list) { return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL, level, device_list); } static void append_config_arg(gpointer key, gpointer value, gpointer user_data) { /* The fencer will filter "action" out when it registers the device, * but ignore it here in case any external API users don't. * * Also filter out parameters handled directly by Pacemaker. */ if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei) && !pcmk_stonith_param(key) && (strstr(key, CRM_META) == NULL) && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) { crm_trace("Passing %s=%s with fence action", (const char *) key, (const char *) (value? value : "")); g_hash_table_insert((GHashTable *) user_data, strdup(key), strdup(value? value : "")); } } static GHashTable * make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, GHashTable * port_map, const char *host_arg) { GHashTable *arg_list = NULL; const char *value = NULL; CRM_CHECK(action != NULL, return NULL); arg_list = pcmk__strkey_table(free, free); // Add action to arguments (using an alias if requested) if (device_args) { char buffer[512]; snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action); value = g_hash_table_lookup(device_args, buffer); if (value) { crm_debug("Substituting '%s' for fence action %s targeting %s", value, action, victim); action = value; } } g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP), strdup(action)); /* If this is a fencing operation against another node, add more standard * arguments. */ if (victim && device_args) { const char *param = NULL; /* Always pass the target's name, per * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md */ g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim)); // If the target's node ID was specified, pass it, too if (victim_nodeid) { char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid); // cts-fencing looks for this log message crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s", nodeid, action, victim); g_hash_table_insert(arg_list, strdup("nodeid"), nodeid); } // Check whether target must be specified in some other way param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT); if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none) && !pcmk__str_eq(param, "none", pcmk__str_casei)) { if (param == NULL) { /* Use the caller's default for pcmk_host_argument, or "port" if * none was given */ param = (host_arg == NULL)? "port" : host_arg; } value = g_hash_table_lookup(device_args, param); if (pcmk__str_eq(value, "dynamic", pcmk__str_casei|pcmk__str_null_matches)) { /* If the host argument was "dynamic" or not explicitly specified, * add it with the target */ const char *alias = NULL; if (port_map) { alias = g_hash_table_lookup(port_map, victim); } if (alias == NULL) { alias = victim; } crm_debug("Passing %s='%s' with fence action %s targeting %s", param, alias, action, victim); g_hash_table_insert(arg_list, strdup(param), strdup(alias)); } } } if (device_args) { g_hash_table_foreach(device_args, append_config_arg, arg_list); } return arg_list; } /*! * \internal * \brief Free all memory used by a stonith action * * \param[in,out] action Action to free */ void stonith__destroy_action(stonith_action_t *action) { if (action) { free(action->agent); if (action->args) { g_hash_table_destroy(action->args); } free(action->action); free(action->victim); if (action->svc_action) { services_action_free(action->svc_action); } free(action->output); free(action->error); free(action); } } /*! * \internal * \brief Get the result of an executed stonith action * * \param[in,out] action Executed action * \param[out] rc Where to store result code (or NULL) * \param[out] output Where to store standard output (or NULL) * \param[out] error_output Where to store standard error output (or NULL) * * \note If output or error_output is not NULL, the caller is responsible for * freeing the memory. */ void stonith__action_result(stonith_action_t *action, int *rc, char **output, char **error_output) { if (rc) { *rc = pcmk_ok; } if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } if (action != NULL) { if (rc) { *rc = action->rc; } if (output && action->output) { *output = action->output; action->output = NULL; // hand off memory management to caller } if (error_output && action->error) { *error_output = action->error; action->error = NULL; // hand off memory management to caller } } } #define FAILURE_MAX_RETRIES 2 stonith_action_t * stonith_action_create(const char *agent, const char *_action, const char *victim, uint32_t victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map, const char *host_arg) { stonith_action_t *action; action = calloc(1, sizeof(stonith_action_t)); action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map, host_arg); crm_debug("Preparing '%s' action for %s using agent %s", _action, (victim? victim : "no target"), agent); action->agent = strdup(agent); action->action = strdup(_action); if (victim) { action->victim = strdup(victim); } action->timeout = action->remaining_timeout = timeout; action->max_retries = FAILURE_MAX_RETRIES; if (device_args) { char buffer[512]; const char *value = NULL; snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action); value = g_hash_table_lookup(device_args, buffer); if (value) { action->max_retries = atoi(value); } } return action; } static gboolean update_remaining_timeout(stonith_action_t * action) { int diff = time(NULL) - action->initial_start_time; if (action->tries >= action->max_retries) { crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", action->agent, action->action, action->max_retries); action->remaining_timeout = 0; } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) { /* only set remaining timeout period if there is 30% * or greater of the original timeout period left */ action->remaining_timeout = action->timeout - diff; } else { action->remaining_timeout = 0; } return action->remaining_timeout ? TRUE : FALSE; } static int svc_action_to_errno(svc_action_t *svc_action) { int rv = pcmk_ok; if (svc_action->status == PCMK_EXEC_TIMEOUT) { rv = -ETIME; } else if (svc_action->rc != PCMK_OCF_OK) { /* Try to provide a useful error code based on the fence agent's * error output. */ if (svc_action->stderr_data == NULL) { rv = -ENODATA; } else if (strstr(svc_action->stderr_data, "imed out")) { /* Some agents have their own internal timeouts */ rv = -ETIME; } else if (strstr(svc_action->stderr_data, "Unrecognised action")) { rv = -EOPNOTSUPP; } else { rv = -pcmk_err_generic; } } return rv; } static void stonith_action_async_done(svc_action_t *svc_action) { stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; action->rc = svc_action_to_errno(svc_action); action->output = svc_action->stdout_data; svc_action->stdout_data = NULL; action->error = svc_action->stderr_data; svc_action->stderr_data = NULL; svc_action->params = NULL; crm_debug("Child process %d performing action '%s' exited with rc %d", action->pid, action->action, svc_action->rc); log_action(action, action->pid); if (action->rc != pcmk_ok && update_remaining_timeout(action)) { int rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { return; } } if (action->done_cb) { action->done_cb(action->pid, action->rc, action->output, action->userdata); } action->svc_action = NULL; // don't remove our caller stonith__destroy_action(action); } static void stonith_action_async_forked(svc_action_t *svc_action) { stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; action->pid = svc_action->pid; action->svc_action = svc_action; if (action->fork_cb) { (action->fork_cb) (svc_action->pid, action->userdata); } crm_trace("Child process %d performing action '%s' successfully forked", action->pid, action->action); } static int internal_stonith_action_execute(stonith_action_t * action) { int rc = -EPROTO; int is_retry = 0; svc_action_t *svc_action = NULL; static int stonith_sequence = 0; char *buffer = NULL; if ((action == NULL) || (action->action == NULL) || (action->args == NULL) || (action->agent == NULL)) { return -EPROTO; } if (!action->tries) { action->initial_start_time = time(NULL); } action->tries++; if (action->tries > 1) { crm_info("Attempt %d to execute %s (%s). remaining timeout is %d", action->tries, action->agent, action->action, action->remaining_timeout); is_retry = 1; } buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s", basename(action->agent)); svc_action = services_action_create_generic(buffer, NULL); free(buffer); + + if (svc_action->rc != PCMK_OCF_UNKNOWN) { + services_action_free(svc_action); + return -E2BIG; + } + svc_action->timeout = 1000 * action->remaining_timeout; svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH); svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent), action->action, action->tries); svc_action->agent = strdup(action->agent); svc_action->sequence = stonith_sequence++; svc_action->params = action->args; svc_action->cb_data = (void *) action; svc_action->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", svc_action->id, svc_action->flags, SVC_ACTION_NON_BLOCKED, "SVC_ACTION_NON_BLOCKED"); /* keep retries from executing out of control and free previous results */ if (is_retry) { free(action->output); action->output = NULL; free(action->error); action->error = NULL; sleep(1); } if (action->async) { /* async */ if (services_action_async_fork_notify(svc_action, &stonith_action_async_done, &stonith_action_async_forked)) { return pcmk_ok; } } else if (services_action_sync(svc_action)) { // sync success rc = pcmk_ok; action->rc = svc_action_to_errno(svc_action); action->output = svc_action->stdout_data; svc_action->stdout_data = NULL; action->error = svc_action->stderr_data; svc_action->stderr_data = NULL; } else { // sync failure action->rc = -ECONNABORTED; rc = action->rc; } svc_action->params = NULL; services_action_free(svc_action); return rc; } /*! * \internal * \brief Kick off execution of an async stonith action * * \param[in,out] action Action to be executed * \param[in,out] userdata Datapointer to be passed to callbacks * \param[in] done Callback to notify action has failed/succeeded * \param[in] fork_callback Callback to notify successful fork of child * * \return pcmk_ok if ownership of action has been taken, -errno otherwise */ int stonith_action_execute_async(stonith_action_t * action, void *userdata, void (*done) (GPid pid, int rc, const char *output, gpointer user_data), void (*fork_cb) (GPid pid, gpointer user_data)) { if (!action) { return -EINVAL; } action->userdata = userdata; action->done_cb = done; action->fork_cb = fork_cb; action->async = 1; return internal_stonith_action_execute(action); } /*! * \internal * \brief Execute a stonith action * * \param[in,out] action Action to execute * * \return pcmk_ok on success, -errno otherwise */ int stonith__execute(stonith_action_t *action) { int rc = pcmk_ok; CRM_CHECK(action != NULL, return -EINVAL); // Keep trying until success, max retries, or timeout do { rc = internal_stonith_action_execute(action); } while ((rc != pcmk_ok) && update_remaining_timeout(action)); return rc; } static int stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace, stonith_key_value_t ** devices, int timeout) { int count = 0; enum stonith_namespace ns = stonith_text2namespace(namespace); if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } #if HAVE_STONITH_STONITH_H // Include Linux-HA agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { count += stonith__list_lha_agents(devices); } #endif // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { count += stonith__list_rhcs_agents(devices); } return count; } static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { /* By executing meta-data directly, we can get it from stonith_admin when * the cluster is not running, which is important for higher-level tools. */ enum stonith_namespace ns = stonith_get_namespace(agent, namespace); crm_trace("Looking up metadata for %s agent %s", stonith_namespace2text(ns), agent); switch (ns) { case st_namespace_rhcs: return stonith__rhcs_metadata(agent, timeout, output); #if HAVE_STONITH_STONITH_H case st_namespace_lha: return stonith__lha_metadata(agent, timeout, output); #endif default: crm_err("Can't get fence agent '%s' meta-data: No such agent", agent); break; } return -ENODEV; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObjectPtr xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, F_STONITH_TARGET, target); crm_xml_add(data, F_STONITH_ACTION, "off"); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = xpath_search(output, "//@agent"); if (xpathObj) { max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { xmlChar *match_path = xmlGetNodePath(match); crm_info("%s[%d] = %s", "//@agent", lpc, match_path); free(match_path); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); } } freeXpathObject(xpathObj); } free_xml(output); free_xml(data); return max; } static int stonith_api_call(stonith_t * stonith, int call_options, const char *id, const char *action, const char *victim, int timeout, xmlNode ** output) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, F_STONITH_DEVICE, id); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add(data, F_STONITH_TARGET, victim); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout); free_xml(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, "st_output"); if (list_str) { *list_info = strdup(list_str); } } if (output) { free_xml(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL); } static int stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance, int delay) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, __func__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); crm_xml_add_int(data, F_STONITH_DELAY, delay); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { return stonith_api_fence_with_delay(stonith, call_options, node, action, timeout, tolerance, 0); } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { stonith__set_call_options(call_options, target, st_opt_manual_ack); return stonith_api_fence(stonith, call_options, target, "off", 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = create_xml_node(NULL, __func__); crm_xml_add(data, F_STONITH_TARGET, node); } stonith__set_call_options(call_options, node, st_opt_sync_call); rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options, timeout); free_xml(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_NEVER); for (op = pcmk__xml_first_child(reply); op != NULL; op = pcmk__xml_next(op)) { stonith_history_t *kvp; long long completed; long long completed_nsec = 0L; kvp = calloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, F_STONITH_TARGET); kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); crm_element_value_ll(op, F_STONITH_DATE, &completed); kvp->completed = (time_t) completed; crm_element_value_ll(op, F_STONITH_DATE_NSEC, &completed_nsec); kvp->completed_nsec = completed_nsec; crm_element_value_int(op, F_STONITH_STATE, &kvp->state); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } free_xml(output); return rc; } void stonith_history_free(stonith_history_t *history) { stonith_history_t *hp, *hp_old; for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) { free(hp->target); free(hp->action); free(hp->origin); free(hp->delegate); free(hp->client); } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; if (a_client->delete || b_client->delete) { /* make entries marked for deletion not findable */ return -1; } CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_STONITH_OPERATION, op); crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_STONITH_CALLDATA, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->st_private; crm_debug("Disconnecting from the fencer"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->st_private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { pcmk__intkey_table_remove(private->stonith_op_callback_table, call_id); } return pcmk_ok; } static void invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = rc; data.userdata = userdata; callback(st, &data); } static void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc) { stonith_private_t *private = NULL; stonith_callback_client_t *blob = NULL; stonith_callback_client_t local_blob; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->st_private != NULL, return); private = stonith->st_private; local_blob.id = NULL; local_blob.callback = NULL; local_blob.user_data = NULL; local_blob.only_success = FALSE; if (msg != NULL) { crm_element_value_int(msg, F_STONITH_RC, &rc); crm_element_value_int(msg, F_STONITH_CALLID, &call_id); } CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result")); blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (blob != NULL) { local_blob = *blob; blob = NULL; stonith_api_del_callback(stonith, call_id, FALSE); } else { crm_trace("No callback found for call %d", call_id); local_blob.callback = NULL; } if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback); } else if (private->op_callback == NULL && rc != pcmk_ok) { crm_warn("Fencing command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed fence update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_callback(stonith, call_id, rc, NULL, private->op_callback); } crm_trace("OP callback activated."); } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = calloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->st_private; callback = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->st_private; blob.stonith = st; blob.xml = string2xml(buffer); if (blob.xml == NULL) { crm_warn("Received malformed message from fencer: %s", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, F_TYPE); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) { stonith_perform_callback(st, blob.xml, 0, 0); } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) { foreach_notify_entry(private, stonith_send_notification, &blob); } else if (pcmk__str_eq(type, T_STONITH_TIMEOUT_VALUE, pcmk__str_casei)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout); crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } free_xml(blob.xml); return 1; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = NULL; const char *display_name = name? name : "client"; struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; CRM_CHECK(stonith != NULL, return -EINVAL); native = stonith->st_private; CRM_ASSERT(native != NULL); crm_debug("Attempting fencer connection by %s with%s mainloop", display_name, (stonith_fd? "out" : "")); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *stonith_fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_ipc_close(native->ipc); crm_ipc_destroy(native->ipc); native->ipc = NULL; } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { rc = -ENOTCONN; } else { xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "stonith_command"); crm_xml_add(hello, F_TYPE, T_STONITH_NG); crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_STONITH_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_debug("Couldn't register with the fencer: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); rc = -ECOMM; } else if (reply == NULL) { crm_debug("Couldn't register with the fencer: no reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION); native->token = crm_element_value_copy(reply, F_STONITH_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_debug("Couldn't register with the fencer: invalid reply type '%s'", (msg_type? msg_type : "(missing)")); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else if (native->token == NULL) { crm_debug("Couldn't register with the fencer: no token in reply"); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else { #if HAVE_MSGFROMIPC_TIMEOUT stonith->call_timeout = PCMK__IPC_TIMEOUT; #endif crm_debug("Connection to fencer by %s succeeded (registration token: %s)", display_name, native->token); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); } if (rc != pcmk_ok) { crm_debug("Connection attempt to fencer by %s failed: %s " CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc); stonith->cmds->disconnect(stonith); } return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = create_xml_node(NULL, __func__); stonith_private_t *native = stonith->st_private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); if (enabled) { crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } free_xml(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->st_private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; crm_debug("Removing callback for %s events", event); private = stonith->st_private; new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; if (private->notify_refcnt) { list_client->delete = TRUE; private->notify_deletes = TRUE; } else { private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); } crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->st_private != NULL, return -EINVAL); private = stonith->st_private; if (call_id == 0) { private->op_callback = callback; } else if (call_id < 0) { if (!(options & st_opt_report_only_success)) { crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); invoke_callback(stonith, call_id, call_id, user_data, callback); } else { crm_warn("Fencer call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = calloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id, blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->st_private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } /* */ static stonith_event_t * xml_to_event(xmlNode * msg) { stonith_event_t *event = calloc(1, sizeof(stonith_event_t)); const char *ntype = crm_element_value(msg, F_SUBTYPE); char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG); crm_log_xml_trace(msg, "stonith_notify"); crm_element_value_int(msg, F_STONITH_RC, &(event->result)); if (pcmk__str_eq(ntype, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION); if (data) { event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN); event->action = crm_element_value_copy(data, F_STONITH_ACTION); event->target = crm_element_value_copy(data, F_STONITH_TARGET); event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE); event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID); event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME); event->device = crm_element_value_copy(data, F_STONITH_DEVICE); } else { crm_err("No data for %s event", ntype); crm_log_xml_notice(msg, "BadEvent"); } } free(data_addr); return event; } static void event_free(stonith_event_t * event) { free(event->id); free(event->type); free(event->message); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->delete) { crm_trace("Skipping callback - marked for deletion"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (!pcmk__str_eq(entry->event, event, pcmk__str_casei)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } /*! * \internal * \brief Create and send an API request * * \param[in] stonith Stonith connection * \param[in] op API operation to request * \param[in] data Data to attach to request * \param[out] output_data If not NULL, will be set to reply if synchronous * \param[in] call_options Bitmask of stonith_call_options to use * \param[in] timeout Error if not completed within this many seconds * * \return pcmk_ok (for synchronous requests) or positive call ID * (for asynchronous requests) on success, -errno otherwise */ static int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = NULL; CRM_ASSERT(stonith && stonith->st_private && op); native = stonith->st_private; if (output_data != NULL) { *output_data = NULL; } if ((stonith->state == stonith_disconnected) || (native->token == NULL)) { return -ENOTCONN; } /* Increment the call ID, which must be positive to avoid conflicting with * error codes. This shouldn't be a problem unless the client mucked with * it or the counter wrapped around. */ stonith->call_id++; if (stonith->call_id < 1) { stonith->call_id = 1; } op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to fencer with timeout %ds", op, timeout); if (data) { const char *delay_s = crm_element_value(data, F_STONITH_DELAY); if (delay_s) { crm_xml_add(op_msg, F_STONITH_DELAY, delay_s); } } { enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; if (call_options & st_opt_sync_call) { pcmk__set_ipc_flags(ipc_flags, "stonith command", crm_ipc_client_response); } rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); } free_xml(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); free_xml(op_reply); return stonith->call_id; } rc = pcmk_ok; crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); if (reply_id == stonith->call_id) { crm_trace("Synchronous reply %d received", reply_id); if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) { rc = -ENOMSG; } if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); free_xml(op_reply); rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); free_xml(op_reply); rc = -ENOMSG; } done: if (crm_ipc_connected(native->ipc) == FALSE) { crm_err("Fencer disconnected"); free(native->token); native->token = NULL; stonith->state = stonith_disconnected; } free_xml(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->st_private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (crm_ipc_connected(private->ipc) == FALSE) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Disconnecting %p first", stonith); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->st_private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->st_private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { crm_trace("Destroying %p", stonith); if(stonith) { stonith->cmds->free(stonith); } } static int stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, stonith_key_value_t *params, int timeout, char **output, char **error_output) { /* Validation should be done directly via the agent, so we can get it from * stonith_admin when the cluster is not running, which is important for * higher-level tools. */ int rc = pcmk_ok; /* Use a dummy node name in case the agent requires a target. We assume the * actual target doesn't matter for validation purposes (if in practice, * that is incorrect, we will need to allow the caller to pass the target). */ const char *target = "node1"; const char *host_arg = NULL; GHashTable *params_table = pcmk__strkey_table(free, free); // Convert parameter list to a hash table for (; params; params = params->next) { if (pcmk__str_eq(params->key, PCMK_STONITH_HOST_ARGUMENT, pcmk__str_casei)) { host_arg = params->value; } if (!pcmk_stonith_param(params->key)) { g_hash_table_insert(params_table, strdup(params->key), strdup(params->value)); } } #if SUPPORT_CIBSECRETS rc = pcmk__substitute_secrets(rsc_id, params_table); if (rc != pcmk_rc_ok) { crm_warn("Could not replace secret parameters for validation of %s: %s", agent, pcmk_rc_str(rc)); // rc is standard return value, don't return it in this function } #endif if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } switch (stonith_get_namespace(agent, namespace_s)) { case st_namespace_rhcs: rc = stonith__rhcs_validate(st, call_options, target, agent, params_table, host_arg, timeout, output, error_output); break; #if HAVE_STONITH_STONITH_H case st_namespace_lha: rc = stonith__lha_validate(st, call_options, target, agent, params_table, timeout, output, error_output); break; #endif default: rc = -EINVAL; errno = EINVAL; crm_perror(LOG_ERR, "Agent %s not found or does not support validation", agent); break; } g_hash_table_destroy(params_table); return rc; } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); if (new_stonith == NULL) { return NULL; } private = calloc(1, sizeof(stonith_private_t)); if (private == NULL) { free(new_stonith); return NULL; } new_stonith->st_private = private; private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); private->notify_list = NULL; private->notify_refcnt = 0; private->notify_deletes = FALSE; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); if (new_stonith->cmds == NULL) { free(new_stonith->st_private); free(new_stonith); return NULL; } /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; new_stonith->cmds->validate = stonith_api_validate; /* *INDENT-ON* */ return new_stonith; } /*! * \brief Make a blocking connection attempt to the fencer * * \param[in,out] st Fencer API object * \param[in] name Client name to use with fencer * \param[in] max_attempts Return error if this many attempts fail * * \return pcmk_ok on success, result of last attempt otherwise */ int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts) { int rc = -EINVAL; // if max_attempts is not positive for (int attempt = 1; attempt <= max_attempts; attempt++) { rc = st->cmds->connect(st, name, NULL); if (rc == pcmk_ok) { return pcmk_ok; } else if (attempt < max_attempts) { crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s " CRM_XS " rc=%d", attempt, max_attempts, pcmk_strerror(rc), rc); sleep(2); } } crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); return rc; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = calloc(1, sizeof(stonith_key_value_t)); if (key) { p->key = strdup(key); } if (value) { p->value = strdup(value); } end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { int rc = pcmk_ok; stonith_t *st = stonith_api_new(); const char *action = off? "off" : "reboot"; api_log_open(); if (st == NULL) { api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s", action, nodeid, uname); return -EPROTO; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); int opts = 0; stonith__set_call_options(opts, name, st_opt_sync_call|st_opt_allow_suicide); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->fence(st, opts, name, action, timeout, 0); free(name); if (rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action); } } stonith_api_delete(st); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = pcmk_ok; time_t when = 0; stonith_t *st = stonith_api_new(); stonith_history_t *history = NULL, *hp = NULL; if (st == NULL) { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: " "API initialization failed", nodeid, uname); return when; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } else { int entries = 0; int progress = 0; int completed = 0; int opts = 0; char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); stonith__set_call_options(opts, name, st_opt_sync_call); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->history(st, opts, name, &history, 120); free(name); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } stonith_history_free(history); if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } stonith_api_delete(st); if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } return when; } bool stonith_agent_exists(const char *agent, int timeout) { stonith_t *st = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; bool rc = FALSE; if (agent == NULL) { return rc; } st = stonith_api_new(); if (st == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return FALSE; } st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout == 0 ? 120 : timeout); for (dIter = devices; dIter != NULL; dIter = dIter->next) { if (pcmk__str_eq(dIter->value, agent, pcmk__str_none)) { rc = TRUE; break; } } stonith_key_value_freeall(devices, 1, 1); stonith_api_delete(st); return rc; } const char * stonith_action_str(const char *action) { if (action == NULL) { return "fencing"; } else if (!strcmp(action, "on")) { return "unfencing"; } else if (!strcmp(action, "off")) { return "turning off"; } else { return action; } } /*! * \internal * \brief Parse a target name from one line of a target list string * * \param[in] line One line of a target list string * \param[in] len String length of line * \param[in,out] output List to add newly allocated target name to */ static void parse_list_line(const char *line, int len, GList **output) { size_t i = 0; size_t entry_start = 0; /* Skip complaints about additional parameters device doesn't understand * * @TODO Document or eliminate the implied restriction of target names */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping list output line: %s", line); return; } // Process line content, character by character for (i = 0; i <= len; i++) { if (isspace(line[i]) || (line[i] == ',') || (line[i] == ';') || (line[i] == '\0')) { // We've found a separator (i.e. the end of an entry) int rc = 0; char *entry = NULL; if (i == entry_start) { // Skip leading and sequential separators entry_start = i + 1; continue; } entry = calloc(i - entry_start + 1, sizeof(char)); CRM_ASSERT(entry != NULL); /* Read entry, stopping at first separator * * @TODO Document or eliminate these character restrictions */ rc = sscanf(line + entry_start, "%[a-zA-Z0-9_-.]", entry); if (rc != 1) { crm_warn("Could not parse list output entry: %s " CRM_XS " entry_start=%d position=%d", line + entry_start, entry_start, i); free(entry); } else if (pcmk__strcase_any_of(entry, "on", "off", NULL)) { /* Some agents print the target status in the list output, * though none are known now (the separate list-status command * is used for this, but it can also print "UNKNOWN"). To handle * this possibility, skip such entries. * * @TODO Document or eliminate the implied restriction of target * names. */ free(entry); } else { // We have a valid entry *output = g_list_append(*output, entry); } entry_start = i + 1; } } } /*! * \internal * \brief Parse a list of targets from a string * * \param[in] list_output Target list as a string * * \return List of target names * \note The target list string format is flexible, to allow for user-specified * lists such pcmk_host_list and the output of an agent's list action * (whether direct or via the API, which escapes newlines). There may be * multiple lines, separated by either a newline or an escaped newline * (backslash n). Each line may have one or more target names, separated * by any combination of whitespace, commas, and semi-colons. Lines * containing "invalid" or "variable" will be ignored entirely. Target * names "on" or "off" (case-insensitive) will be ignored. Target names * may contain only alphanumeric characters, underbars (_), dashes (-), * and dots (.) (if any other character occurs in the name, it and all * subsequent characters in the name will be ignored). * \note The caller is responsible for freeing the result with * g_list_free_full(result, free). */ GList * stonith__parse_targets(const char *target_spec) { GList *targets = NULL; if (target_spec != NULL) { size_t out_len = strlen(target_spec); size_t line_start = 0; // Starting index of line being processed for (size_t i = 0; i <= out_len; ++i) { if ((target_spec[i] == '\n') || (target_spec[i] == '\0') || ((target_spec[i] == '\\') && (target_spec[i + 1] == 'n'))) { // We've reached the end of one line of output int len = i - line_start; if (len > 0) { char *line = strndup(target_spec + line_start, len); line[len] = '\0'; // Because it might be a newline parse_list_line(line, len, &targets); free(line); } if (target_spec[i] == '\\') { ++i; // backslash-n takes up two positions } line_start = i + 1; } } } return targets; } /*! * \internal * \brief Determine if a later stonith event succeeded. * * \note Before calling this function, use stonith__sort_history() to sort the * top_history argument. */ gboolean stonith__later_succeeded(stonith_history_t *event, stonith_history_t *top_history) { gboolean ret = FALSE; for (stonith_history_t *prev_hp = top_history; prev_hp; prev_hp = prev_hp->next) { if (prev_hp == event) { break; } if ((prev_hp->state == st_done) && pcmk__str_eq(event->target, prev_hp->target, pcmk__str_casei) && pcmk__str_eq(event->action, prev_hp->action, pcmk__str_casei) && pcmk__str_eq(event->delegate, prev_hp->delegate, pcmk__str_casei) && ((event->completed < prev_hp->completed) || ((event->completed == prev_hp->completed) && (event->completed_nsec < prev_hp->completed_nsec)))) { ret = TRUE; break; } } return ret; } /*! * \internal * \brief Sort the stonith-history * sort by competed most current on the top * pending actions lacking a completed-stamp are gathered at the top * * \param[in] history List of stonith actions * */ stonith_history_t * stonith__sort_history(stonith_history_t *history) { stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp; for (hp = history; hp; ) { tmp = hp->next; if ((hp->state == st_done) || (hp->state == st_failed)) { /* sort into new */ if ((!new) || (hp->completed > new->completed) || ((hp->completed == new->completed) && (hp->completed_nsec > new->completed_nsec))) { hp->next = new; new = hp; } else { np = new; do { if ((!np->next) || (hp->completed > np->next->completed) || ((hp->completed == np->next->completed) && (hp->completed_nsec > np->next->completed_nsec))) { hp->next = np->next; np->next = hp; break; } np = np->next; } while (1); } } else { /* put into pending */ hp->next = pending; pending = hp; } hp = tmp; } /* pending actions don't have a completed-stamp so make them go front */ if (pending) { stonith_history_t *last_pending = pending; while (last_pending->next) { last_pending = last_pending->next; } last_pending->next = new; new = pending; } return new; } /*! * \brief Return string equivalent of an operation state value * * \param[in] state Fencing operation state value * * \return Human-friendly string equivalent of state */ const char * stonith_op_state_str(enum op_state state) { switch (state) { case st_query: return "querying"; case st_exec: return "executing"; case st_done: return "completed"; case st_duplicate: return "duplicate"; case st_failed: return "failed"; } return "unknown"; } stonith_history_t * stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data) { for (stonith_history_t *hp = history; hp; hp = hp->next) { if (matching_fn(hp, user_data)) { return hp; } } return NULL; } bool stonith__event_state_pending(stonith_history_t *history, void *user_data) { return history->state != st_failed && history->state != st_done; } bool stonith__event_state_eq(stonith_history_t *history, void *user_data) { return history->state == GPOINTER_TO_INT(user_data); } bool stonith__event_state_neq(stonith_history_t *history, void *user_data) { return history->state != GPOINTER_TO_INT(user_data); } void stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name, xmlNode *metadata) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; CRM_CHECK((device_flags != NULL) && (metadata != NULL), return); xpath = xpath_search(metadata, "//parameter"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *parameter = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if (match == NULL) { continue; } parameter = crm_element_value(match, "name"); if (pcmk__str_eq(parameter, "plug", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_plug); } else if (pcmk__str_eq(parameter, "port", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_port); } } freeXpathObject(xpath); } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START const char *get_stonith_provider(const char *agent, const char *provider); const char * get_stonith_provider(const char *agent, const char *provider) { return stonith_namespace2text(stonith_get_namespace(agent, provider)); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/services/services.c b/lib/services/services.c index defb4308e0..958ebfeb01 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -1,1213 +1,1366 @@ /* * Copyright 2010-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "services_private.h" #include "services_lsb.h" #if SUPPORT_UPSTART # include #endif #if SUPPORT_SYSTEMD # include #endif #if SUPPORT_NAGIOS # include #endif /* TODO: Develop a rollover strategy */ static int operations = 0; static GHashTable *recurring_actions = NULL; /* ops waiting to run async because of conflicting active * pending ops */ static GList *blocked_ops = NULL; /* ops currently active (in-flight) */ static GList *inflight_ops = NULL; static void handle_blocked_ops(void); /*! * \brief Find first service class that can provide a specified agent * * \param[in] agent Name of agent to search for * * \return Service class if found, NULL otherwise * * \note The priority is LSB, then systemd, then upstart. It would be preferable * to put systemd first, but LSB merely requires a file existence check, * while systemd requires contacting D-Bus. */ const char * resources_find_service_class(const char *agent) { if (services__lsb_agent_exists(agent)) { return PCMK_RESOURCE_CLASS_LSB; } #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { return PCMK_RESOURCE_CLASS_SYSTEMD; } #endif #if SUPPORT_UPSTART if (upstart_job_exists(agent)) { return PCMK_RESOURCE_CLASS_UPSTART; } #endif return NULL; } static inline void init_recurring_actions(void) { if (recurring_actions == NULL) { recurring_actions = pcmk__strkey_table(NULL, NULL); } } /*! * \internal * \brief Check whether op is in-flight systemd or upstart op * * \param[in] op Operation to check * * \return TRUE if op is in-flight systemd or upstart op */ static inline gboolean inflight_systemd_or_upstart(svc_action_t *op) { return pcmk__strcase_any_of(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD, PCMK_RESOURCE_CLASS_UPSTART, NULL) && g_list_find(inflight_ops, op) != NULL; } /*! * \internal * \brief Expand "service" alias to an actual resource class * * \param[in] rsc Resource name (for logging only) * \param[in] standard Resource class as configured * \param[in] agent Agent name to look for * * \return Newly allocated string with actual resource class * * \note The caller is responsible for calling free() on the result. */ static char * expand_resource_class(const char *rsc, const char *standard, const char *agent) { char *expanded_class = NULL; if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) { const char *found_class = resources_find_service_class(agent); if (found_class) { crm_debug("Found %s agent %s for %s", found_class, agent, rsc); expanded_class = strdup(found_class); } else { crm_info("Assuming resource class lsb for agent %s for %s", agent, rsc); expanded_class = strdup(PCMK_RESOURCE_CLASS_LSB); } } else { expanded_class = strdup(standard); } CRM_ASSERT(expanded_class); return expanded_class; } /*! * \internal * \brief Create a simple svc_action_t instance * * \return Newly allocated instance (or NULL if not enough memory) */ static svc_action_t * new_action(void) { svc_action_t *op = calloc(1, sizeof(svc_action_t)); if (op == NULL) { return NULL; } op->opaque = calloc(1, sizeof(svc_action_private_t)); if (op->opaque == NULL) { free(op); return NULL; } // Initialize result - op->rc = PCMK_OCF_UNKNOWN; - op->status = PCMK_EXEC_UNKNOWN; + services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, NULL); return op; } -svc_action_t * -services__create_resource_action(const char *name, const char *standard, - const char *provider, const char *agent, - const char *action, guint interval_ms, int timeout, - GHashTable *params, enum svc_action_flags flags) +static bool +required_argument_missing(uint32_t ra_caps, const char *name, + const char *standard, const char *provider, + const char *agent, const char *action) { - svc_action_t *op = NULL; - uint32_t ra_caps = 0; - - op = new_action(); - if (op == NULL) { - crm_crit("Cannot prepare action: %s", strerror(ENOMEM)); - if (params != NULL) { - g_hash_table_destroy(params); - } - return NULL; - } - - /* - * Do some up front sanity checks before we go off and - * build the svc_action_t instance. - */ - if (pcmk__str_empty(name)) { - crm_err("Cannot create operation without resource name"); - goto return_error; + crm_info("Cannot create operation without resource name (bug?)"); + return true; } if (pcmk__str_empty(standard)) { - crm_err("Cannot create operation for %s without resource class", name); - goto return_error; + crm_info("Cannot create operation for %s without resource class (bug?)", + name); + return true; } - ra_caps = pcmk_get_ra_caps(standard); if (pcmk_is_set(ra_caps, pcmk_ra_cap_provider) && pcmk__str_empty(provider)) { - crm_err("Cannot create operation for %s without provider", name); - goto return_error; + crm_info("Cannot create operation for %s resource %s " + "without provider (bug?)", standard, name); + return true; } if (pcmk__str_empty(agent)) { - crm_err("Cannot create operation for %s without agent name", name); - goto return_error; + crm_info("Cannot create operation for %s without agent name (bug?)", + name); + return true; } if (pcmk__str_empty(action)) { - crm_err("Cannot create operation for %s without operation name", name); - goto return_error; + crm_info("Cannot create operation for %s without action name (bug?)", + name); + return true; } + return false; +} - /* - * Sanity checks passed, proceed! - */ - +// \return Standard Pacemaker return code (pcmk_rc_ok or ENOMEM) +static int +copy_action_arguments(svc_action_t *op, uint32_t ra_caps, const char *name, + const char *standard, const char *provider, + const char *agent, const char *action) +{ op->rsc = strdup(name); - op->interval_ms = interval_ms; - op->timeout = timeout; - op->standard = expand_resource_class(name, standard, agent); + if (op->rsc == NULL) { + return ENOMEM; + } + op->agent = strdup(agent); - op->sequence = ++operations; - op->flags = flags; - op->id = pcmk__op_key(name, action, interval_ms); + if (op->agent == NULL) { + return ENOMEM; + } + + op->standard = expand_resource_class(name, standard, agent); + if (op->standard == NULL) { + return ENOMEM; + } if (pcmk_is_set(ra_caps, pcmk_ra_cap_status) && pcmk__str_eq(action, "monitor", pcmk__str_casei)) { - - op->action = strdup("status"); - } else { - op->action = strdup(action); + action = "status"; + } + op->action = strdup(action); + if (op->action == NULL) { + return ENOMEM; } if (pcmk_is_set(ra_caps, pcmk_ra_cap_provider)) { op->provider = strdup(provider); + if (op->provider == NULL) { + return ENOMEM; + } + } + return pcmk_rc_ok; +} + +svc_action_t * +services__create_resource_action(const char *name, const char *standard, + const char *provider, const char *agent, + const char *action, guint interval_ms, int timeout, + GHashTable *params, enum svc_action_flags flags) +{ + svc_action_t *op = NULL; + uint32_t ra_caps = pcmk_get_ra_caps(standard); + + op = new_action(); + if (op == NULL) { + crm_crit("Cannot prepare action: %s", strerror(ENOMEM)); + if (params != NULL) { + g_hash_table_destroy(params); + } + return NULL; } + op->interval_ms = interval_ms; + op->timeout = timeout; + op->flags = flags; + op->sequence = ++operations; + + // Take ownership of params if (pcmk_is_set(ra_caps, pcmk_ra_cap_params)) { op->params = params; - params = NULL; // so we don't free them in this function + } else if (params != NULL) { + g_hash_table_destroy(params); + params = NULL; + } + + if (required_argument_missing(ra_caps, name, standard, provider, agent, + action)) { + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR_FATAL, + "Required agent or action information missing"); + return op; + } + + op->id = pcmk__op_key(name, action, interval_ms); + + if (copy_action_arguments(op, ra_caps, name, standard, provider, agent, + action) != pcmk_rc_ok) { + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); + services__handle_exec_error(op, ENOMEM); + return op; } if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { char *dirs = NULL; char *dir = NULL; char *buf = NULL; struct stat st; if (pcmk__str_empty(OCF_RA_PATH)) { crm_err("Cannot execute OCF actions because resource agent path " "was not configured in this build"); - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR_HARD; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR_HARD, + "No OCF agent path configured in build"); return op; } dirs = strdup(OCF_RA_PATH); if (dirs == NULL) { - crm_err("Cannot create %s operation for %s: %s", - action, name, strerror(ENOMEM)); + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); services__handle_exec_error(op, ENOMEM); return op; } for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) { buf = crm_strdup_printf("%s/%s/%s", dir, provider, agent); if (stat(buf, &st) == 0) { break; } free(buf); buf = NULL; } free(dirs); if (buf) { op->opaque->exec = buf; } else { crm_err("Cannot create %s operation for %s: %s", action, name, strerror(ENOENT)); services__handle_exec_error(op, ENOENT); return op; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); + if ((op->opaque->args[0] == NULL) || (op->opaque->args[1] == NULL)) { + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); + services__handle_exec_error(op, ENOMEM); + return op; + } } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) { op->opaque->exec = pcmk__full_path(op->agent, LSB_ROOT_DIR); op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); + if ((op->opaque->args[0] == NULL) || (op->opaque->args[1] == NULL)) { + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); + services__handle_exec_error(op, ENOMEM); + return op; + } #if SUPPORT_SYSTEMD } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { op->opaque->exec = strdup("systemd-dbus"); + if (op->opaque->exec == NULL) { + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); + services__handle_exec_error(op, ENOMEM); + return op; + } #endif #if SUPPORT_UPSTART } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) { op->opaque->exec = strdup("upstart-dbus"); + if (op->opaque->exec == NULL) { + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); + services__handle_exec_error(op, ENOMEM); + return op; + } #endif #if SUPPORT_NAGIOS } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { op->opaque->exec = pcmk__full_path(op->agent, NAGIOS_PLUGIN_DIR); op->opaque->args[0] = strdup(op->opaque->exec); + if (op->opaque->args[0] == NULL) { + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); + services__handle_exec_error(op, ENOMEM); + return op; + } if (pcmk__str_eq(op->action, "monitor", pcmk__str_casei) && (op->interval_ms == 0)) { /* Invoke --version for a nagios probe */ op->opaque->args[1] = strdup("--version"); + if (op->opaque->args[1] == NULL) { + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); + services__handle_exec_error(op, ENOMEM); + return op; + } } else if (op->params) { GHashTableIter iter; char *key = NULL; char *value = NULL; - int index = 1; - static int args_size = sizeof(op->opaque->args) / sizeof(char *); + int index = 1; // 0 is already set to executable name g_hash_table_iter_init(&iter, op->params); - while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) && - index <= args_size - 3) { + while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { + + if (index > (PCMK__NELEM(op->opaque->args) - 2)) { + crm_info("Cannot prepare %s action for %s: Too many parameters", + action, name); + services__set_result(op, NAGIOS_STATE_UNKNOWN, + PCMK_EXEC_ERROR_HARD, + "Too many parameters"); + break; + } if (pcmk__str_eq(key, XML_ATTR_CRM_VERSION, pcmk__str_casei) || strstr(key, CRM_META "_")) { continue; } op->opaque->args[index++] = crm_strdup_printf("--%s", key); op->opaque->args[index++] = strdup(value); + if (op->opaque->args[index - 1] == NULL) { + crm_crit("Cannot prepare %s action for %s: %s", + action, name, strerror(ENOMEM)); + services__handle_exec_error(op, ENOMEM); + return op; + } } } // Nagios actions don't need to keep the parameters if (op->params != NULL) { g_hash_table_destroy(op->params); op->params = NULL; } #endif } else { crm_err("Unknown resource standard: %s", op->standard); services__handle_exec_error(op, ENOENT); } - return_error: - if(params) { - g_hash_table_destroy(params); - } - return op; } svc_action_t * resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, guint interval_ms, int timeout, GHashTable *params, enum svc_action_flags flags) { svc_action_t *op = services__create_resource_action(name, standard, provider, agent, action, interval_ms, timeout, params, flags); if (op == NULL || op->rc != 0) { services_action_free(op); return NULL; } else { // Preserve public API backward compatibility op->rc = PCMK_OCF_OK; op->status = PCMK_EXEC_DONE; return op; } } svc_action_t * services_action_create_generic(const char *exec, const char *args[]) { svc_action_t *op = new_action(); - unsigned int cur_arg; CRM_ASSERT(op != NULL); op->opaque->exec = strdup(exec); op->opaque->args[0] = strdup(exec); + if ((op->opaque->exec == NULL) || (op->opaque->args[0] == NULL)) { + crm_crit("Cannot prepare action for '%s': %s", exec, strerror(ENOMEM)); + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + strerror(ENOMEM)); + return op; + } - for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) { - op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); + if (args == NULL) { + return op; + } + + for (int cur_arg = 1; args[cur_arg - 1] != NULL; cur_arg++) { + + if (cur_arg == PCMK__NELEM(op->opaque->args)) { + crm_info("Cannot prepare action for '%s': Too many arguments", + exec); + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, + PCMK_EXEC_ERROR_HARD, "Too many arguments"); + break; + } - if (cur_arg == PCMK__NELEM(op->opaque->args) - 1) { - crm_err("svc_action_t args list not long enough for '%s' execution request.", exec); + op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); + if (op->opaque->args[cur_arg] == NULL) { + crm_crit("Cannot prepare action for '%s': %s", + exec, strerror(ENOMEM)); + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + strerror(ENOMEM)); break; } } return op; } /*! * \brief Create an alert agent action * * \param[in] id Alert ID * \param[in] exec Path to alert agent executable * \param[in] timeout Action timeout * \param[in] params Parameters to use with action * \param[in] sequence Action sequence number * \param[in] cb_data Data to pass to callback function * * \return New action on success, NULL on error * \note It is the caller's responsibility to free cb_data. * The caller should not free params explicitly. */ svc_action_t * services_alert_create(const char *id, const char *exec, int timeout, GHashTable *params, int sequence, void *cb_data) { svc_action_t *action = services_action_create_generic(exec, NULL); action->timeout = timeout; action->id = strdup(id); action->params = params; action->sequence = sequence; action->cb_data = cb_data; return action; } /*! * \brief Set the user and group that an action will execute as * * \param[in,out] action Action to modify * \param[in] user Name of user to execute action as * \param[in] group Name of group to execute action as * * \return pcmk_ok on success, -errno otherwise * * \note This will have no effect unless the process executing the action runs * as root, and the action is not a systemd or upstart action. * We could implement this for systemd by adding User= and Group= to * [Service] in the override file, but that seems more likely to cause * problems than be useful. */ int services_action_user(svc_action_t *op, const char *user) { CRM_CHECK((op != NULL) && (user != NULL), return -EINVAL); return crm_user_lookup(user, &(op->opaque->uid), &(op->opaque->gid)); } /*! * \brief Execute an alert agent action * * \param[in] action Action to execute * \param[in] cb Function to call when action completes * * \return TRUE if the library will free action, FALSE otherwise * * \note If this function returns FALSE, it is the caller's responsibility to * free the action with services_action_free(). */ gboolean services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op)) { action->synchronous = false; action->opaque->callback = cb; return services__execute_file(action) == pcmk_rc_ok; } #if SUPPORT_DBUS /*! * \internal * \brief Update operation's pending DBus call, unreferencing old one if needed * * \param[in,out] op Operation to modify * \param[in] pending Pending call to set */ void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending) { if (op->opaque->pending && (op->opaque->pending != pending)) { if (pending) { crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending); } else { crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending); } dbus_pending_call_unref(op->opaque->pending); } op->opaque->pending = pending; if (pending) { crm_trace("Updated pending %s DBus call (%p)", op->id, pending); } else { crm_trace("Cleared pending %s DBus call", op->id); } } #endif void services_action_cleanup(svc_action_t * op) { if ((op == NULL) || (op->opaque == NULL)) { return; } #if SUPPORT_DBUS if(op->opaque->timerid != 0) { crm_trace("Removing timer for call %s to %s", op->action, op->rsc); g_source_remove(op->opaque->timerid); op->opaque->timerid = 0; } if(op->opaque->pending) { if (dbus_pending_call_get_completed(op->opaque->pending)) { // This should never be the case crm_warn("Result of %s op %s was unhandled", op->standard, op->id); } else { crm_debug("Will ignore any result of canceled %s op %s", op->standard, op->id); } dbus_pending_call_cancel(op->opaque->pending); services_set_op_pending(op, NULL); } #endif if (op->opaque->stderr_gsource) { mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } } void services_action_free(svc_action_t * op) { unsigned int i; if (op == NULL) { return; } /* The operation should be removed from all tracking lists by this point. * If it's not, we have a bug somewhere, so bail. That may lead to a * memory leak, but it's better than a use-after-free segmentation fault. */ CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return); CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return); CRM_CHECK((recurring_actions == NULL) || (g_hash_table_lookup(recurring_actions, op->id) == NULL), return); services_action_cleanup(op); if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } free(op->id); free(op->opaque->exec); for (i = 0; i < PCMK__NELEM(op->opaque->args); i++) { free(op->opaque->args[i]); } + free(op->opaque->exit_reason); free(op->opaque); free(op->rsc); free(op->action); free(op->standard); free(op->agent); free(op->provider); free(op->stdout_data); free(op->stderr_data); if (op->params) { g_hash_table_destroy(op->params); op->params = NULL; } free(op); } gboolean cancel_recurring_action(svc_action_t * op) { crm_info("Cancelling %s operation %s", op->standard, op->id); if (recurring_actions) { g_hash_table_remove(recurring_actions, op->id); } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } return TRUE; } /*! * \brief Cancel a recurring action * * \param[in] name Name of resource that operation is for * \param[in] action Name of operation to cancel * \param[in] interval_ms Interval of operation to cancel * * \return TRUE if action was successfully cancelled, FALSE otherwise */ gboolean services_action_cancel(const char *name, const char *action, guint interval_ms) { gboolean cancelled = FALSE; char *id = pcmk__op_key(name, action, interval_ms); svc_action_t *op = NULL; /* We can only cancel a recurring action */ init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); if (op == NULL) { goto done; } // Tell services__finalize_async_op() not to reschedule the operation op->cancel = TRUE; /* Stop tracking it as a recurring operation, and stop its repeat timer */ cancel_recurring_action(op); /* If the op has a PID, it's an in-flight child process, so kill it. * * Whether the kill succeeds or fails, the main loop will send the op to * async_action_complete() (and thus services__finalize_async_op()) when the * process goes away. */ if (op->pid != 0) { crm_info("Terminating in-flight op %s[%d] early because it was cancelled", id, op->pid); cancelled = mainloop_child_kill(op->pid); if (cancelled == FALSE) { crm_err("Termination of %s[%d] failed", id, op->pid); } goto done; } #if SUPPORT_DBUS // In-flight systemd and upstart ops don't have a pid if (inflight_systemd_or_upstart(op)) { inflight_ops = g_list_remove(inflight_ops, op); /* This will cause any result that comes in later to be discarded, so we * don't call the callback and free the operation twice. */ services_action_cleanup(op); } #endif /* The rest of this is essentially equivalent to * services__finalize_async_op(), minus the handle_blocked_ops() call. */ // Report operation as cancelled - op->status = PCMK_EXEC_CANCELLED; + services__set_result(op, op->rc, PCMK_EXEC_CANCELLED, NULL); if (op->opaque->callback) { op->opaque->callback(op); } blocked_ops = g_list_remove(blocked_ops, op); services_action_free(op); cancelled = TRUE; // @TODO Initiate handle_blocked_ops() asynchronously done: free(id); return cancelled; } gboolean services_action_kick(const char *name, const char *action, guint interval_ms) { svc_action_t * op = NULL; char *id = pcmk__op_key(name, action, interval_ms); init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); free(id); if (op == NULL) { return FALSE; } if (op->pid || inflight_systemd_or_upstart(op)) { return TRUE; } else { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(op); return TRUE; } } /*! * \internal * \brief Add a new recurring operation, checking for duplicates * * \param[in] op Operation to add * * \return TRUE if duplicate found (and reschedule), FALSE otherwise */ static gboolean handle_duplicate_recurring(svc_action_t * op) { svc_action_t * dup = NULL; /* check for duplicates */ dup = g_hash_table_lookup(recurring_actions, op->id); if (dup && (dup != op)) { /* update user data */ if (op->opaque->callback) { dup->opaque->callback = op->opaque->callback; dup->cb_data = op->cb_data; op->cb_data = NULL; } /* immediately execute the next interval */ if (dup->pid != 0) { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(dup); } /* free the duplicate */ services_action_free(op); return TRUE; } return FALSE; } /*! * \internal * \brief Execute an action appropriately according to its standard * * \param[in] op Action to execute * * \return Standard Pacemaker return code * \retval EBUSY Recurring operation could not be initiated * \retval pcmk_rc_error Synchronous action failed * \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action * should not be freed (because it already was or is * pending) * * \note If the return value for an asynchronous action is not pcmk_rc_ok, the * caller is responsible for freeing the action. */ static int execute_action(svc_action_t *op) { #if SUPPORT_UPSTART if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_casei)) { return services__execute_upstart(op); } #endif #if SUPPORT_SYSTEMD if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { return services__execute_systemd(op); } #endif return services__execute_file(op); } void services_add_inflight_op(svc_action_t * op) { if (op == NULL) { return; } CRM_ASSERT(op->synchronous == FALSE); /* keep track of ops that are in-flight to avoid collisions in the same namespace */ if (op->rsc) { inflight_ops = g_list_append(inflight_ops, op); } } /*! * \internal * \brief Stop tracking an operation that completed * * \param[in] op Operation to stop tracking */ void services_untrack_op(svc_action_t *op) { /* Op is no longer in-flight or blocked */ inflight_ops = g_list_remove(inflight_ops, op); blocked_ops = g_list_remove(blocked_ops, op); /* Op is no longer blocking other ops, so check if any need to run */ handle_blocked_ops(); } gboolean services_action_async_fork_notify(svc_action_t * op, void (*action_callback) (svc_action_t *), void (*action_fork_callback) (svc_action_t *)) { op->synchronous = false; if (action_callback) { op->opaque->callback = action_callback; } if (action_fork_callback) { op->opaque->fork_callback = action_fork_callback; } if (op->interval_ms > 0) { init_recurring_actions(); if (handle_duplicate_recurring(op) == TRUE) { /* entry rescheduled, dup freed */ /* exit early */ return TRUE; } g_hash_table_replace(recurring_actions, op->id, op); } if (!pcmk_is_set(op->flags, SVC_ACTION_NON_BLOCKED) && op->rsc && is_op_blocked(op->rsc)) { blocked_ops = g_list_append(blocked_ops, op); return TRUE; } return execute_action(op) == pcmk_rc_ok; } gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) { return services_action_async_fork_notify(op, action_callback, NULL); } static gboolean processing_blocked_ops = FALSE; gboolean is_op_blocked(const char *rsc) { GList *gIter = NULL; svc_action_t *op = NULL; for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (pcmk__str_eq(op->rsc, rsc, pcmk__str_casei)) { return TRUE; } } return FALSE; } static void handle_blocked_ops(void) { GList *executed_ops = NULL; GList *gIter = NULL; svc_action_t *op = NULL; if (processing_blocked_ops) { /* avoid nested calling of this function */ return; } processing_blocked_ops = TRUE; /* n^2 operation here, but blocked ops are incredibly rare. this list * will be empty 99% of the time. */ for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (is_op_blocked(op->rsc)) { continue; } executed_ops = g_list_append(executed_ops, op); if (execute_action(op) != pcmk_rc_ok) { /* this can cause this function to be called recursively * which is why we have processing_blocked_ops static variable */ services__finalize_async_op(op); } } for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; blocked_ops = g_list_remove(blocked_ops, op); } g_list_free(executed_ops); processing_blocked_ops = FALSE; } /*! * \internal * \brief Execute a meta-data action appropriately to standard * * \param[in] op Meta-data action to execute * * \return Standard Pacemaker return code */ static int execute_metadata_action(svc_action_t *op) { const char *class = op->standard; if (op->agent == NULL) { crm_err("meta-data requested without specifying agent"); - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR_FATAL; + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR_FATAL, "Agent not specified"); return EINVAL; } if (class == NULL) { crm_err("meta-data requested for agent %s without specifying class", op->agent); - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR_FATAL; + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR_FATAL, + "Agent standard not specified"); return EINVAL; } if (!strcmp(class, PCMK_RESOURCE_CLASS_SERVICE)) { class = resources_find_service_class(op->agent); } if (class == NULL) { crm_err("meta-data requested for %s, but could not determine class", op->agent); - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR_HARD; + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR_HARD, + "Agent standard could not be determined"); return EINVAL; } if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) { return pcmk_legacy2rc(services__get_lsb_metadata(op->agent, &op->stdout_data)); } #if SUPPORT_NAGIOS if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return pcmk_legacy2rc(services__get_nagios_metadata(op->agent, &op->stdout_data)); } #endif return execute_action(op); } gboolean services_action_sync(svc_action_t * op) { gboolean rc = TRUE; if (op == NULL) { crm_trace("No operation to execute"); return FALSE; } op->synchronous = true; if (pcmk__str_eq(op->action, "meta-data", pcmk__str_casei)) { /* Synchronous meta-data operations are handled specially. Since most * resource classes don't provide any meta-data, it has to be * synthesized from available information about the agent. * * services_action_async() doesn't treat meta-data actions specially, so * it will result in an error for classes that don't support the action. */ rc = (execute_metadata_action(op) == pcmk_rc_ok); } else { rc = (execute_action(op) == pcmk_rc_ok); } crm_trace(" > " PCMK__OP_FMT ": %s = %d", op->rsc, op->action, op->interval_ms, op->opaque->exec, op->rc); if (op->stdout_data) { crm_trace(" > stdout: %s", op->stdout_data); } if (op->stderr_data) { crm_trace(" > stderr: %s", op->stderr_data); } return rc; } GList * get_directory_list(const char *root, gboolean files, gboolean executable) { return services_os_get_directory_list(root, files, executable); } GList * resources_list_standards(void) { GList *standards = NULL; standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_OCF)); standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_LSB)); standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SERVICE)); #if SUPPORT_SYSTEMD { GList *agents = systemd_unit_listall(); if (agents != NULL) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SYSTEMD)); g_list_free_full(agents, free); } } #endif #if SUPPORT_UPSTART { GList *agents = upstart_job_listall(); if (agents != NULL) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_UPSTART)); g_list_free_full(agents, free); } } #endif #if SUPPORT_NAGIOS { GList *agents = services__list_nagios_agents(); if (agents != NULL) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_NAGIOS)); g_list_free_full(agents, free); } } #endif return standards; } GList * resources_list_providers(const char *standard) { if (pcmk_is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider)) { return resources_os_list_ocf_providers(); } return NULL; } GList * resources_list_agents(const char *standard, const char *provider) { if ((standard == NULL) || (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0)) { GList *tmp1; GList *tmp2; GList *result = services__list_lsb_agents(); if (standard == NULL) { tmp1 = result; tmp2 = resources_os_list_ocf_agents(NULL); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } #if SUPPORT_SYSTEMD tmp1 = result; tmp2 = systemd_unit_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif #if SUPPORT_UPSTART tmp1 = result; tmp2 = upstart_job_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif return result; } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) { return resources_os_list_ocf_agents(provider); } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) { return services__list_lsb_agents(); #if SUPPORT_SYSTEMD } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { return systemd_unit_listall(); #endif #if SUPPORT_UPSTART } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) { return upstart_job_listall(); #endif #if SUPPORT_NAGIOS } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { return services__list_nagios_agents(); #endif } return NULL; } gboolean resources_agent_exists(const char *standard, const char *provider, const char *agent) { GList *standards = NULL; GList *providers = NULL; GList *iter = NULL; gboolean rc = FALSE; gboolean has_providers = FALSE; standards = resources_list_standards(); for (iter = standards; iter != NULL; iter = iter->next) { if (pcmk__str_eq(iter->data, standard, pcmk__str_none)) { rc = TRUE; break; } } if (rc == FALSE) { goto done; } rc = FALSE; has_providers = pcmk_is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider); if (has_providers == TRUE && provider != NULL) { providers = resources_list_providers(standard); for (iter = providers; iter != NULL; iter = iter->next) { if (pcmk__str_eq(iter->data, provider, pcmk__str_none)) { rc = TRUE; break; } } } else if (has_providers == FALSE && provider == NULL) { rc = TRUE; } if (rc == FALSE) { goto done; } if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { if (services__lsb_agent_exists(agent)) { rc = TRUE; #if SUPPORT_SYSTEMD } else if (systemd_unit_exists(agent)) { rc = TRUE; #endif #if SUPPORT_UPSTART } else if (upstart_job_exists(agent)) { rc = TRUE; #endif } else { rc = FALSE; } } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) { rc = services__ocf_agent_exists(provider, agent); } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) { rc = services__lsb_agent_exists(agent); #if SUPPORT_SYSTEMD } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { rc = systemd_unit_exists(agent); #endif #if SUPPORT_UPSTART } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_casei)) { rc = upstart_job_exists(agent); #endif #if SUPPORT_NAGIOS } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { rc = services__nagios_agent_exists(agent); #endif } else { rc = FALSE; } done: g_list_free(standards); g_list_free(providers); return rc; } + +/*! + * \internal + * \brief Set the result of an action + * + * \param[out] action Where to set action result + * \param[in] agent_status Exit status to set + * \param[in] exec_status Execution status to set + * \param[in] reason Human-friendly description of event to set + */ +void +services__set_result(svc_action_t *action, int agent_status, + enum pcmk_exec_status exec_status, const char *reason) +{ + if (action == NULL) { + return; + } + + action->rc = agent_status; + action->status = exec_status; + + if (!pcmk__str_eq(action->opaque->exit_reason, reason, + pcmk__str_none)) { + free(action->opaque->exit_reason); + action->opaque->exit_reason = (reason == NULL)? NULL : strdup(reason); + } +} + +/*! + * \internal + * \brief Get the exit reason of an action + * + * \param[in] action Action to check + * + * \return Action's exit reason (or NULL if none) + */ +const char * +services__exit_reason(svc_action_t *action) +{ + return action->opaque->exit_reason; +} diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index 84d26a766b..018d851540 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,1381 +1,1429 @@ /* * Copyright 2010-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "services_private.h" static void close_pipe(int fildes[]); /* We have two alternative ways of handling SIGCHLD when synchronously waiting * for spawned processes to complete. Both rely on polling a file descriptor to * discover SIGCHLD events. * * If sys/signalfd.h is available (e.g. on Linux), we call signalfd() to * generate the file descriptor. Otherwise, we use the "self-pipe trick" * (opening a pipe and writing a byte to it when SIGCHLD is received). */ #ifdef HAVE_SYS_SIGNALFD_H // signalfd() implementation #include // Everything needed to manage SIGCHLD handling struct sigchld_data_s { sigset_t mask; // Signals to block now (including SIGCHLD) sigset_t old_mask; // Previous set of blocked signals }; // Initialize SIGCHLD data and prepare for use static bool sigchld_setup(struct sigchld_data_s *data) { sigemptyset(&(data->mask)); sigaddset(&(data->mask), SIGCHLD); sigemptyset(&(data->old_mask)); // Block SIGCHLD (saving previous set of blocked signals to restore later) if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) { crm_err("Wait for child process completion failed: %s " CRM_XS " source=sigprocmask", pcmk_strerror(errno)); return false; } return true; } // Get a file descriptor suitable for polling for SIGCHLD events static int sigchld_open(struct sigchld_data_s *data) { int fd; CRM_CHECK(data != NULL, return -1); fd = signalfd(-1, &(data->mask), SFD_NONBLOCK); if (fd < 0) { crm_err("Wait for child process completion failed: %s " CRM_XS " source=signalfd", pcmk_strerror(errno)); } return fd; } // Close a file descriptor returned by sigchld_open() static void sigchld_close(int fd) { if (fd > 0) { close(fd); } } // Return true if SIGCHLD was received from polled fd static bool sigchld_received(int fd) { struct signalfd_siginfo fdsi; ssize_t s; if (fd < 0) { return false; } s = read(fd, &fdsi, sizeof(struct signalfd_siginfo)); if (s != sizeof(struct signalfd_siginfo)) { crm_err("Wait for child process completion failed: %s " CRM_XS " source=read", pcmk_strerror(errno)); } else if (fdsi.ssi_signo == SIGCHLD) { return true; } return false; } // Do anything needed after done waiting for SIGCHLD static void sigchld_cleanup(struct sigchld_data_s *data) { // Restore the original set of blocked signals if ((sigismember(&(data->old_mask), SIGCHLD) == 0) && (sigprocmask(SIG_UNBLOCK, &(data->mask), NULL) < 0)) { crm_warn("Could not clean up after child process completion: %s", pcmk_strerror(errno)); } } #else // HAVE_SYS_SIGNALFD_H not defined // Self-pipe implementation (see above for function descriptions) struct sigchld_data_s { int pipe_fd[2]; // Pipe file descriptors struct sigaction sa; // Signal handling info (with SIGCHLD) struct sigaction old_sa; // Previous signal handling info }; // We need a global to use in the signal handler volatile struct sigchld_data_s *last_sigchld_data = NULL; static void sigchld_handler() { // We received a SIGCHLD, so trigger pipe polling if ((last_sigchld_data != NULL) && (last_sigchld_data->pipe_fd[1] >= 0) && (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) { crm_err("Wait for child process completion failed: %s " CRM_XS " source=write", pcmk_strerror(errno)); } } static bool sigchld_setup(struct sigchld_data_s *data) { int rc; data->pipe_fd[0] = data->pipe_fd[1] = -1; if (pipe(data->pipe_fd) == -1) { crm_err("Wait for child process completion failed: %s " CRM_XS " source=pipe", pcmk_strerror(errno)); return false; } rc = pcmk__set_nonblocking(data->pipe_fd[0]); if (rc != pcmk_rc_ok) { crm_warn("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc); } rc = pcmk__set_nonblocking(data->pipe_fd[1]); if (rc != pcmk_rc_ok) { crm_warn("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc); } // Set SIGCHLD handler data->sa.sa_handler = sigchld_handler; data->sa.sa_flags = 0; sigemptyset(&(data->sa.sa_mask)); if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) { crm_err("Wait for child process completion failed: %s " CRM_XS " source=sigaction", pcmk_strerror(errno)); } // Remember data for use in signal handler last_sigchld_data = data; return true; } static int sigchld_open(struct sigchld_data_s *data) { CRM_CHECK(data != NULL, return -1); return data->pipe_fd[0]; } static void sigchld_close(int fd) { // Pipe will be closed in sigchld_cleanup() return; } static bool sigchld_received(int fd) { char ch; if (fd < 0) { return false; } // Clear out the self-pipe while (read(fd, &ch, 1) == 1) /*omit*/; return true; } static void sigchld_cleanup(struct sigchld_data_s *data) { // Restore the previous SIGCHLD handler if (sigaction(SIGCHLD, &(data->old_sa), NULL) < 0) { crm_warn("Could not clean up after child process completion: %s", pcmk_strerror(errno)); } close_pipe(data->pipe_fd); } #endif /*! * \internal * \brief Close the two file descriptors of a pipe * * \param[in] fildes Array of file descriptors opened by pipe() */ static void close_pipe(int fildes[]) { if (fildes[0] >= 0) { close(fildes[0]); fildes[0] = -1; } if (fildes[1] >= 0) { close(fildes[1]); fildes[1] = -1; } } static gboolean svc_read_output(int fd, svc_action_t * op, bool is_stderr) { char *data = NULL; int rc = 0, len = 0; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; if (fd < 0) { crm_trace("No fd for %s", op->id); return FALSE; } if (is_stderr && op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; crm_trace("Reading %s stderr into offset %d", op->id, len); } else if (is_stderr == FALSE && op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; crm_trace("Reading %s stdout into offset %d", op->id, len); } else { crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len); } do { rc = read(fd, buf, buf_read_len); if (rc > 0) { buf[rc] = 0; crm_trace("Got %d chars: %.80s", rc, buf); data = pcmk__realloc(data, len + rc + 1); len += sprintf(data + len, "%s", buf); } else if (errno != EINTR) { /* error or EOF * Cleanup happens in pipe_done() */ rc = FALSE; break; } } while (rc == buf_read_len || rc < 0); if (is_stderr) { op->stderr_data = data; } else { op->stdout_data = data; } return rc; } static int dispatch_stdout(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stdout_fd, op, FALSE); } static int dispatch_stderr(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stderr_fd, op, TRUE); } static void pipe_out_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), strcmp(key, "OCF_CHECK_LEVEL") != 0 ? "OCF_RESKEY_%s" : "%s", (char *)key); set_ocf_env(buffer, value, user_data); } static void set_alert_env(gpointer key, gpointer value, gpointer user_data) { int rc; if (value != NULL) { rc = setenv(key, value, 1); } else { rc = unsetenv(key); } if (rc < 0) { crm_perror(LOG_ERR, "setenv %s=%s", (char*)key, (value? (char*)value : "")); } else { crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : "")); } } /*! * \internal * \brief Add environment variables suitable for an action * * \param[in] op Action to use */ static void add_action_env_vars(const svc_action_t *op) { void (*env_setter)(gpointer, gpointer, gpointer) = NULL; if (op->agent == NULL) { env_setter = set_alert_env; /* we deal with alert handler */ } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) { env_setter = set_ocf_env_with_prefix; } if (env_setter != NULL && op->params != NULL) { g_hash_table_foreach(op->params, env_setter, NULL); } if (env_setter == NULL || env_setter == set_alert_env) { return; } set_ocf_env("OCF_RA_VERSION_MAJOR", PCMK_OCF_MAJOR_VERSION, NULL); set_ocf_env("OCF_RA_VERSION_MINOR", PCMK_OCF_MINOR_VERSION, NULL); set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL); set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } static void pipe_in_single_parameter(gpointer key, gpointer value, gpointer user_data) { svc_action_t *op = user_data; char *buffer = crm_strdup_printf("%s=%s\n", (char *)key, (char *) value); int ret, total = 0, len = strlen(buffer); do { errno = 0; ret = write(op->opaque->stdin_fd, buffer + total, len - total); if (ret > 0) { total += ret; } } while ((errno == EINTR) && (total < len)); free(buffer); } /*! * \internal * \brief Pipe parameters in via stdin for action * * \param[in] op Action to use */ static void pipe_in_action_stdin_parameters(const svc_action_t *op) { crm_debug("sending args"); if (op->params) { g_hash_table_foreach(op->params, pipe_in_single_parameter, (gpointer) op); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invocation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; op->opaque->repeat_timer = 0; services_action_async(op, NULL); return FALSE; } /*! * \internal * \brief Finalize handling of an asynchronous operation * * Given a completed asynchronous operation, cancel or reschedule it as * appropriate if recurring, call its callback if registered, stop tracking it, * and clean it up. * * \param[in,out] op Operation to finalize * * \return Standard Pacemaker return code * \retval EINVAL Caller supplied NULL or invalid \p op * \retval EBUSY Uncanceled recurring action has only been cleaned up * \retval pcmk_rc_ok Action has been freed * * \note If the return value is not pcmk_rc_ok, the caller is responsible for * freeing the action. */ int services__finalize_async_op(svc_action_t *op) { CRM_CHECK((op != NULL) && !(op->synchronous), return EINVAL); if (op->interval_ms != 0) { // Recurring operations must be either cancelled or rescheduled if (op->cancel) { - op->status = PCMK_EXEC_CANCELLED; + services__set_result(op, op->rc, PCMK_EXEC_CANCELLED, NULL); cancel_recurring_action(op); } else { op->opaque->repeat_timer = g_timeout_add(op->interval_ms, recurring_action_timer, (void *) op); } } if (op->opaque->callback != NULL) { op->opaque->callback(op); } // Stop tracking the operation (as in-flight or blocked) op->pid = 0; services_untrack_op(op); if ((op->interval_ms != 0) && !(op->cancel)) { // Do not free recurring actions (they will get freed when cancelled) services_action_cleanup(op); return EBUSY; } services_action_free(op); return pcmk_rc_ok; } static void close_op_input(svc_action_t *op) { if (op->opaque->stdin_fd >= 0) { close(op->opaque->stdin_fd); } } static void finish_op_output(svc_action_t *op, bool is_stderr) { mainloop_io_t **source; int fd; if (is_stderr) { source = &(op->opaque->stderr_gsource); fd = op->opaque->stderr_fd; } else { source = &(op->opaque->stdout_gsource); fd = op->opaque->stdout_fd; } if (op->synchronous || *source) { crm_trace("Finish reading %s[%d] %s", op->id, op->pid, (is_stderr? "stdout" : "stderr")); svc_read_output(fd, op, is_stderr); if (op->synchronous) { close(fd); } else { mainloop_del_fd(*source); *source = NULL; } } } // Log an operation's stdout and stderr static void log_op_output(svc_action_t *op) { char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid); crm_log_output(LOG_NOTICE, prefix, op->stderr_data); strcpy(prefix + strlen(prefix) - strlen("error output"), "output"); crm_log_output(LOG_DEBUG, prefix, op->stdout_data); free(prefix); } +// Truncate exit reasons at this many characters +#define EXIT_REASON_MAX_LEN 128 + +static void +parse_exit_reason_from_stderr(svc_action_t *op) +{ + const char *reason_start = NULL; + const char *reason_end = NULL; + const int prefix_len = strlen(PCMK_OCF_REASON_PREFIX); + + if ((op->stderr_data == NULL) || + // Only OCF agents have exit reasons in stderr + !pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_none)) { + return; + } + + // Find the last occurrence of the magic string indicating an exit reason + for (const char *cur = strstr(op->stderr_data, PCMK_OCF_REASON_PREFIX); + cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { + + cur += prefix_len; // Skip over magic string + reason_start = cur; + } + + if ((reason_start == NULL) || (reason_start[0] == '\n') + || (reason_start[0] == '\0')) { + return; // No or empty exit reason + } + + // Exit reason goes to end of line (or end of output) + reason_end = strchr(reason_start, '\n'); + if (reason_end == NULL) { + reason_end = reason_start + strlen(reason_start); + } + + // Limit size of exit reason to something reasonable + if (reason_end > (reason_start + EXIT_REASON_MAX_LEN)) { + reason_end = reason_start + EXIT_REASON_MAX_LEN; + } + + free(op->opaque->exit_reason); + op->opaque->exit_reason = strndup(reason_start, reason_end - reason_start); +} + /*! * \internal * \brief Process the completion of an asynchronous child process * * \param[in] p Child process that completed * \param[in] pid Process ID of child * \param[in] core (unused) * \param[in] signo Signal that interrupted child, if any * \param[in] exitcode Exit status of child process */ static void async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo, int exitcode) { svc_action_t *op = mainloop_child_userdata(p); mainloop_clear_child_userdata(p); CRM_CHECK(op->pid == pid, - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR; + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR, "Bug in mainloop handling"); return); /* Depending on the priority the mainloop gives the stdout and stderr * file descriptors, this function could be called before everything has * been read from them, so force a final read now. */ finish_op_output(op, true); finish_op_output(op, false); close_op_input(op); if (signo == 0) { crm_debug("%s[%d] exited with status %d", op->id, op->pid, exitcode); - op->status = PCMK_EXEC_DONE; - op->rc = exitcode; + services__set_result(op, exitcode, PCMK_EXEC_DONE, NULL); + log_op_output(op); + parse_exit_reason_from_stderr(op); } else if (mainloop_child_timeout(p)) { crm_warn("%s[%d] timed out after %dms", op->id, op->pid, op->timeout); - op->status = PCMK_EXEC_TIMEOUT; - op->rc = services__generic_error(op); + services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT, + "Process did not exit within specified timeout"); } else if (op->cancel) { /* If an in-flight recurring operation was killed because it was * cancelled, don't treat that as a failure. */ crm_info("%s[%d] terminated with signal: %s " CRM_XS " (%d)", op->id, op->pid, strsignal(signo), signo); - op->status = PCMK_EXEC_CANCELLED; - op->rc = PCMK_OCF_OK; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_CANCELLED, NULL); } else { crm_warn("%s[%d] terminated with signal: %s " CRM_XS " (%d)", op->id, op->pid, strsignal(signo), signo); - op->status = PCMK_EXEC_ERROR; - op->rc = PCMK_OCF_UNKNOWN_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Process interrupted by signal"); } - log_op_output(op); services__finalize_async_op(op); } /*! * \internal * \brief Return agent standard's exit status for "generic error" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for errors in general. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__generic_error(svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, "status", pcmk__str_casei)) { return PCMK_LSB_STATUS_UNKNOWN; } #if SUPPORT_NAGIOS if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return NAGIOS_STATE_UNKNOWN; } #endif return PCMK_OCF_UNKNOWN_ERROR; } /*! * \internal * \brief Return agent standard's exit status for "not installed" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for "not installed" errors. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__not_installed_error(svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, "status", pcmk__str_casei)) { return PCMK_LSB_STATUS_NOT_INSTALLED; } #if SUPPORT_NAGIOS if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return NAGIOS_NOT_INSTALLED; } #endif return PCMK_OCF_NOT_INSTALLED; } /*! * \internal * \brief Return agent standard's exit status for "insufficient privileges" * * When returning an internal error for an action, a value that is appropriate * to the action's agent standard must be used. This function returns a value * appropriate for "insufficient privileges" errors. * * \param[in] op Action that error is for * * \return Exit status appropriate to agent standard * \note Actions without a standard will get PCMK_OCF_UNKNOWN_ERROR. */ int services__authorization_error(svc_action_t *op) { if ((op == NULL) || (op->standard == NULL)) { return PCMK_OCF_UNKNOWN_ERROR; } if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && pcmk__str_eq(op->action, "status", pcmk__str_casei)) { return PCMK_LSB_STATUS_INSUFFICIENT_PRIV; } #if SUPPORT_NAGIOS if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { return NAGIOS_INSUFFICIENT_PRIV; } #endif return PCMK_OCF_INSUFFICIENT_PRIV; } /*! * \internal * \brief Set operation rc and status per errno from stat(), fork() or execvp() * * \param[in,out] op Operation to set rc and status for * \param[in] error Value of errno after system call * * \return void */ void services__handle_exec_error(svc_action_t * op, int error) { switch (error) { /* see execve(2), stat(2) and fork(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ case ENOTDIR: /* Path component is not a directory */ case EINVAL: /* Invalid executable format */ case ENOEXEC: /* Invalid executable format */ - op->rc = services__not_installed_error(op); - op->status = PCMK_EXEC_NOT_INSTALLED; + services__set_result(op, services__not_installed_error(op), + PCMK_EXEC_NOT_INSTALLED, pcmk_rc_str(error)); break; case EACCES: /* permission denied (various errors) */ case EPERM: /* permission denied (various errors) */ - op->rc = services__authorization_error(op); - op->status = PCMK_EXEC_ERROR; + services__set_result(op, services__authorization_error(op), + PCMK_EXEC_ERROR, pcmk_rc_str(error)); break; default: - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR; + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR, pcmk_rc_str(error)); } } static void action_launch_child(svc_action_t *op) { /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library. * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well. * We do not want this to be inherited by the child process. By resetting this the signal * to the default behavior, we avoid some potential odd problems that occur during OCF * scripts when SIGPIPE is ignored by the environment. */ signal(SIGPIPE, SIG_DFL); #if defined(HAVE_SCHED_SETSCHEDULER) if (sched_getscheduler(0) != SCHED_OTHER) { struct sched_param sp; memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { crm_perror(LOG_ERR, "Could not reset scheduling policy to SCHED_OTHER for %s", op->id); } } #endif if (setpriority(PRIO_PROCESS, 0, 0) == -1) { crm_perror(LOG_ERR, "Could not reset process priority to 0 for %s", op->id); } /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); pcmk__close_fds_in_child(false); #if SUPPORT_CIBSECRETS if (pcmk__substitute_secrets(op->rsc, op->params) != pcmk_rc_ok) { /* replacing secrets failed! */ if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", op->rsc); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", op->rsc); _exit(PCMK_OCF_NOT_CONFIGURED); } } #endif add_action_env_vars(op); /* Become the desired user */ if (op->opaque->uid && (geteuid() == 0)) { // If requested, set effective group if (op->opaque->gid && (setgid(op->opaque->gid) < 0)) { crm_perror(LOG_ERR, "Could not set child group to %d", op->opaque->gid); _exit(PCMK_OCF_NOT_CONFIGURED); } // Erase supplementary group list // (We could do initgroups() if we kept a copy of the username) if (setgroups(0, NULL) < 0) { crm_perror(LOG_ERR, "Could not set child groups"); _exit(PCMK_OCF_NOT_CONFIGURED); } // Set effective user if (setuid(op->opaque->uid) < 0) { crm_perror(LOG_ERR, "setting user to %d", op->opaque->uid); _exit(PCMK_OCF_NOT_CONFIGURED); } } /* execute the RA */ execvp(op->opaque->exec, op->opaque->args); /* Most cases should have been already handled by stat() */ services__handle_exec_error(op, errno); _exit(op->rc); } /*! * \internal * \brief Wait for synchronous action to complete, and set its result * * \param[in] op Action to wait for * \param[in] data Child signal data */ static void wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data) { int status = 0; int timeout = op->timeout; time_t start = time(NULL); struct pollfd fds[3]; int wait_rc = 0; + const char *wait_reason = NULL; fds[0].fd = op->opaque->stdout_fd; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = op->opaque->stderr_fd; fds[1].events = POLLIN; fds[1].revents = 0; fds[2].fd = sigchld_open(data); fds[2].events = POLLIN; fds[2].revents = 0; crm_trace("Waiting for %s[%d]", op->id, op->pid); do { int poll_rc = poll(fds, 3, timeout); + wait_reason = NULL; + if (poll_rc > 0) { if (fds[0].revents & POLLIN) { svc_read_output(op->opaque->stdout_fd, op, FALSE); } if (fds[1].revents & POLLIN) { svc_read_output(op->opaque->stderr_fd, op, TRUE); } if ((fds[2].revents & POLLIN) && sigchld_received(fds[2].fd)) { wait_rc = waitpid(op->pid, &status, WNOHANG); if ((wait_rc > 0) || ((wait_rc < 0) && (errno == ECHILD))) { // Child process exited or doesn't exist break; } else if (wait_rc < 0) { + wait_reason = pcmk_rc_str(errno); crm_warn("Wait for completion of %s[%d] failed: %s " CRM_XS " source=waitpid", - op->id, op->pid, pcmk_strerror(errno)); + op->id, op->pid, wait_reason); wait_rc = 0; // Act as if process is still running } } } else if (poll_rc == 0) { // Poll timed out with no descriptors ready timeout = 0; break; } else if ((poll_rc < 0) && (errno != EINTR)) { + wait_reason = pcmk_rc_str(errno); crm_err("Wait for completion of %s[%d] failed: %s " - CRM_XS " source=poll", - op->id, op->pid, pcmk_strerror(errno)); + CRM_XS " source=poll", op->id, op->pid, wait_reason); break; } timeout = op->timeout - (time(NULL) - start) * 1000; } while ((op->timeout < 0 || timeout > 0)); crm_trace("Stopped waiting for %s[%d]", op->id, op->pid); + finish_op_output(op, true); + finish_op_output(op, false); + close_op_input(op); + sigchld_close(fds[2].fd); + if (wait_rc <= 0) { if ((op->timeout > 0) && (timeout <= 0)) { - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_TIMEOUT; + services__set_result(op, services__generic_error(op), + PCMK_EXEC_TIMEOUT, + "Process did not exit within specified timeout"); crm_warn("%s[%d] timed out after %dms", op->id, op->pid, op->timeout); } else { - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR; + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR, wait_reason); } /* If only child hasn't been successfully waited for, yet. This is to limit killing wrong target a bit more. */ if ((wait_rc == 0) && (waitpid(op->pid, &status, WNOHANG) == 0)) { if (kill(op->pid, SIGKILL)) { crm_warn("Could not kill rogue child %s[%d]: %s", op->id, op->pid, pcmk_strerror(errno)); } /* Safe to skip WNOHANG here as we sent non-ignorable signal. */ while ((waitpid(op->pid, &status, 0) == (pid_t) -1) && (errno == EINTR)) { /* keep waiting */; } } } else if (WIFEXITED(status)) { - op->status = PCMK_EXEC_DONE; - op->rc = WEXITSTATUS(status); + services__set_result(op, WEXITSTATUS(status), PCMK_EXEC_DONE, NULL); + parse_exit_reason_from_stderr(op); crm_info("%s[%d] exited with status %d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR; + services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, + "Process interrupted by signal"); crm_err("%s[%d] terminated with signal: %s " CRM_XS " (%d)", op->id, op->pid, strsignal(signo), signo); #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_warn("%s[%d] dumped core", op->id, op->pid); } #endif } else { // Shouldn't be possible to get here - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR; + services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, + "Unable to wait for child to complete"); } - - finish_op_output(op, true); - finish_op_output(op, false); - close_op_input(op); - sigchld_close(fds[2].fd); } /*! * \internal * \brief Execute an action whose standard uses executable files * * \param[in] op Action to execute * * \return Standard Pacemaker return value * \retval EBUSY Recurring operation could not be initiated * \retval pcmk_rc_error Synchronous action failed * \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action * should not be freed (because it already was or is * pending) * * \note If the return value for an asynchronous action is not pcmk_rc_ok, the * caller is responsible for freeing the action. */ int services__execute_file(svc_action_t *op) { int stdout_fd[2]; int stderr_fd[2]; int stdin_fd[2] = {-1, -1}; int rc; struct stat st; struct sigchld_data_s data; // Catch common failure conditions early if (stat(op->opaque->exec, &st) != 0) { rc = errno; crm_warn("Cannot execute '%s': %s " CRM_XS " stat rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pipe(stdout_fd) < 0) { rc = errno; crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pipe(stderr_fd) < 0) { rc = errno; close_pipe(stdout_fd); crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); goto done; } if (pcmk_is_set(pcmk_get_ra_caps(op->standard), pcmk_ra_cap_stdin)) { if (pipe(stdin_fd) < 0) { rc = errno; close_pipe(stdout_fd); close_pipe(stderr_fd); crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); goto done; } } if (op->synchronous && !sigchld_setup(&data)) { close_pipe(stdin_fd); close_pipe(stdout_fd); close_pipe(stderr_fd); sigchld_cleanup(&data); - op->rc = services__generic_error(op); - op->status = PCMK_EXEC_ERROR; + services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, + "Could not manage signals for child process"); goto done; } op->pid = fork(); switch (op->pid) { case -1: rc = errno; close_pipe(stdin_fd); close_pipe(stdout_fd); close_pipe(stderr_fd); crm_err("Cannot execute '%s': %s " CRM_XS " fork rc=%d", op->opaque->exec, pcmk_strerror(rc), rc); services__handle_exec_error(op, rc); if (op->synchronous) { sigchld_cleanup(&data); } goto done; break; case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); if (stdin_fd[1] >= 0) { close(stdin_fd[1]); } if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_warn("Can't redirect output from '%s': %s " CRM_XS " errno=%d", op->opaque->exec, pcmk_strerror(errno), errno); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_warn("Can't redirect error output from '%s': %s " CRM_XS " errno=%d", op->opaque->exec, pcmk_strerror(errno), errno); } close(stderr_fd[1]); } if ((stdin_fd[0] >= 0) && (STDIN_FILENO != stdin_fd[0])) { if (dup2(stdin_fd[0], STDIN_FILENO) != STDIN_FILENO) { crm_warn("Can't redirect input to '%s': %s " CRM_XS " errno=%d", op->opaque->exec, pcmk_strerror(errno), errno); } close(stdin_fd[0]); } if (op->synchronous) { sigchld_cleanup(&data); } action_launch_child(op); CRM_ASSERT(0); /* action_launch_child is effectively noreturn */ } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); if (stdin_fd[0] >= 0) { close(stdin_fd[0]); } op->opaque->stdout_fd = stdout_fd[0]; rc = pcmk__set_nonblocking(op->opaque->stdout_fd); if (rc != pcmk_rc_ok) { crm_warn("Could not set '%s' output non-blocking: %s " CRM_XS " rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); } op->opaque->stderr_fd = stderr_fd[0]; rc = pcmk__set_nonblocking(op->opaque->stderr_fd); if (rc != pcmk_rc_ok) { crm_warn("Could not set '%s' error output non-blocking: %s " CRM_XS " rc=%d", op->opaque->exec, pcmk_rc_str(rc), rc); } op->opaque->stdin_fd = stdin_fd[1]; if (op->opaque->stdin_fd >= 0) { // using buffer behind non-blocking-fd here - that could be improved // as long as no other standard uses stdin_fd assume stonith rc = pcmk__set_nonblocking(op->opaque->stdin_fd); if (rc != pcmk_rc_ok) { crm_warn("Could not set '%s' input non-blocking: %s " CRM_XS " fd=%d,rc=%d", op->opaque->exec, pcmk_rc_str(rc), op->opaque->stdin_fd, rc); } pipe_in_action_stdin_parameters(op); // as long as we are handling parameters directly in here just close close(op->opaque->stdin_fd); op->opaque->stdin_fd = -1; } // after fds are setup properly and before we plug anything into mainloop if (op->opaque->fork_callback) { op->opaque->fork_callback(op); } if (op->synchronous) { wait_for_sync_result(op, &data); sigchld_cleanup(&data); goto done; } crm_trace("Waiting async for '%s'[%d]", op->opaque->exec, op->pid); mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op, pcmk_is_set(op->flags, SVC_ACTION_LEAVE_GROUP)? mainloop_leave_pid_group : 0, async_action_complete); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); services_add_inflight_op(op); return pcmk_rc_ok; done: if (op->synchronous) { return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error; } else { return services__finalize_async_op(op); } } static GList * services_os_get_single_directory_list(const char *root, gboolean files, gboolean executable) { GList *list = NULL; struct dirent **namelist; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if (executable && (sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable) { GList *result = NULL; char *dirs = strdup(root); char *dir = NULL; if (pcmk__str_empty(dirs)) { free(dirs); return result; } for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) { GList *tmp = services_os_get_single_directory_list(dir, files, executable); if (tmp) { result = g_list_concat(result, tmp); } } free(dirs); return result; } static GList * services_os_get_directory_list_provider(const char *root, const char *provider, gboolean files, gboolean executable) { GList *result = NULL; char *dirs = strdup(root); char *dir = NULL; char buffer[PATH_MAX]; if (pcmk__str_empty(dirs)) { free(dirs); return result; } for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) { GList *tmp = NULL; sprintf(buffer, "%s/%s", dir, provider); tmp = services_os_get_single_directory_list(buffer, files, executable); if (tmp) { result = g_list_concat(result, tmp); } } free(dirs); return result; } GList * resources_os_list_ocf_providers(void) { return get_directory_list(OCF_RA_PATH, FALSE, TRUE); } GList * resources_os_list_ocf_agents(const char *provider) { GList *gIter = NULL; GList *result = NULL; GList *providers = NULL; if (provider) { return services_os_get_directory_list_provider(OCF_RA_PATH, provider, TRUE, TRUE); } providers = resources_os_list_ocf_providers(); for (gIter = providers; gIter != NULL; gIter = gIter->next) { GList *tmp1 = result; GList *tmp2 = resources_os_list_ocf_agents(gIter->data); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } g_list_free_full(providers, free); return result; } gboolean services__ocf_agent_exists(const char *provider, const char *agent) { gboolean rc = FALSE; struct stat st; char *dirs = strdup(OCF_RA_PATH); char *dir = NULL; char *buf = NULL; if (provider == NULL || agent == NULL || pcmk__str_empty(dirs)) { free(dirs); return rc; } for (dir = strtok(dirs, ":"); dir != NULL; dir = strtok(NULL, ":")) { buf = crm_strdup_printf("%s/%s/%s", dir, provider, agent); if (stat(buf, &st) == 0) { free(buf); rc = TRUE; break; } free(buf); } free(dirs); return rc; } diff --git a/lib/services/services_private.h b/lib/services/services_private.h index 2f71f6fc3a..4fed5bb7d2 100644 --- a/lib/services/services_private.h +++ b/lib/services/services_private.h @@ -1,96 +1,102 @@ /* * Copyright 2010-2011 Red Hat, Inc. * Later changes copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef SERVICES_PRIVATE__H # define SERVICES_PRIVATE__H # include # include "crm/services.h" #if SUPPORT_DBUS # include #endif #define MAX_ARGC 255 struct svc_action_private_s { char *exec; + char *exit_reason; char *args[MAX_ARGC]; uid_t uid; gid_t gid; guint repeat_timer; void (*callback) (svc_action_t * op); void (*fork_callback) (svc_action_t * op); int stderr_fd; mainloop_io_t *stderr_gsource; int stdout_fd; mainloop_io_t *stdout_gsource; int stdin_fd; #if SUPPORT_DBUS DBusPendingCall* pending; unsigned timerid; #endif }; G_GNUC_INTERNAL GList *services_os_get_directory_list(const char *root, gboolean files, gboolean executable); G_GNUC_INTERNAL int services__execute_file(svc_action_t *op); G_GNUC_INTERNAL GList *resources_os_list_ocf_providers(void); G_GNUC_INTERNAL GList *resources_os_list_ocf_agents(const char *provider); G_GNUC_INTERNAL gboolean services__ocf_agent_exists(const char *provider, const char *agent); G_GNUC_INTERNAL gboolean cancel_recurring_action(svc_action_t * op); G_GNUC_INTERNAL gboolean recurring_action_timer(gpointer data); G_GNUC_INTERNAL int services__finalize_async_op(svc_action_t *op); G_GNUC_INTERNAL int services__generic_error(svc_action_t *op); G_GNUC_INTERNAL int services__not_installed_error(svc_action_t *op); G_GNUC_INTERNAL int services__authorization_error(svc_action_t *op); G_GNUC_INTERNAL void services__handle_exec_error(svc_action_t * op, int error); G_GNUC_INTERNAL void services_add_inflight_op(svc_action_t *op); G_GNUC_INTERNAL void services_untrack_op(svc_action_t *op); G_GNUC_INTERNAL gboolean is_op_blocked(const char *rsc); +G_GNUC_INTERNAL +void services__set_result(svc_action_t *action, int agent_status, + enum pcmk_exec_status exec_status, + const char *exit_reason); + #if SUPPORT_DBUS G_GNUC_INTERNAL void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending); #endif #endif /* SERVICES_PRIVATE__H */ diff --git a/lib/services/systemd.c b/lib/services/systemd.c index 511e16d8ff..0e2763036c 100644 --- a/lib/services/systemd.c +++ b/lib/services/systemd.c @@ -1,1027 +1,1018 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static void invoke_unit_by_path(svc_action_t *op, const char *unit); #define BUS_NAME "org.freedesktop.systemd1" #define BUS_NAME_MANAGER BUS_NAME ".Manager" #define BUS_NAME_UNIT BUS_NAME ".Unit" #define BUS_PATH "/org/freedesktop/systemd1" static inline DBusMessage * systemd_new_method(const char *method) { crm_trace("Calling: %s on " BUS_NAME_MANAGER, method); return dbus_message_new_method_call(BUS_NAME, BUS_PATH, BUS_NAME_MANAGER, method); } /* * Functions to manage a static DBus connection */ static DBusConnection* systemd_proxy = NULL; static inline DBusPendingCall * systemd_send(DBusMessage *msg, void(*done)(DBusPendingCall *pending, void *user_data), void *user_data, int timeout) { return pcmk_dbus_send(msg, systemd_proxy, done, user_data, timeout); } static inline DBusMessage * systemd_send_recv(DBusMessage *msg, DBusError *error, int timeout) { return pcmk_dbus_send_recv(msg, systemd_proxy, error, timeout); } /*! * \internal * \brief Send a method to systemd without arguments, and wait for reply * * \param[in] method Method to send * * \return Systemd reply on success, NULL (and error will be logged) otherwise * * \note The caller must call dbus_message_unref() on the reply after * handling it. */ static DBusMessage * systemd_call_simple_method(const char *method) { DBusMessage *msg = systemd_new_method(method); DBusMessage *reply = NULL; DBusError error; /* Don't call systemd_init() here, because that calls this */ CRM_CHECK(systemd_proxy, return NULL); if (msg == NULL) { crm_err("Could not create message to send %s to systemd", method); return NULL; } dbus_error_init(&error); reply = systemd_send_recv(msg, &error, DBUS_TIMEOUT_USE_DEFAULT); dbus_message_unref(msg); if (dbus_error_is_set(&error)) { crm_err("Could not send %s to systemd: %s (%s)", method, error.message, error.name); dbus_error_free(&error); return NULL; } else if (reply == NULL) { crm_err("Could not send %s to systemd: no reply received", method); return NULL; } return reply; } static gboolean systemd_init(void) { static int need_init = 1; // https://dbus.freedesktop.org/doc/api/html/group__DBusConnection.html if (systemd_proxy && dbus_connection_get_is_connected(systemd_proxy) == FALSE) { crm_warn("Connection to System DBus is closed. Reconnecting..."); pcmk_dbus_disconnect(systemd_proxy); systemd_proxy = NULL; need_init = 1; } if (need_init) { need_init = 0; systemd_proxy = pcmk_dbus_connect(); } if (systemd_proxy == NULL) { return FALSE; } return TRUE; } static inline char * systemd_get_property(const char *unit, const char *name, void (*callback)(const char *name, const char *value, void *userdata), void *userdata, DBusPendingCall **pending, int timeout) { return systemd_proxy? pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME_UNIT, name, callback, userdata, pending, timeout) : NULL; } void systemd_cleanup(void) { if (systemd_proxy) { pcmk_dbus_disconnect(systemd_proxy); systemd_proxy = NULL; } } /* * end of systemd_proxy functions */ /*! * \internal * \brief Check whether a file name represents a manageable systemd unit * * \param[in] name File name to check * * \return Pointer to "dot" before filename extension if so, NULL otherwise */ static const char * systemd_unit_extension(const char *name) { if (name) { const char *dot = strrchr(name, '.'); if (dot && (!strcmp(dot, ".service") || !strcmp(dot, ".socket") || !strcmp(dot, ".mount") || !strcmp(dot, ".timer") || !strcmp(dot, ".path"))) { return dot; } } return NULL; } static char * systemd_service_name(const char *name) { if (name == NULL) { return NULL; } if (systemd_unit_extension(name)) { return strdup(name); } return crm_strdup_printf("%s.service", name); } static void systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data) { DBusError error; DBusMessage *reply = NULL; unsigned int reload_count = GPOINTER_TO_UINT(user_data); dbus_error_init(&error); if(pending) { reply = dbus_pending_call_steal_reply(pending); } if (pcmk_dbus_find_error(pending, reply, &error)) { crm_err("Could not issue systemd reload %d: %s", reload_count, error.message); dbus_error_free(&error); } else { crm_trace("Reload %d complete", reload_count); } if(pending) { dbus_pending_call_unref(pending); } if(reply) { dbus_message_unref(reply); } } static bool systemd_daemon_reload(int timeout) { static unsigned int reload_count = 0; DBusMessage *msg = systemd_new_method("Reload"); reload_count++; CRM_ASSERT(msg != NULL); systemd_send(msg, systemd_daemon_reload_complete, GUINT_TO_POINTER(reload_count), timeout); dbus_message_unref(msg); return TRUE; } /*! * \internal * \brief Set an action result based on a method error * * \param[in] op Action to set result for * \param[in] error Method error */ static void set_result_from_method_error(svc_action_t *op, const DBusError *error) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Unable to invoke systemd DBus method"); if (strstr(error->name, "org.freedesktop.systemd1.InvalidName") || strstr(error->name, "org.freedesktop.systemd1.LoadFailed") || strstr(error->name, "org.freedesktop.systemd1.NoSuchUnit")) { if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) { crm_trace("Masking systemd stop failure (%s) for %s " "because unknown service can be considered stopped", error->name, crm_str(op->rsc)); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); return; } - op->rc = PCMK_OCF_NOT_INSTALLED; - op->status = PCMK_EXEC_NOT_INSTALLED; + services__set_result(op, PCMK_OCF_NOT_INSTALLED, + PCMK_EXEC_NOT_INSTALLED, "systemd unit not found"); } crm_err("DBus request for %s of systemd unit %s for resource %s failed: %s", op->action, op->agent, crm_str(op->rsc), error->message); } /*! * \internal * \brief Extract unit path from LoadUnit reply, and execute action * * \param[in] reply LoadUnit reply * \param[in] op Action to execute (or NULL to just return path) * * \return DBus object path for specified unit if successful (only valid for * lifetime of \p reply), otherwise NULL */ static const char * execute_after_loadunit(DBusMessage *reply, svc_action_t *op) { const char *path = NULL; DBusError error; /* path here is not used other than as a non-NULL flag to indicate that a * request was indeed sent */ if (pcmk_dbus_find_error((void *) &path, reply, &error)) { if (op != NULL) { set_result_from_method_error(op, &error); } dbus_error_free(&error); } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { if (op != NULL) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "systemd DBus method had unexpected reply"); crm_err("Could not load systemd unit %s for %s: " "DBus reply has unexpected type", op->agent, op->id); } else { crm_err("Could not load systemd unit: " "DBus reply has unexpected type"); } } else { dbus_message_get_args (reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); } if (op != NULL) { if (path != NULL) { invoke_unit_by_path(op, path); } else if (!(op->synchronous)) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "No DBus object found for systemd unit"); services__finalize_async_op(op); } } return path; } /*! * \internal * \brief Execute a systemd action after its LoadUnit completes * * \param[in] pending If not NULL, DBus call associated with LoadUnit request * \param[in] user_data Action to execute */ static void loadunit_completed(DBusPendingCall *pending, void *user_data) { DBusMessage *reply = NULL; svc_action_t *op = user_data; crm_trace("LoadUnit result for %s arrived", op->id); // Grab the reply if (pending != NULL) { reply = dbus_pending_call_steal_reply(pending); } // The call is no longer pending CRM_LOG_ASSERT(pending == op->opaque->pending); services_set_op_pending(op, NULL); // Execute the desired action based on the reply execute_after_loadunit(reply, user_data); if (reply != NULL) { dbus_message_unref(reply); } } /*! * \internal * \brief Execute a systemd action, given the unit name * * \param[in] arg_name Unit name (possibly shortened, i.e. without ".service") * \param[in] op Action to execute (if NULL, just get the object path) * \param[out] path If non-NULL and \p op is NULL or synchronous, where to * store DBus object path for specified unit * * \return Standard Pacemaker return code (for NULL \p op, pcmk_rc_ok means unit * was found; for synchronous actions, pcmk_rc_ok means unit was * executed, with the actual result stored in \p op; for asynchronous * actions, pcmk_rc_ok means action was initiated) * \note It is the caller's responsibility to free the return value if non-NULL. */ static int invoke_unit_by_name(const char *arg_name, svc_action_t *op, char **path) { DBusMessage *msg; DBusMessage *reply = NULL; DBusPendingCall *pending = NULL; char *name = NULL; if (!systemd_init()) { if (op != NULL) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "No DBus connection"); } return ENOTCONN; } /* Create a LoadUnit DBus method (equivalent to GetUnit if already loaded), * which makes the unit usable via further DBus methods. * * * * * */ msg = systemd_new_method("LoadUnit"); CRM_ASSERT(msg != NULL); // Add the (expanded) unit name as the argument name = systemd_service_name(arg_name); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); free(name); if ((op == NULL) || op->synchronous) { // For synchronous ops, wait for a reply and extract the result const char *unit = NULL; int rc = pcmk_rc_ok; reply = systemd_send_recv(msg, NULL, (op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT)); dbus_message_unref(msg); unit = execute_after_loadunit(reply, op); if (unit == NULL) { rc = ENOENT; if (path != NULL) { *path = NULL; } } else if (path != NULL) { *path = strdup(unit); if (*path == NULL) { rc = ENOMEM; } } if (reply != NULL) { dbus_message_unref(reply); } return rc; } // For asynchronous ops, initiate the LoadUnit call and return pending = systemd_send(msg, loadunit_completed, op, op->timeout); if (pending == NULL) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Unable to send DBus message"); dbus_message_unref(msg); return ECOMM; } // LoadUnit was successfully initiated - op->rc = PCMK_OCF_UNKNOWN; - op->status = PCMK_EXEC_PENDING; + services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); services_set_op_pending(op, pending); dbus_message_unref(msg); return pcmk_rc_ok; } /*! * \internal * \brief Compare two strings alphabetically (case-insensitive) * * \param[in] a First string to compare * \param[in] b Second string to compare * * \return 0 if strings are equal, -1 if a < b, 1 if a > b * * \note Usable as a GCompareFunc with g_list_sort(). * NULL is considered less than non-NULL. */ static gint sort_str(gconstpointer a, gconstpointer b) { if (!a && !b) { return 0; } else if (!a) { return -1; } else if (!b) { return 1; } return strcasecmp(a, b); } GList * systemd_unit_listall(void) { int nfiles = 0; GList *units = NULL; DBusMessageIter args; DBusMessageIter unit; DBusMessageIter elem; DBusMessage *reply = NULL; if (systemd_init() == FALSE) { return NULL; } /* " \n" \ " \n" \ " \n" \ */ reply = systemd_call_simple_method("ListUnitFiles"); if (reply == NULL) { return NULL; } if (!dbus_message_iter_init(reply, &args)) { crm_err("Could not list systemd unit files: systemd reply has no arguments"); dbus_message_unref(reply); return NULL; } if (!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) { crm_err("Could not list systemd unit files: systemd reply has invalid arguments"); dbus_message_unref(reply); return NULL; } dbus_message_iter_recurse(&args, &unit); for (; dbus_message_iter_get_arg_type(&unit) != DBUS_TYPE_INVALID; dbus_message_iter_next(&unit)) { DBusBasicValue value; const char *match = NULL; char *unit_name = NULL; char *basename = NULL; if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_STRUCT, __func__, __LINE__)) { crm_warn("Skipping systemd reply argument with unexpected type"); continue; } dbus_message_iter_recurse(&unit, &elem); if(!pcmk_dbus_type_check(reply, &elem, DBUS_TYPE_STRING, __func__, __LINE__)) { crm_warn("Skipping systemd reply argument with no string"); continue; } dbus_message_iter_get_basic(&elem, &value); if (value.str == NULL) { crm_debug("ListUnitFiles reply did not provide a string"); continue; } crm_trace("DBus ListUnitFiles listed: %s", value.str); match = systemd_unit_extension(value.str); if (match == NULL) { // This is not a unit file type we know how to manage crm_debug("ListUnitFiles entry '%s' is not supported as resource", value.str); continue; } // ListUnitFiles returns full path names, we just want base name basename = strrchr(value.str, '/'); if (basename) { basename = basename + 1; } else { basename = value.str; } if (!strcmp(match, ".service")) { // Service is the "default" unit type, so strip it unit_name = strndup(basename, match - basename); } else { unit_name = strdup(basename); } nfiles++; units = g_list_prepend(units, unit_name); } dbus_message_unref(reply); crm_trace("Found %d manageable systemd unit files", nfiles); units = g_list_sort(units, sort_str); return units; } gboolean systemd_unit_exists(const char *name) { char *path = NULL; char *state = NULL; /* Note: Makes a blocking dbus calls * Used by resources_find_service_class() when resource class=service */ if ((invoke_unit_by_name(name, NULL, &path) != pcmk_rc_ok) || (path == NULL)) { return FALSE; } /* A successful LoadUnit is not sufficient to determine the unit's * existence; it merely means the LoadUnit request received a reply. * We must make another blocking call to check the LoadState property. */ state = systemd_get_property(path, "LoadState", NULL, NULL, NULL, DBUS_TIMEOUT_USE_DEFAULT); if (pcmk__str_any_of(state, "loaded", "masked", NULL)) { free(state); return TRUE; } free(state); return FALSE; } #define METADATA_FORMAT \ "\n" \ "\n" \ "\n" \ " 1.1\n" \ " \n" \ " %s\n" \ " \n" \ " systemd unit file for %s\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ "\n" static char * systemd_unit_metadata(const char *name, int timeout) { char *meta = NULL; char *desc = NULL; char *path = NULL; if (invoke_unit_by_name(name, NULL, &path) == pcmk_rc_ok) { /* TODO: Worth a making blocking call for? Probably not. Possibly if cached. */ desc = systemd_get_property(path, "Description", NULL, NULL, NULL, timeout); } else { desc = crm_strdup_printf("Systemd unit file for %s", name); } meta = crm_strdup_printf(METADATA_FORMAT, name, desc, name); free(desc); free(path); return meta; } /*! * \internal * \brief Determine result of method from reply * * \param[in] reply Reply to start, stop, or restart request * \param[in] op Action that was executed */ static void process_unit_method_reply(DBusMessage *reply, svc_action_t *op) { DBusError error; /* The first use of error here is not used other than as a non-NULL flag to * indicate that a request was indeed sent */ if (pcmk_dbus_find_error((void *) &error, reply, &error)) { set_result_from_method_error(op, &error); dbus_error_free(&error); } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { crm_warn("DBus request for %s of %s succeeded but " "return type was unexpected", op->action, crm_str(op->rsc)); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, + "systemd DBus method had unexpected reply"); } else { const char *path = NULL; dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); crm_debug("DBus request for %s of %s using %s succeeded", op->action, crm_str(op->rsc), path); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } } /*! * \internal * \brief Process the completion of an asynchronous unit start, stop, or restart * * \param[in] pending If not NULL, DBus call associated with request * \param[in] user_data Action that was executed */ static void unit_method_complete(DBusPendingCall *pending, void *user_data) { DBusMessage *reply = NULL; svc_action_t *op = user_data; crm_trace("Result for %s arrived", op->id); // Grab the reply if (pending != NULL) { reply = dbus_pending_call_steal_reply(pending); } // The call is no longer pending CRM_LOG_ASSERT(pending == op->opaque->pending); services_set_op_pending(op, NULL); // Determine result and finalize action process_unit_method_reply(reply, op); services__finalize_async_op(op); if (reply != NULL) { dbus_message_unref(reply); } } #define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/" /* When the cluster manages a systemd resource, we create a unit file override * to order the service "before" pacemaker. The "before" relationship won't * actually be used, since systemd won't ever start the resource -- we're * interested in the reverse shutdown ordering it creates, to ensure that * systemd doesn't stop the resource at shutdown while pacemaker is still * running. * * @TODO Add start timeout */ #define SYSTEMD_OVERRIDE_TEMPLATE \ "[Unit]\n" \ "Description=Cluster Controlled %s\n" \ "Before=pacemaker.service pacemaker_remote.service\n" \ "\n" \ "[Service]\n" \ "Restart=no\n" // Temporarily use rwxr-xr-x umask when opening a file for writing static FILE * create_world_readable(const char *filename) { mode_t orig_umask = umask(S_IWGRP | S_IWOTH); FILE *fp = fopen(filename, "w"); umask(orig_umask); return fp; } static void create_override_dir(const char *agent) { char *override_dir = crm_strdup_printf(SYSTEMD_OVERRIDE_ROOT "/%s.service.d", agent); int rc = pcmk__build_path(override_dir, 0755); if (rc != pcmk_rc_ok) { crm_warn("Could not create systemd override directory %s: %s", override_dir, pcmk_rc_str(rc)); } free(override_dir); } static char * get_override_filename(const char *agent) { return crm_strdup_printf(SYSTEMD_OVERRIDE_ROOT "/%s.service.d/50-pacemaker.conf", agent); } static void systemd_create_override(const char *agent, int timeout) { FILE *file_strm = NULL; char *override_file = get_override_filename(agent); create_override_dir(agent); /* Ensure the override file is world-readable. This is not strictly * necessary, but it avoids a systemd warning in the logs. */ file_strm = create_world_readable(override_file); if (file_strm == NULL) { crm_err("Cannot open systemd override file %s for writing", override_file); } else { char *override = crm_strdup_printf(SYSTEMD_OVERRIDE_TEMPLATE, agent); int rc = fprintf(file_strm, "%s\n", override); free(override); if (rc < 0) { crm_perror(LOG_WARNING, "Cannot write to systemd override file %s", override_file); } fflush(file_strm); fclose(file_strm); systemd_daemon_reload(timeout); } free(override_file); } static void systemd_remove_override(const char *agent, int timeout) { char *override_file = get_override_filename(agent); int rc = unlink(override_file); if (rc < 0) { // Stop may be called when already stopped, which is fine crm_perror(LOG_DEBUG, "Cannot remove systemd override file %s", override_file); } else { systemd_daemon_reload(timeout); } free(override_file); } /*! * \internal * \brief Parse result of systemd status check * * Set a status action's exit status and execution status based on a DBus * property check result, and finalize the action if asynchronous. * * \param[in] name DBus interface name for property that was checked * \param[in] state Property value * \param[in] userdata Status action that check was done for */ static void parse_status_result(const char *name, const char *state, void *userdata) { svc_action_t *op = userdata; crm_trace("Resource %s has %s='%s'", crm_str(op->rsc), name, crm_str(state)); if (pcmk__str_eq(state, "active", pcmk__str_none)) { - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else if (pcmk__str_eq(state, "reloading", pcmk__str_none)) { - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else if (pcmk__str_eq(state, "activating", pcmk__str_none)) { - op->rc = PCMK_OCF_UNKNOWN; - op->status = PCMK_EXEC_PENDING; + services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); } else if (pcmk__str_eq(state, "deactivating", pcmk__str_none)) { - op->rc = PCMK_OCF_UNKNOWN; - op->status = PCMK_EXEC_PENDING; + services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); } else { - op->rc = PCMK_OCF_NOT_RUNNING; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, state); } if (!(op->synchronous)) { services_set_op_pending(op, NULL); services__finalize_async_op(op); } } /*! * \internal * \brief Invoke a systemd unit, given its DBus object path * * \param[in] op Action to execute * \param[in] unit DBus object path of systemd unit to invoke */ static void invoke_unit_by_path(svc_action_t *op, const char *unit) { const char *method = NULL; DBusMessage *msg = NULL; DBusMessage *reply = NULL; if (pcmk__str_any_of(op->action, "monitor", "status", NULL)) { DBusPendingCall *pending = NULL; char *state; state = systemd_get_property(unit, "ActiveState", (op->synchronous? NULL : parse_status_result), op, (op->synchronous? NULL : &pending), op->timeout); if (op->synchronous) { parse_status_result("ActiveState", state, op); free(state); } else if (pending == NULL) { // Could not get ActiveState property - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Could not get unit state from DBus"); services__finalize_async_op(op); } else { services_set_op_pending(op, pending); } return; } else if (pcmk__str_eq(op->action, "start", pcmk__str_none)) { method = "StartUnit"; systemd_create_override(op->agent, op->timeout); } else if (pcmk__str_eq(op->action, "stop", pcmk__str_none)) { method = "StopUnit"; systemd_remove_override(op->agent, op->timeout); } else if (pcmk__str_eq(op->action, "restart", pcmk__str_none)) { method = "RestartUnit"; } else { - op->rc = PCMK_OCF_UNIMPLEMENT_FEATURE; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, + "Action not implemented for systemd resources"); if (!(op->synchronous)) { services__finalize_async_op(op); } return; } crm_trace("Calling %s for unit path %s named %s", method, unit, crm_str(op->rsc)); msg = systemd_new_method(method); CRM_ASSERT(msg != NULL); /* (ss) */ { const char *replace_s = "replace"; char *name = systemd_service_name(op->agent); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID)); free(name); } if (op->synchronous) { reply = systemd_send_recv(msg, NULL, op->timeout); dbus_message_unref(msg); process_unit_method_reply(reply, op); if (reply != NULL) { dbus_message_unref(reply); } } else { DBusPendingCall *pending = systemd_send(msg, unit_method_complete, op, op->timeout); dbus_message_unref(msg); if (pending == NULL) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Unable to send DBus message"); services__finalize_async_op(op); } else { services_set_op_pending(op, pending); } } } static gboolean systemd_timeout_callback(gpointer p) { svc_action_t * op = p; op->opaque->timerid = 0; crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc); - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_TIMEOUT; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, + "Systemd action did not complete within specified timeout"); services__finalize_async_op(op); return FALSE; } /*! * \internal * \brief Execute a systemd action * * \param[in] op Action to execute * * \return Standard Pacemaker return code * \retval EBUSY Recurring operation could not be initiated * \retval pcmk_rc_error Synchronous action failed * \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action * should not be freed (because it already was or is * pending) * * \note If the return value for an asynchronous action is not pcmk_rc_ok, the * caller is responsible for freeing the action. */ int services__execute_systemd(svc_action_t *op) { CRM_ASSERT(op != NULL); if ((op->action == NULL) || (op->agent == NULL)) { - op->rc = PCMK_OCF_NOT_CONFIGURED; - op->status = PCMK_EXEC_ERROR_FATAL; + services__set_result(op, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL, + "Bug in action caller"); goto done; } if (!systemd_init()) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "No DBus connection"); goto done; } crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'", (op->synchronous? "" : "a"), op->action, op->agent, crm_str(op->rsc)); if (pcmk__str_eq(op->action, "meta-data", pcmk__str_casei)) { op->stdout_data = systemd_unit_metadata(op->agent, op->timeout); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); goto done; } /* invoke_unit_by_name() should always override these values, which are here * just as a fail-safe in case there are any code paths that neglect to */ - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Bug in service library"); if (invoke_unit_by_name(op->agent, op, NULL) == pcmk_rc_ok) { op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op); services_add_inflight_op(op); return pcmk_rc_ok; } done: if (op->synchronous) { return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error; } else { return services__finalize_async_op(op); } } diff --git a/lib/services/upstart.c b/lib/services/upstart.c index b5c3477ef3..32634759be 100644 --- a/lib/services/upstart.c +++ b/lib/services/upstart.c @@ -1,673 +1,663 @@ /* * Original copyright 2010 Senko Rasic * and Ante Karamatic * Later changes copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #define BUS_NAME "com.ubuntu.Upstart" #define BUS_PATH "/com/ubuntu/Upstart" #define UPSTART_06_API BUS_NAME"0_6" #define UPSTART_JOB_IFACE UPSTART_06_API".Job" #define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties" /* http://upstart.ubuntu.com/wiki/DBusInterface */ static DBusConnection *upstart_proxy = NULL; static gboolean upstart_init(void) { static int need_init = 1; if (need_init) { need_init = 0; upstart_proxy = pcmk_dbus_connect(); } if (upstart_proxy == NULL) { return FALSE; } return TRUE; } void upstart_cleanup(void) { if (upstart_proxy) { pcmk_dbus_disconnect(upstart_proxy); upstart_proxy = NULL; } } /*! * \internal * \brief Get the DBus object path corresponding to a job name * * \param[in] arg_name Name of job to get path for * \param[out] path If not NULL, where to store DBus object path * \param[in] timeout Give up after this many seconds * * \return true if object path was found, false otherwise * \note The caller is responsible for freeing *path if it is non-NULL. */ static bool object_path_for_job(const gchar *arg_name, char **path, int timeout) { /* com.ubuntu.Upstart0_6.GetJobByName (in String name, out ObjectPath job) */ DBusError error; DBusMessage *msg; DBusMessage *reply = NULL; bool rc = false; if (path != NULL) { *path = NULL; } if (!upstart_init()) { return false; } msg = dbus_message_new_method_call(BUS_NAME, // target for the method call BUS_PATH, // object to call on UPSTART_06_API, // interface to call on "GetJobByName"); // method name dbus_error_init(&error); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &arg_name, DBUS_TYPE_INVALID)); reply = pcmk_dbus_send_recv(msg, upstart_proxy, &error, timeout); dbus_message_unref(msg); if (dbus_error_is_set(&error)) { crm_err("Could not get DBus object path for %s: %s", arg_name, error.message); dbus_error_free(&error); } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { crm_err("Could not get DBus object path for %s: Invalid return type", arg_name); } else { if (path != NULL) { dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, path, DBUS_TYPE_INVALID); if (*path != NULL) { *path = strdup(*path); } } rc = true; } if (reply != NULL) { dbus_message_unref(reply); } return rc; } static void fix(char *input, const char *search, char replace) { char *match = NULL; int shuffle = strlen(search) - 1; while (TRUE) { int len, lpc; match = strstr(input, search); if (match == NULL) { break; } crm_trace("Found: %s", match); match[0] = replace; len = strlen(match) - shuffle; for (lpc = 1; lpc <= len; lpc++) { match[lpc] = match[lpc + shuffle]; } } } static char * fix_upstart_name(const char *input) { char *output = strdup(input); fix(output, "_2b", '+'); fix(output, "_2c", ','); fix(output, "_2d", '-'); fix(output, "_2e", '.'); fix(output, "_40", '@'); fix(output, "_5f", '_'); return output; } GList * upstart_job_listall(void) { GList *units = NULL; DBusMessageIter args; DBusMessageIter unit; DBusMessage *msg = NULL; DBusMessage *reply = NULL; const char *method = "GetAllJobs"; DBusError error; int lpc = 0; if (upstart_init() == FALSE) { return NULL; } /* com.ubuntu.Upstart0_6.GetAllJobs (out jobs) */ dbus_error_init(&error); msg = dbus_message_new_method_call(BUS_NAME, // target for the method call BUS_PATH, // object to call on UPSTART_06_API, // interface to call on method); // method name CRM_ASSERT(msg != NULL); reply = pcmk_dbus_send_recv(msg, upstart_proxy, &error, DBUS_TIMEOUT_USE_DEFAULT); dbus_message_unref(msg); if (dbus_error_is_set(&error)) { crm_err("Call to %s failed: %s", method, error.message); dbus_error_free(&error); return NULL; } else if (!dbus_message_iter_init(reply, &args)) { crm_err("Call to %s failed: Message has no arguments", method); dbus_message_unref(reply); return NULL; } if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) { crm_err("Call to %s failed: Message has invalid arguments", method); dbus_message_unref(reply); return NULL; } dbus_message_iter_recurse(&args, &unit); while (dbus_message_iter_get_arg_type (&unit) != DBUS_TYPE_INVALID) { DBusBasicValue value; const char *job = NULL; char *path = NULL; if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { crm_warn("Skipping Upstart reply argument with unexpected type"); continue; } dbus_message_iter_get_basic(&unit, &value); if(value.str) { int llpc = 0; path = value.str; job = value.str; while (path[llpc] != 0) { if (path[llpc] == '/') { job = path + llpc + 1; } llpc++; } lpc++; crm_trace("%s -> %s", path, job); units = g_list_append(units, fix_upstart_name(job)); } dbus_message_iter_next (&unit); } dbus_message_unref(reply); crm_trace("Found %d upstart jobs", lpc); return units; } gboolean upstart_job_exists(const char *name) { return object_path_for_job(name, NULL, DBUS_TIMEOUT_USE_DEFAULT); } static char * get_first_instance(const gchar * job, int timeout) { char *instance = NULL; const char *method = "GetAllInstances"; DBusError error; DBusMessage *msg; DBusMessage *reply; DBusMessageIter args; DBusMessageIter unit; dbus_error_init(&error); msg = dbus_message_new_method_call(BUS_NAME, // target for the method call job, // object to call on UPSTART_JOB_IFACE, // interface to call on method); // method name CRM_ASSERT(msg != NULL); dbus_message_append_args(msg, DBUS_TYPE_INVALID); reply = pcmk_dbus_send_recv(msg, upstart_proxy, &error, timeout); dbus_message_unref(msg); if (dbus_error_is_set(&error)) { crm_err("Call to %s failed: %s", method, error.message); dbus_error_free(&error); goto done; } else if(reply == NULL) { crm_err("Call to %s failed: no reply", method); goto done; } else if (!dbus_message_iter_init(reply, &args)) { crm_err("Call to %s failed: Message has no arguments", method); goto done; } if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) { crm_err("Call to %s failed: Message has invalid arguments", method); goto done; } dbus_message_iter_recurse(&args, &unit); if(pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { DBusBasicValue value; dbus_message_iter_get_basic(&unit, &value); if(value.str) { instance = strdup(value.str); crm_trace("Result: %s", instance); } } done: if(reply) { dbus_message_unref(reply); } return instance; } /*! * \internal * \brief Parse result of Upstart status check * * \param[in] name DBus interface name for property that was checked * \param[in] state Property value * \param[in] userdata Status action that check was done for */ static void parse_status_result(const char *name, const char *state, void *userdata) { svc_action_t *op = userdata; if (pcmk__str_eq(state, "running", pcmk__str_none)) { - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else { - op->rc = PCMK_OCF_NOT_RUNNING; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, state); } if (!(op->synchronous)) { services_set_op_pending(op, NULL); services__finalize_async_op(op); } } #define METADATA_FORMAT \ "\n" \ "\n" \ "\n" \ " 1.1\n" \ " \n" \ " Upstart agent for controlling the system %s service\n" \ " \n" \ " Upstart job for %s\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ "\n" static char * upstart_job_metadata(const char *name) { return crm_strdup_printf(METADATA_FORMAT, name, name, name); } /*! * \internal * \brief Set an action result based on a method error * * \param[in] op Action to set result for * \param[in] error Method error */ static void set_result_from_method_error(svc_action_t *op, const DBusError *error) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Unable to invoke Upstart DBus method"); if (strstr(error->name, UPSTART_06_API ".Error.UnknownInstance")) { if (pcmk__str_eq(op->action, "stop", pcmk__str_casei)) { crm_trace("Masking stop failure (%s) for %s " "because unknown service can be considered stopped", error->name, crm_str(op->rsc)); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); return; } - op->rc = PCMK_OCF_NOT_INSTALLED; - op->status = PCMK_EXEC_NOT_INSTALLED; + services__set_result(op, PCMK_OCF_NOT_INSTALLED, + PCMK_EXEC_NOT_INSTALLED, "Upstart job not found"); } else if (pcmk__str_eq(op->action, "start", pcmk__str_casei) && strstr(error->name, UPSTART_06_API ".Error.AlreadyStarted")) { crm_trace("Masking start failure (%s) for %s " "because already started resource is OK", error->name, crm_str(op->rsc)); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); return; } crm_err("DBus request for %s of Upstart job %s for resource %s failed: %s", op->action, op->agent, crm_str(op->rsc), error->message); } /*! * \internal * \brief Process the completion of an asynchronous job start, stop, or restart * * \param[in] pending If not NULL, DBus call associated with request * \param[in] user_data Action that was executed */ static void job_method_complete(DBusPendingCall *pending, void *user_data) { DBusError error; DBusMessage *reply = NULL; svc_action_t *op = user_data; // Grab the reply if (pending != NULL) { reply = dbus_pending_call_steal_reply(pending); } // Determine result dbus_error_init(&error); if (pcmk_dbus_find_error(pending, reply, &error)) { set_result_from_method_error(op, &error); dbus_error_free(&error); } else if (pcmk__str_eq(op->action, "stop", pcmk__str_none)) { // Call has no return value crm_debug("DBus request for stop of %s succeeded", crm_str(op->rsc)); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { crm_warn("DBus request for %s of %s succeeded but " "return type was unexpected", op->action, crm_str(op->rsc)); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else { const char *path = NULL; dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); crm_debug("DBus request for %s of %s using %s succeeded", op->action, crm_str(op->rsc), path); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } // The call is no longer pending CRM_LOG_ASSERT(pending == op->opaque->pending); services_set_op_pending(op, NULL); // Finalize action services__finalize_async_op(op); if (reply != NULL) { dbus_message_unref(reply); } } /*! * \internal * \brief Execute an Upstart action * * \param[in] op Action to execute * * \return Standard Pacemaker return code * \retval EBUSY Recurring operation could not be initiated * \retval pcmk_rc_error Synchronous action failed * \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action * should not be freed (because it already was or is * pending) * * \note If the return value for an asynchronous action is not pcmk_rc_ok, the * caller is responsible for freeing the action. */ int services__execute_upstart(svc_action_t *op) { char *job = NULL; int arg_wait = TRUE; const char *arg_env = "pacemaker=1"; const char *action = op->action; DBusError error; DBusMessage *msg = NULL; DBusMessage *reply = NULL; DBusMessageIter iter, array_iter; CRM_ASSERT(op != NULL); if ((op->action == NULL) || (op->agent == NULL)) { - op->rc = PCMK_OCF_NOT_CONFIGURED; - op->status = PCMK_EXEC_ERROR_FATAL; + services__set_result(op, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL, + "Bug in action caller"); goto cleanup; } if (!upstart_init()) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "No DBus connection"); goto cleanup; } if (pcmk__str_eq(op->action, "meta-data", pcmk__str_casei)) { op->stdout_data = upstart_job_metadata(op->agent); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); goto cleanup; } if (!object_path_for_job(op->agent, &job, op->timeout)) { if (pcmk__str_eq(action, "stop", pcmk__str_none)) { - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else { - op->rc = PCMK_OCF_NOT_INSTALLED; - op->status = PCMK_EXEC_NOT_INSTALLED; + services__set_result(op, PCMK_OCF_NOT_INSTALLED, + PCMK_EXEC_NOT_INSTALLED, + "Upstart job not found"); } goto cleanup; } if (job == NULL) { // Shouldn't normally be possible -- maybe a memory error op->rc = PCMK_OCF_UNKNOWN_ERROR; op->status = PCMK_EXEC_ERROR; goto cleanup; } if (pcmk__strcase_any_of(op->action, "monitor", "status", NULL)) { DBusPendingCall *pending = NULL; char *state = NULL; char *path = get_first_instance(job, op->timeout); - op->rc = PCMK_OCF_NOT_RUNNING; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, + "No Upstart job instances found"); if (path == NULL) { goto cleanup; } state = pcmk_dbus_get_property(upstart_proxy, BUS_NAME, path, UPSTART_06_API ".Instance", "state", op->synchronous? NULL : parse_status_result, op, op->synchronous? NULL : &pending, op->timeout); free(path); if (op->synchronous) { parse_status_result("state", state, op); free(state); } else if (pending == NULL) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Could not get job state from DBus"); } else { // Successfully initiated async op free(job); services_set_op_pending(op, pending); services_add_inflight_op(op); return pcmk_rc_ok; } goto cleanup; } else if (pcmk__str_eq(action, "start", pcmk__str_none)) { action = "Start"; } else if (pcmk__str_eq(action, "stop", pcmk__str_none)) { action = "Stop"; } else if (pcmk__str_eq(action, "restart", pcmk__str_none)) { action = "Restart"; } else { - op->rc = PCMK_OCF_UNIMPLEMENT_FEATURE; - op->status = PCMK_EXEC_ERROR_HARD; + services__set_result(op, PCMK_OCF_UNIMPLEMENT_FEATURE, + PCMK_EXEC_ERROR_HARD, + "Action not implemented for Upstart resources"); goto cleanup; } // Initialize rc/status in case called functions don't set them - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_DONE, + "Bug in service library"); crm_debug("Calling %s for %s on %s", action, crm_str(op->rsc), job); msg = dbus_message_new_method_call(BUS_NAME, // target for the method call job, // object to call on UPSTART_JOB_IFACE, // interface to call on action); // method name CRM_ASSERT(msg != NULL); dbus_message_iter_init_append (msg, &iter); CRM_LOG_ASSERT(dbus_message_iter_open_container(&iter, DBUS_TYPE_ARRAY, DBUS_TYPE_STRING_AS_STRING, &array_iter)); CRM_LOG_ASSERT(dbus_message_iter_append_basic(&array_iter, DBUS_TYPE_STRING, &arg_env)); CRM_LOG_ASSERT(dbus_message_iter_close_container(&iter, &array_iter)); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_BOOLEAN, &arg_wait, DBUS_TYPE_INVALID)); if (!(op->synchronous)) { DBusPendingCall *pending = pcmk_dbus_send(msg, upstart_proxy, job_method_complete, op, op->timeout); if (pending == NULL) { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->status = PCMK_EXEC_ERROR; + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Unable to send DBus message"); goto cleanup; } else { // Successfully initiated async op free(job); services_set_op_pending(op, pending); services_add_inflight_op(op); return pcmk_rc_ok; } } // Synchronous call dbus_error_init(&error); reply = pcmk_dbus_send_recv(msg, upstart_proxy, &error, op->timeout); if (dbus_error_is_set(&error)) { set_result_from_method_error(op, &error); dbus_error_free(&error); } else if (pcmk__str_eq(op->action, "stop", pcmk__str_none)) { // DBus call does not return a value - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __func__, __LINE__)) { crm_warn("Call to %s passed but return type was unexpected", op->action); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } else { const char *path = NULL; dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); crm_info("Call to %s passed: %s", op->action, path); - op->rc = PCMK_OCF_OK; - op->status = PCMK_EXEC_DONE; + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } cleanup: free(job); if (msg != NULL) { dbus_message_unref(msg); } if (reply != NULL) { dbus_message_unref(reply); } if (op->synchronous) { return (op->rc == PCMK_OCF_OK)? pcmk_rc_ok : pcmk_rc_error; } else { return services__finalize_async_op(op); } }