diff --git a/include/crm/services.h b/include/crm/services.h index 17879991e2..aa3be7e722 100644 --- a/include/crm/services.h +++ b/include/crm/services.h @@ -1,416 +1,414 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /** * \file * \brief Services API * \ingroup core */ #ifndef __PCMK_SERVICES__ # define __PCMK_SERVICES__ # ifdef __cplusplus extern "C" { # endif # include # include # include # include # ifndef OCF_ROOT_DIR # define OCF_ROOT_DIR "/usr/lib/ocf" # endif # ifndef LSB_ROOT_DIR # define LSB_ROOT_DIR "/etc/init.d" # endif /* TODO: Autodetect these two ?*/ # ifndef SYSTEMCTL # define SYSTEMCTL "/bin/systemctl" # endif /* Deprecated and unused by Pacemaker, kept for API backward compatibility */ # ifndef SERVICE_SCRIPT # define SERVICE_SCRIPT "/sbin/service" # endif /* Known resource classes */ #define PCMK_RESOURCE_CLASS_OCF "ocf" #define PCMK_RESOURCE_CLASS_SERVICE "service" #define PCMK_RESOURCE_CLASS_LSB "lsb" #define PCMK_RESOURCE_CLASS_SYSTEMD "systemd" #define PCMK_RESOURCE_CLASS_UPSTART "upstart" #define PCMK_RESOURCE_CLASS_HB "heartbeat" #define PCMK_RESOURCE_CLASS_NAGIOS "nagios" #define PCMK_RESOURCE_CLASS_STONITH "stonith" #define PCMK_ALERT_CLASS "alert" /* This is the string passed in the OCF_EXIT_REASON_PREFIX * environment variable. The stderr output that occurs * after this prefix is encountered is considered the exit * reason for a completed operationt */ #define PCMK_OCF_REASON_PREFIX "ocf-exit-reason:" enum lsb_exitcode { PCMK_LSB_OK = 0, PCMK_LSB_UNKNOWN_ERROR = 1, PCMK_LSB_INVALID_PARAM = 2, PCMK_LSB_UNIMPLEMENT_FEATURE = 3, PCMK_LSB_INSUFFICIENT_PRIV = 4, PCMK_LSB_NOT_INSTALLED = 5, PCMK_LSB_NOT_CONFIGURED = 6, PCMK_LSB_NOT_RUNNING = 7, }; /* The return codes for the status operation are not the same for other * operatios - go figure */ enum lsb_status_exitcode { PCMK_LSB_STATUS_OK = 0, PCMK_LSB_STATUS_VAR_PID = 1, PCMK_LSB_STATUS_VAR_LOCK = 2, PCMK_LSB_STATUS_NOT_RUNNING = 3, PCMK_LSB_STATUS_UNKNOWN = 4, /* custom codes should be in the 150-199 range reserved for application use */ PCMK_LSB_STATUS_NOT_INSTALLED = 150, PCMK_LSB_STATUS_INSUFFICIENT_PRIV = 151, }; /* Uniform exit codes * Everything is mapped to its OCF equivalent so that Pacemaker only deals with one set of codes */ enum ocf_exitcode { PCMK_OCF_OK = 0, PCMK_OCF_UNKNOWN_ERROR = 1, PCMK_OCF_INVALID_PARAM = 2, PCMK_OCF_UNIMPLEMENT_FEATURE = 3, PCMK_OCF_INSUFFICIENT_PRIV = 4, PCMK_OCF_NOT_INSTALLED = 5, PCMK_OCF_NOT_CONFIGURED = 6, PCMK_OCF_NOT_RUNNING = 7, /* End of overlap with LSB */ PCMK_OCF_RUNNING_MASTER = 8, PCMK_OCF_FAILED_MASTER = 9, /* 150-199 reserved for application use */ PCMK_OCF_CONNECTION_DIED = 189, /* Operation failure implied by disconnection of the LRM API to a local or remote node */ PCMK_OCF_DEGRADED = 190, /* Active resource that is no longer 100% functional */ PCMK_OCF_DEGRADED_MASTER = 191, /* Promoted resource that is no longer 100% functional */ PCMK_OCF_EXEC_ERROR = 192, /* Generic problem invoking the agent */ PCMK_OCF_UNKNOWN = 193, /* State of the service is unknown - used for recording in-flight operations */ PCMK_OCF_SIGNAL = 194, PCMK_OCF_NOT_SUPPORTED = 195, PCMK_OCF_PENDING = 196, PCMK_OCF_CANCELLED = 197, PCMK_OCF_TIMEOUT = 198, PCMK_OCF_OTHER_ERROR = 199, /* Keep the same codes as PCMK_LSB */ }; enum op_status { PCMK_LRM_OP_PENDING = -1, PCMK_LRM_OP_DONE, PCMK_LRM_OP_CANCELLED, PCMK_LRM_OP_TIMEOUT, PCMK_LRM_OP_NOTSUPPORTED, PCMK_LRM_OP_ERROR, PCMK_LRM_OP_ERROR_HARD, PCMK_LRM_OP_ERROR_FATAL, PCMK_LRM_OP_NOT_INSTALLED, }; enum nagios_exitcode { NAGIOS_STATE_OK = 0, NAGIOS_STATE_WARNING = 1, NAGIOS_STATE_CRITICAL = 2, NAGIOS_STATE_UNKNOWN = 3, NAGIOS_STATE_DEPENDENT = 4, NAGIOS_INSUFFICIENT_PRIV = 100, NAGIOS_NOT_INSTALLED = 101, }; enum svc_action_flags { /* On timeout, only kill pid, do not kill entire pid group */ SVC_ACTION_LEAVE_GROUP = 0x01, }; typedef struct svc_action_private_s svc_action_private_t; typedef struct svc_action_s { char *id; char *rsc; char *action; int interval; char *standard; char *provider; char *agent; int timeout; - GHashTable *params; /* used by OCF agents */ + GHashTable *params; /* used by OCF agents and alert agents */ int rc; int pid; int cancel; int status; int sequence; int expected_rc; int synchronous; enum svc_action_flags flags; char *stderr_data; char *stdout_data; /*! * Data stored by the creator of the action. * * This may be used to hold data that is needed later on by a callback, * for example. */ void *cb_data; svc_action_private_t *opaque; - - GHashTable *alert_params; /* used by alert agents */ } svc_action_t; /** * \brief Get a list of files or directories in a given path * * \param[in] root full path to a directory to read * \param[in] files return list of files if TRUE or directories if FALSE * \param[in] executable if TRUE and files is TRUE, only return executable files * * \return a list of what was found. The list items are char *. * \note It is the caller's responsibility to free the result with g_list_free_full(list, free). */ GList *get_directory_list(const char *root, gboolean files, gboolean executable); /** * Get a list of services * * \return a list of services. The list items are gchar *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *services_list(void); /** * \brief Get a list of providers * * \param[in] standard list providers of this standard (e.g. ocf, lsb, etc.) * * \return a list of providers as char * list items (or NULL if standard does not support providers) * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_providers(const char *standard); /** * \brief Get a list of resource agents * * \param[in] standard list agents using this standard (e.g. ocf, lsb, etc.) (or NULL for all) * \param[in] provider list agents from this provider (or NULL for all) * * \return a list of resource agents. The list items are char *. * \note The caller is responsible for freeing the result using g_list_free_full(list, free). */ GList *resources_list_agents(const char *standard, const char *provider); /** * Get list of available standards * * \return a list of resource standards. The list items are char *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *resources_list_standards(void); svc_action_t *services_action_create(const char *name, const char *action, int interval /* ms */ , int timeout /* ms */ ); /** * \brief Create a new resource action * * \param[in] name name of resource * \param[in] standard resource agent standard (ocf, lsb, etc.) * \param[in] provider resource agent provider * \param[in] agent resource agent name * \param[in] action action (start, stop, monitor, etc.) * \param[in] interval how often to repeat this action, in milliseconds (if 0, execute only once) * \param[in] timeout consider action failed if it does not complete in this many milliseconds * \param[in] params action parameters * * \return newly allocated action instance * * \post After the call, 'params' is owned, and later free'd by the svc_action_t result * \note The caller is responsible for freeing the return value using * services_action_free(). */ svc_action_t *resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval /* ms */ , int timeout /* ms */ , GHashTable * params, enum svc_action_flags flags); /** * Kick a recurring action so it is scheduled immediately for re-execution */ gboolean services_action_kick(const char *name, const char *action, int interval /* ms */); /** * Find the first class that can provide service::${agent} * * \param[in] agent which agent to search for * \return NULL, or the first class that provides the named agent */ const char *resources_find_service_class(const char *agent); /** * Utilize services API to execute an arbitrary command. * * This API has useful infrastructure in place to be able to run a command * in the background and get notified via a callback when the command finishes. * * \param[in] exec command to execute * \param[in] args arguments to the command, NULL terminated * * \return a svc_action_t object, used to pass to the execute function * (services_action_sync() or services_action_async()) and is * provided to the callback. */ svc_action_t *services_action_create_generic(const char *exec, const char *args[]); void services_action_cleanup(svc_action_t * op); void services_action_free(svc_action_t * op); gboolean services_action_sync(svc_action_t * op); /** * Run an action asynchronously. * * \param[in] op services action data * \param[in] action_callback callback for when the action completes * * \retval TRUE succesfully started execution * \retval FALSE failed to start execution, no callback will be received */ gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)); gboolean services_action_cancel(const char *name, const char *action, int interval); static inline const char *services_lrm_status_str(enum op_status status) { switch (status) { case PCMK_LRM_OP_PENDING: return "pending"; case PCMK_LRM_OP_DONE:return "complete"; case PCMK_LRM_OP_CANCELLED:return "Cancelled"; case PCMK_LRM_OP_TIMEOUT:return "Timed Out"; case PCMK_LRM_OP_NOTSUPPORTED:return "NOT SUPPORTED"; case PCMK_LRM_OP_ERROR:return "Error"; case PCMK_LRM_OP_NOT_INSTALLED:return "Not installed"; default:return "UNKNOWN!"; } } static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) { switch (code) { case PCMK_OCF_OK: return "ok"; case PCMK_OCF_UNKNOWN_ERROR: return "unknown error"; case PCMK_OCF_INVALID_PARAM: return "invalid parameter"; case PCMK_OCF_UNIMPLEMENT_FEATURE: return "unimplemented feature"; case PCMK_OCF_INSUFFICIENT_PRIV: return "insufficient privileges"; case PCMK_OCF_NOT_INSTALLED: return "not installed"; case PCMK_OCF_NOT_CONFIGURED: return "not configured"; case PCMK_OCF_NOT_RUNNING: return "not running"; case PCMK_OCF_RUNNING_MASTER: return "master"; case PCMK_OCF_FAILED_MASTER: return "master (failed)"; case PCMK_OCF_SIGNAL: return "OCF_SIGNAL"; case PCMK_OCF_NOT_SUPPORTED: return "OCF_NOT_SUPPORTED"; case PCMK_OCF_PENDING: return "OCF_PENDING"; case PCMK_OCF_CANCELLED: return "OCF_CANCELLED"; case PCMK_OCF_TIMEOUT: return "OCF_TIMEOUT"; case PCMK_OCF_OTHER_ERROR: return "OCF_OTHER_ERROR"; case PCMK_OCF_DEGRADED: return "OCF_DEGRADED"; case PCMK_OCF_DEGRADED_MASTER: return "OCF_DEGRADED_MASTER"; default: return "unknown"; } } /** * \brief Get OCF equivalent of LSB exit code * * \param[in] action LSB action that produced exit code * \param[in] lsb_exitcode Exit code of LSB action * * \return PCMK_OCF_* constant that corresponds to LSB exit code */ static inline enum ocf_exitcode services_get_ocf_exitcode(const char *action, int lsb_exitcode) { /* For non-status actions, LSB and OCF share error code meaning <= 7 */ if (action && strcmp(action, "status") && strcmp(action, "monitor")) { if ((lsb_exitcode < 0) || (lsb_exitcode > PCMK_LSB_NOT_RUNNING)) { return PCMK_OCF_UNKNOWN_ERROR; } return (enum ocf_exitcode)lsb_exitcode; } /* status has different return codes */ switch (lsb_exitcode) { case PCMK_LSB_STATUS_OK: return PCMK_OCF_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case PCMK_LSB_STATUS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; } return PCMK_OCF_UNKNOWN_ERROR; } # ifdef __cplusplus } # endif #endif /* __PCMK_SERVICES__ */ diff --git a/lib/services/services.c b/lib/services/services.c index 1a031e044d..2765e437bd 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -1,951 +1,946 @@ /* * Copyright (C) 2010-2016 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include "services_private.h" #if SUPPORT_UPSTART # include #endif #if SUPPORT_SYSTEMD # include #endif /* TODO: Develop a rollover strategy */ static int operations = 0; static GHashTable *recurring_actions = NULL; /* ops waiting to run async because of conflicting active * pending ops */ static GList *blocked_ops = NULL; /* ops currently active (in-flight) */ static GList *inflight_ops = NULL; static void handle_blocked_ops(void); svc_action_t * services_action_create(const char *name, const char *action, int interval, int timeout) { return resources_action_create(name, PCMK_RESOURCE_CLASS_LSB, NULL, name, action, interval, timeout, NULL, 0); } const char * resources_find_service_class(const char *agent) { /* Priority is: * - lsb * - systemd * - upstart */ int rc = 0; struct stat st; char *path = NULL; #ifdef LSB_ROOT_DIR rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent); if (rc > 0 && stat(path, &st) == 0) { free(path); return PCMK_RESOURCE_CLASS_LSB; } free(path); #endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { return PCMK_RESOURCE_CLASS_SYSTEMD; } #endif #if SUPPORT_UPSTART if (upstart_job_exists(agent)) { return PCMK_RESOURCE_CLASS_UPSTART; } #endif return NULL; } static inline void init_recurring_actions(void) { if (recurring_actions == NULL) { recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL); } } /*! * \internal * \brief Check whether op is in-flight systemd or upstart op * * \param[in] op Operation to check * * \return TRUE if op is in-flight systemd or upstart op */ static inline gboolean inflight_systemd_or_upstart(svc_action_t *op) { return (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) || safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_UPSTART)) && (g_list_find(inflight_ops, op) != NULL); } /*! * \internal * \brief Expand "service" alias to an actual resource class * * \param[in] rsc Resource name (for logging only) * \param[in] standard Resource class as configured * \param[in] agent Agent name to look for * * \return Newly allocated string with actual resource class * * \note The caller is responsible for calling free() on the result. */ static char * expand_resource_class(const char *rsc, const char *standard, const char *agent) { char *expanded_class = NULL; if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) { const char *found_class = resources_find_service_class(agent); if (found_class) { crm_debug("Found %s agent %s for %s", found_class, agent, rsc); expanded_class = strdup(found_class); } else { crm_info("Assuming resource class lsb for agent %s for %s", agent, rsc); expanded_class = strdup(PCMK_RESOURCE_CLASS_LSB); } } else { expanded_class = strdup(standard); } CRM_ASSERT(expanded_class); return expanded_class; } svc_action_t * resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval, int timeout, GHashTable * params, enum svc_action_flags flags) { svc_action_t *op = NULL; /* * Do some up front sanity checks before we go off and * build the svc_action_t instance. */ if (crm_strlen_zero(name)) { crm_err("Cannot create operation without resource name"); goto return_error; } if (crm_strlen_zero(standard)) { crm_err("Cannot create operation for %s without resource class", name); goto return_error; } if (!strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) && crm_strlen_zero(provider)) { crm_err("Cannot create OCF operation for %s without provider", name); goto return_error; } if (crm_strlen_zero(agent)) { crm_err("Cannot create operation for %s without agent name", name); goto return_error; } if (crm_strlen_zero(action)) { crm_err("Cannot create operation for %s without operation name", name); goto return_error; } /* * Sanity checks passed, proceed! */ op = calloc(1, sizeof(svc_action_t)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->rsc = strdup(name); op->interval = interval; op->timeout = timeout; op->standard = expand_resource_class(name, standard, agent); op->agent = strdup(agent); op->sequence = ++operations; op->flags = flags; op->id = generate_op_key(name, action, interval); if (safe_str_eq(action, "monitor") && ( #if SUPPORT_HEARTBEAT safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_HB) || #endif safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB))) { action = "status"; } op->action = strdup(action); if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { op->provider = strdup(provider); op->params = params; params = NULL; if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(action); } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) { if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the LSB_ROOT_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); op->opaque->args[2] = NULL; #if SUPPORT_HEARTBEAT } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_HB) == 0) { int index; int param_num; char buf_tmp[20]; void *value_tmp; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the HB_RA_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); /* The "heartbeat" agent class only has positional arguments, * which we keyed by their decimal position number. */ param_num = 1; for (index = 1; index <= MAX_ARGC - 3; index++ ) { snprintf(buf_tmp, sizeof(buf_tmp), "%d", index); value_tmp = g_hash_table_lookup(params, buf_tmp); if (value_tmp == NULL) { /* maybe: strdup("") ?? * But the old lrmd did simply continue as well. */ continue; } op->opaque->args[param_num++] = strdup(value_tmp); } /* Add operation code as the last argument, */ /* and the teminating NULL pointer */ op->opaque->args[param_num++] = strdup(op->action); op->opaque->args[param_num] = NULL; #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { op->opaque->exec = strdup("systemd-dbus"); #endif #if SUPPORT_UPSTART } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) { op->opaque->exec = strdup("upstart-dbus"); #endif #if SUPPORT_NAGIOS } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { int index = 0; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the NAGIOS_PLUGIN_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); index = 1; if (safe_str_eq(op->action, "monitor") && op->interval == 0) { /* Invoke --version for a nagios probe */ op->opaque->args[index] = strdup("--version"); index++; } else if (params) { GHashTableIter iter; char *key = NULL; char *value = NULL; static int args_size = sizeof(op->opaque->args) / sizeof(char *); g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) && index <= args_size - 3) { int len = 3; char *long_opt = NULL; if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) { continue; } len += strlen(key); long_opt = calloc(1, len); sprintf(long_opt, "--%s", key); long_opt[len - 1] = 0; op->opaque->args[index] = long_opt; op->opaque->args[index + 1] = strdup(value); index += 2; } } op->opaque->args[index] = NULL; #endif } else { crm_err("Unknown resource standard: %s", op->standard); services_action_free(op); op = NULL; } if(params) { g_hash_table_destroy(params); } return op; return_error: if(params) { g_hash_table_destroy(params); } services_action_free(op); return NULL; } svc_action_t * services_action_create_generic(const char *exec, const char *args[]) { svc_action_t *op; unsigned int cur_arg; op = calloc(1, sizeof(*op)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->opaque->exec = strdup(exec); op->opaque->args[0] = strdup(exec); for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) { op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); if (cur_arg == DIMOF(op->opaque->args) - 1) { crm_err("svc_action_t args list not long enough for '%s' execution request.", exec); break; } } return op; } #if SUPPORT_DBUS /*! * \internal * \brief Update operation's pending DBus call, unreferencing old one if needed * * \param[in,out] op Operation to modify * \param[in] pending Pending call to set */ void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending) { if (op->opaque->pending && (op->opaque->pending != pending)) { if (pending) { crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending); } else { crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending); } dbus_pending_call_unref(op->opaque->pending); } op->opaque->pending = pending; if (pending) { crm_trace("Updated pending %s DBus call (%p)", op->id, pending); } else { crm_trace("Cleared pending %s DBus call", op->id); } } #endif void services_action_cleanup(svc_action_t * op) { if(op->opaque == NULL) { return; } #if SUPPORT_DBUS if(op->opaque->timerid != 0) { crm_trace("Removing timer for call %s to %s", op->action, op->rsc); g_source_remove(op->opaque->timerid); op->opaque->timerid = 0; } if(op->opaque->pending) { crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc); if(dbus_pending_call_get_completed(op->opaque->pending)) { crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc); } dbus_pending_call_cancel(op->opaque->pending); dbus_pending_call_unref(op->opaque->pending); op->opaque->pending = NULL; } #endif if (op->opaque->stderr_gsource) { mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } } void services_action_free(svc_action_t * op) { unsigned int i; if (op == NULL) { return; } /* The operation should be removed from all tracking lists by this point. * If it's not, we have a bug somewhere, so bail. That may lead to a * memory leak, but it's better than a use-after-free segmentation fault. */ CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return); CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return); CRM_CHECK((recurring_actions == NULL) || (g_hash_table_lookup(recurring_actions, op->id) == NULL), return); services_action_cleanup(op); if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } free(op->id); free(op->opaque->exec); for (i = 0; i < DIMOF(op->opaque->args); i++) { free(op->opaque->args[i]); } free(op->opaque); free(op->rsc); free(op->action); free(op->standard); free(op->agent); free(op->provider); free(op->stdout_data); free(op->stderr_data); if (op->params) { g_hash_table_destroy(op->params); op->params = NULL; } - if (op->alert_params) { - g_hash_table_destroy(op->alert_params); - op->alert_params = NULL; - } - free(op); } gboolean cancel_recurring_action(svc_action_t * op) { crm_info("Cancelling %s operation %s", op->standard, op->id); if (recurring_actions) { g_hash_table_remove(recurring_actions, op->id); } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } return TRUE; } /*! * \brief Cancel a recurring action * * \param[in] name Name of resource that operation is for * \param[in] action Name of operation to cancel * \param[in] interval Interval of operation to cancel * * \return TRUE if action was successfully cancelled, FALSE otherwise */ gboolean services_action_cancel(const char *name, const char *action, int interval) { gboolean cancelled = FALSE; char *id = generate_op_key(name, action, interval); svc_action_t *op = NULL; /* We can only cancel a recurring action */ init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); if (op == NULL) { goto done; } /* Tell operation_finalize() not to reschedule the operation */ op->cancel = TRUE; /* Stop tracking it as a recurring operation, and stop its timer */ cancel_recurring_action(op); /* If the op has a PID, it's an in-flight child process, so kill it. * * Whether the kill succeeds or fails, the main loop will send the op to * operation_finished() (and thus operation_finalize()) when the process * goes away. */ if (op->pid != 0) { crm_info("Terminating in-flight op %s (pid %d) early because it was cancelled", id, op->pid); cancelled = mainloop_child_kill(op->pid); if (cancelled == FALSE) { crm_err("Termination of %s (pid %d) failed", id, op->pid); } goto done; } /* In-flight systemd and upstart ops don't have a pid. The relevant handlers * will call operation_finalize() when the operation completes. * @TODO: Can we request early termination, maybe using * dbus_pending_call_cancel()? */ if (inflight_systemd_or_upstart(op)) { crm_info("Will cancel %s op %s when in-flight instance completes", op->standard, op->id); cancelled = FALSE; goto done; } /* Otherwise, operation is not in-flight, just report as cancelled */ op->status = PCMK_LRM_OP_CANCELLED; if (op->opaque->callback) { op->opaque->callback(op); } blocked_ops = g_list_remove(blocked_ops, op); services_action_free(op); cancelled = TRUE; done: free(id); return cancelled; } gboolean services_action_kick(const char *name, const char *action, int interval /* ms */) { svc_action_t * op = NULL; char *id = generate_op_key(name, action, interval); init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); free(id); if (op == NULL) { return FALSE; } if (op->pid || inflight_systemd_or_upstart(op)) { return TRUE; } else { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(op); return TRUE; } } /*! * \internal * \brief Add a new recurring operation, checking for duplicates * * \param[in] op Operation to add * * \return TRUE if duplicate found (and reschedule), FALSE otherwise */ static gboolean handle_duplicate_recurring(svc_action_t * op) { svc_action_t * dup = NULL; /* check for duplicates */ dup = g_hash_table_lookup(recurring_actions, op->id); if (dup && (dup != op)) { /* update user data */ if (op->opaque->callback) { dup->opaque->callback = op->opaque->callback; dup->cb_data = op->cb_data; op->cb_data = NULL; } /* immediately execute the next interval */ if (dup->pid != 0) { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(dup); } /* free the duplicate */ services_action_free(op); return TRUE; } return FALSE; } inline static gboolean action_exec_helper(svc_action_t * op) { /* Whether a/synchronous must be decided (op->synchronous) beforehand. */ if (op->standard && (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0)) { #if SUPPORT_UPSTART return upstart_job_exec(op); #endif } else if (op->standard && strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { #if SUPPORT_SYSTEMD return systemd_unit_exec(op); #endif } else { return services_os_action_execute(op); } /* The 'op' has probably been freed if the execution functions return TRUE for the asynchronous 'op'. */ /* Avoid using the 'op' in here. */ return FALSE; } void services_add_inflight_op(svc_action_t * op) { if (op == NULL) { return; } CRM_ASSERT(op->synchronous == FALSE); /* keep track of ops that are in-flight to avoid collisions in the same namespace */ if (op->rsc) { inflight_ops = g_list_append(inflight_ops, op); } } /*! * \internal * \brief Stop tracking an operation that completed * * \param[in] op Operation to stop tracking */ void services_untrack_op(svc_action_t *op) { /* Op is no longer in-flight or blocked */ inflight_ops = g_list_remove(inflight_ops, op); blocked_ops = g_list_remove(blocked_ops, op); /* Op is no longer blocking other ops, so check if any need to run */ handle_blocked_ops(); } gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) { op->synchronous = false; if (action_callback) { op->opaque->callback = action_callback; } if (op->interval > 0) { init_recurring_actions(); if (handle_duplicate_recurring(op) == TRUE) { /* entry rescheduled, dup freed */ /* exit early */ return TRUE; } g_hash_table_replace(recurring_actions, op->id, op); } if (op->rsc && is_op_blocked(op->rsc)) { blocked_ops = g_list_append(blocked_ops, op); return TRUE; } return action_exec_helper(op); } static gboolean processing_blocked_ops = FALSE; gboolean is_op_blocked(const char *rsc) { GList *gIter = NULL; svc_action_t *op = NULL; for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (safe_str_eq(op->rsc, rsc)) { return TRUE; } } return FALSE; } static void handle_blocked_ops(void) { GList *executed_ops = NULL; GList *gIter = NULL; svc_action_t *op = NULL; gboolean res = FALSE; if (processing_blocked_ops) { /* avoid nested calling of this function */ return; } processing_blocked_ops = TRUE; /* n^2 operation here, but blocked ops are incredibly rare. this list * will be empty 99% of the time. */ for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (is_op_blocked(op->rsc)) { continue; } executed_ops = g_list_append(executed_ops, op); res = action_exec_helper(op); if (res == FALSE) { op->status = PCMK_LRM_OP_ERROR; /* this can cause this function to be called recursively * which is why we have processing_blocked_ops static variable */ operation_finalize(op); } } for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; blocked_ops = g_list_remove(blocked_ops, op); } g_list_free(executed_ops); processing_blocked_ops = FALSE; } gboolean services_action_sync(svc_action_t * op) { gboolean rc = TRUE; if (op == NULL) { crm_trace("No operation to execute"); return FALSE; } op->synchronous = true; rc = action_exec_helper(op); crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc); if (op->stdout_data) { crm_trace(" > stdout: %s", op->stdout_data); } if (op->stderr_data) { crm_trace(" > stderr: %s", op->stderr_data); } return rc; } GList * get_directory_list(const char *root, gboolean files, gboolean executable) { return services_os_get_directory_list(root, files, executable); } GList * services_list(void) { return resources_list_agents(PCMK_RESOURCE_CLASS_LSB, NULL); } #if SUPPORT_HEARTBEAT static GList * resources_os_list_hb_agents(void) { return services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE); } #endif GList * resources_list_standards(void) { GList *standards = NULL; GList *agents = NULL; standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_OCF)); standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_LSB)); standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SERVICE)); #if SUPPORT_SYSTEMD agents = systemd_unit_listall(); if (agents) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SYSTEMD)); g_list_free_full(agents, free); } #endif #if SUPPORT_UPSTART agents = upstart_job_listall(); if (agents) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_UPSTART)); g_list_free_full(agents, free); } #endif #if SUPPORT_NAGIOS agents = resources_os_list_nagios_agents(); if (agents) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_NAGIOS)); g_list_free_full(agents, free); } #endif #if SUPPORT_HEARTBEAT standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_HB)); #endif return standards; } GList * resources_list_providers(const char *standard) { if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) { return resources_os_list_ocf_providers(); } return NULL; } GList * resources_list_agents(const char *standard, const char *provider) { if ((standard == NULL) || (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0)) { GList *tmp1; GList *tmp2; GList *result = resources_os_list_lsb_agents(); if (standard == NULL) { tmp1 = result; tmp2 = resources_os_list_ocf_agents(NULL); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } #if SUPPORT_SYSTEMD tmp1 = result; tmp2 = systemd_unit_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif #if SUPPORT_UPSTART tmp1 = result; tmp2 = upstart_job_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif return result; } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) { return resources_os_list_ocf_agents(provider); } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) { return resources_os_list_lsb_agents(); #if SUPPORT_HEARTBEAT } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_HB) == 0) { return resources_os_list_hb_agents(); #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { return systemd_unit_listall(); #endif #if SUPPORT_UPSTART } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) { return upstart_job_listall(); #endif #if SUPPORT_NAGIOS } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) { return resources_os_list_nagios_agents(); #endif } return NULL; } diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index 1c60766dd8..74ac03162e 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,925 +1,926 @@ /* * Copyright (C) 2010-2016 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SIGNALFD_H #include #endif #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "services_private.h" #if SUPPORT_CIBSECRETS # include "crm/common/cib_secrets.h" #endif static inline void set_fd_opts(int fd, int opts) { int flag; if ((flag = fcntl(fd, F_GETFL)) >= 0) { if (fcntl(fd, F_SETFL, flag | opts) < 0) { crm_err("fcntl() write failed"); } } else { crm_err("fcntl() read failed"); } } static gboolean svc_read_output(int fd, svc_action_t * op, bool is_stderr) { char *data = NULL; int rc = 0, len = 0; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; if (fd < 0) { crm_trace("No fd for %s", op->id); return FALSE; } if (is_stderr && op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; crm_trace("Reading %s stderr into offset %d", op->id, len); } else if (is_stderr == FALSE && op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; crm_trace("Reading %s stdout into offset %d", op->id, len); } else { crm_trace("Reading %s %s into offset %d", op->id, is_stderr?"stderr":"stdout", len); } do { rc = read(fd, buf, buf_read_len); if (rc > 0) { crm_trace("Got %d chars: %.80s", rc, buf); buf[rc] = 0; data = realloc_safe(data, len + rc + 1); len += sprintf(data + len, "%s", buf); } else if (errno != EINTR) { /* error or EOF * Cleanup happens in pipe_done() */ rc = FALSE; break; } } while (rc == buf_read_len || rc < 0); if (is_stderr) { op->stderr_data = data; } else { op->stdout_data = data; } return rc; } static int dispatch_stdout(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stdout_fd, op, FALSE); } static int dispatch_stderr(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stderr_fd, op, TRUE); } static void pipe_out_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), "OCF_RESKEY_%s", (char *)key); set_ocf_env(buffer, value, user_data); } static void set_alert_env(gpointer key, gpointer value, gpointer user_data) { set_ocf_env((char*)key, value, user_data); } /*! * \internal * \brief Add environment variables suitable for an action * * \param[in] op Action to use * * \note Environment variables are added only for alerts and OCF agents. */ static void add_action_env_vars(const svc_action_t *op) { - if (op->alert_params) { - g_hash_table_foreach(op->alert_params, set_alert_env, NULL); + if (safe_str_eq(op->standard, PCMK_ALERT_CLASS)) { + if (op->params) { + g_hash_table_foreach(op->params, set_alert_env, NULL); + } return; } - if ((op->standard == NULL) - || (strcasecmp(PCMK_RESOURCE_CLASS_OCF, op->standard) != 0)) { + if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF) == FALSE) { return; } if (op->params) { g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL); } set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL); set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL); set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL); set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invocation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; op->opaque->repeat_timer = 0; services_action_async(op, NULL); return FALSE; } /* Returns FALSE if 'op' should be free'd by the caller */ gboolean operation_finalize(svc_action_t * op) { int recurring = 0; if (op->interval) { if (op->cancel) { op->status = PCMK_LRM_OP_CANCELLED; cancel_recurring_action(op); } else { recurring = 1; op->opaque->repeat_timer = g_timeout_add(op->interval, recurring_action_timer, (void *)op); } } if (op->opaque->callback) { op->opaque->callback(op); } op->pid = 0; services_untrack_op(op); if (!recurring && op->synchronous == FALSE) { /* * If this is a recurring action, do not free explicitly. * It will get freed whenever the action gets cancelled. */ services_action_free(op); return TRUE; } services_action_cleanup(op); return FALSE; } static void operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { svc_action_t *op = mainloop_child_userdata(p); char *prefix = crm_strdup_printf("%s:%d", op->id, op->pid); mainloop_clear_child_userdata(p); op->status = PCMK_LRM_OP_DONE; CRM_ASSERT(op->pid == pid); crm_trace("%s %p %p", prefix, op->opaque->stderr_gsource, op->opaque->stdout_gsource); if (op->opaque->stderr_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stderr", prefix); dispatch_stderr(op); crm_trace("%s: %p", op->id, op->stderr_data); mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stdout", prefix); dispatch_stdout(op); crm_trace("%s: %p", op->id, op->stdout_data); mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } if (signo) { if (mainloop_child_timeout(p)) { crm_warn("%s - timed out after %dms", prefix, op->timeout); op->status = PCMK_LRM_OP_TIMEOUT; op->rc = PCMK_OCF_TIMEOUT; } else { do_crm_log_unlikely((op->cancel) ? LOG_INFO : LOG_WARNING, "%s - terminated with signal %d", prefix, signo); op->status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_SIGNAL; } } else { op->rc = exitcode; crm_debug("%s - exited with rc=%d", prefix, exitcode); } free(prefix); prefix = crm_strdup_printf("%s:%d:stderr", op->id, op->pid); crm_log_output(LOG_NOTICE, prefix, op->stderr_data); free(prefix); prefix = crm_strdup_printf("%s:%d:stdout", op->id, op->pid); crm_log_output(LOG_DEBUG, prefix, op->stdout_data); free(prefix); operation_finalize(op); } /*! * \internal * \brief Set operation rc and status per errno from stat(), fork() or execvp() * * \param[in,out] op Operation to set rc and status for * \param[in] error Value of errno after system call * * \return void */ static void services_handle_exec_error(svc_action_t * op, int error) { int rc_not_installed, rc_insufficient_priv, rc_exec_error; /* Mimic the return codes for each standard as that's what we'll convert back from in get_uniform_rc() */ if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB) && safe_str_eq(op->action, "status")) { rc_not_installed = PCMK_LSB_STATUS_NOT_INSTALLED; rc_insufficient_priv = PCMK_LSB_STATUS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_LSB_STATUS_UNKNOWN; #if SUPPORT_NAGIOS } else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_NAGIOS)) { rc_not_installed = NAGIOS_NOT_INSTALLED; rc_insufficient_priv = NAGIOS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; #endif } else { rc_not_installed = PCMK_OCF_NOT_INSTALLED; rc_insufficient_priv = PCMK_OCF_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; } switch (error) { /* see execve(2), stat(2) and fork(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ case ENOTDIR: /* Path component is not a directory */ case EINVAL: /* Invalid executable format */ case ENOEXEC: /* Invalid executable format */ op->rc = rc_not_installed; op->status = PCMK_LRM_OP_NOT_INSTALLED; break; case EACCES: /* permission denied (various errors) */ case EPERM: /* permission denied (various errors) */ op->rc = rc_insufficient_priv; op->status = PCMK_LRM_OP_ERROR; break; default: op->rc = rc_exec_error; op->status = PCMK_LRM_OP_ERROR; } } static void action_launch_child(svc_action_t *op) { int lpc; /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library. * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well. * We do not want this to be inherited by the child process. By resetting this the signal * to the default behavior, we avoid some potential odd problems that occur during OCF * scripts when SIGPIPE is ignored by the environment. */ signal(SIGPIPE, SIG_DFL); #if defined(HAVE_SCHED_SETSCHEDULER) if (sched_getscheduler(0) != SCHED_OTHER) { struct sched_param sp; memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { crm_perror(LOG_ERR, "Could not reset scheduling policy to SCHED_OTHER for %s", op->id); } } #endif if (setpriority(PRIO_PROCESS, 0, 0) == -1) { crm_perror(LOG_ERR, "Could not reset process priority to 0 for %s", op->id); } /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); /* close all descriptors except stdin/out/err and channels to logd */ for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) { close(lpc); } #if SUPPORT_CIBSECRETS if (replace_secret_params(op->rsc, op->params) < 0) { /* replacing secrets failed! */ if (safe_str_eq(op->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", op->rsc); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", op->rsc); _exit(PCMK_OCF_NOT_CONFIGURED); } } #endif add_action_env_vars(op); /* execute the RA */ execvp(op->opaque->exec, op->opaque->args); /* Most cases should have been already handled by stat() */ services_handle_exec_error(op, errno); _exit(op->rc); } #ifndef HAVE_SYS_SIGNALFD_H static int sigchld_pipe[2] = { -1, -1 }; static void sigchld_handler() { if ((sigchld_pipe[1] >= 0) && (write(sigchld_pipe[1], "", 1) == -1)) { crm_perror(LOG_TRACE, "Could not poke SIGCHLD self-pipe"); } } #endif static void action_synced_wait(svc_action_t * op, sigset_t *mask) { int status = 0; int timeout = op->timeout; int sfd = -1; time_t start = -1; struct pollfd fds[3]; int wait_rc = 0; #ifdef HAVE_SYS_SIGNALFD_H sfd = signalfd(-1, mask, SFD_NONBLOCK); if (sfd < 0) { crm_perror(LOG_ERR, "signalfd() failed"); } #else sfd = sigchld_pipe[0]; #endif fds[0].fd = op->opaque->stdout_fd; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = op->opaque->stderr_fd; fds[1].events = POLLIN; fds[1].revents = 0; fds[2].fd = sfd; fds[2].events = POLLIN; fds[2].revents = 0; crm_trace("Waiting for %d", op->pid); start = time(NULL); do { int poll_rc = poll(fds, 3, timeout); if (poll_rc > 0) { if (fds[0].revents & POLLIN) { svc_read_output(op->opaque->stdout_fd, op, FALSE); } if (fds[1].revents & POLLIN) { svc_read_output(op->opaque->stderr_fd, op, TRUE); } if (fds[2].revents & POLLIN) { #ifdef HAVE_SYS_SIGNALFD_H struct signalfd_siginfo fdsi; ssize_t s; s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo)); if (s != sizeof(struct signalfd_siginfo)) { crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd); } else if (fdsi.ssi_signo == SIGCHLD) { #else if (1) { /* Clear out the sigchld pipe. */ char ch; while (read(sfd, &ch, 1) == 1) /*omit*/; #endif wait_rc = waitpid(op->pid, &status, WNOHANG); if (wait_rc > 0) { break; } else if (wait_rc < 0){ if (errno == ECHILD) { /* Here, don't dare to kill and bail out... */ break; } else { /* ...otherwise pretend process still runs. */ wait_rc = 0; } crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid); } } } } else if (poll_rc == 0) { timeout = 0; break; } else if (poll_rc < 0) { if (errno != EINTR) { crm_perror(LOG_ERR, "poll() failed"); break; } } timeout = op->timeout - (time(NULL) - start) * 1000; } while ((op->timeout < 0 || timeout > 0)); crm_trace("Child done: %d", op->pid); if (wait_rc <= 0) { op->rc = PCMK_OCF_UNKNOWN_ERROR; if (op->timeout > 0 && timeout <= 0) { op->status = PCMK_LRM_OP_TIMEOUT; crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); } else { op->status = PCMK_LRM_OP_ERROR; } /* If only child hasn't been successfully waited for, yet. This is to limit killing wrong target a bit more. */ if (wait_rc == 0 && waitpid(op->pid, &status, WNOHANG) == 0) { if (kill(op->pid, SIGKILL)) { crm_err("kill(%d, KILL) failed: %d", op->pid, errno); } /* Safe to skip WNOHANG here as we sent non-ignorable signal. */ while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/; } } else if (WIFEXITED(status)) { op->status = PCMK_LRM_OP_DONE; op->rc = WEXITSTATUS(status); crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); op->status = PCMK_LRM_OP_ERROR; crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_err("Managed %s process %d dumped core", op->id, op->pid); } #endif svc_read_output(op->opaque->stdout_fd, op, FALSE); svc_read_output(op->opaque->stderr_fd, op, TRUE); close(op->opaque->stdout_fd); close(op->opaque->stderr_fd); #ifdef HAVE_SYS_SIGNALFD_H close(sfd); #endif } /* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ /* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean services_os_action_execute(svc_action_t * op) { int stdout_fd[2]; int stderr_fd[2]; struct stat st; sigset_t *pmask; #ifdef HAVE_SYS_SIGNALFD_H sigset_t mask; sigset_t old_mask; #define sigchld_cleanup() do { \ if (sigismember(&old_mask, SIGCHLD) == 0) { \ if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { \ crm_perror(LOG_ERR, "sigprocmask() failed to unblock sigchld"); \ } \ } \ } while (0) #else struct sigaction sa; struct sigaction old_sa; #define sigchld_cleanup() do { \ if (sigaction(SIGCHLD, &old_sa, NULL) < 0) { \ crm_perror(LOG_ERR, "sigaction() failed to remove sigchld handler"); \ } \ close(sigchld_pipe[0]); \ close(sigchld_pipe[1]); \ sigchld_pipe[0] = sigchld_pipe[1] = -1; \ } while(0) #endif /* Fail fast */ if(stat(op->opaque->exec, &st) != 0) { int rc = errno; crm_warn("Cannot execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (pipe(stdout_fd) < 0) { int rc = errno; crm_err("pipe(stdout_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (pipe(stderr_fd) < 0) { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); crm_err("pipe(stderr_fd) failed. '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } return FALSE; } if (op->synchronous) { #ifdef HAVE_SYS_SIGNALFD_H sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sigemptyset(&old_mask); if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) { crm_perror(LOG_ERR, "sigprocmask() failed to block sigchld"); } pmask = &mask; #else if(pipe(sigchld_pipe) == -1) { crm_perror(LOG_ERR, "pipe() failed"); } set_fd_opts(sigchld_pipe[0], O_NONBLOCK); set_fd_opts(sigchld_pipe[1], O_NONBLOCK); sa.sa_handler = sigchld_handler; sa.sa_flags = 0; sigemptyset(&sa.sa_mask); if (sigaction(SIGCHLD, &sa, &old_sa) < 0) { crm_perror(LOG_ERR, "sigaction() failed to set sigchld handler"); } pmask = NULL; #endif } op->pid = fork(); switch (op->pid) { case -1: { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); close(stderr_fd[0]); close(stderr_fd[1]); crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!op->synchronous) { return operation_finalize(op); } sigchld_cleanup(); return FALSE; } case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_err("dup2() failed (stdout)"); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_err("dup2() failed (stderr)"); } close(stderr_fd[1]); } if (op->synchronous) { sigchld_cleanup(); } action_launch_child(op); CRM_ASSERT(0); /* action_launch_child is effectively noreturn */ } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); op->opaque->stdout_fd = stdout_fd[0]; set_fd_opts(op->opaque->stdout_fd, O_NONBLOCK); op->opaque->stderr_fd = stderr_fd[0]; set_fd_opts(op->opaque->stderr_fd, O_NONBLOCK); if (op->synchronous) { action_synced_wait(op, pmask); sigchld_cleanup(); } else { crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec); mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op, (op->flags & SVC_ACTION_LEAVE_GROUP) ? mainloop_leave_pid_group : 0, operation_finished); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); services_add_inflight_op(op); } return TRUE; } GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable) { GList *list = NULL; struct dirent **namelist; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if (executable && (sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * resources_os_list_lsb_agents(void) { return get_directory_list(LSB_ROOT_DIR, TRUE, TRUE); } GList * resources_os_list_ocf_providers(void) { return get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE, TRUE); } GList * resources_os_list_ocf_agents(const char *provider) { GList *gIter = NULL; GList *result = NULL; GList *providers = NULL; if (provider) { char buffer[500]; snprintf(buffer, sizeof(buffer), "%s/resource.d/%s", OCF_ROOT_DIR, provider); return get_directory_list(buffer, TRUE, TRUE); } providers = resources_os_list_ocf_providers(); for (gIter = providers; gIter != NULL; gIter = gIter->next) { GList *tmp1 = result; GList *tmp2 = resources_os_list_ocf_agents(gIter->data); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } g_list_free_full(providers, free); return result; } #if SUPPORT_NAGIOS GList * resources_os_list_nagios_agents(void) { GList *plugin_list = NULL; GList *result = NULL; GList *gIter = NULL; plugin_list = get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE); /* Make sure both the plugin and its metadata exist */ for (gIter = plugin_list; gIter != NULL; gIter = gIter->next) { const char *plugin = gIter->data; char *metadata = crm_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin); struct stat st; if (stat(metadata, &st) == 0) { result = g_list_append(result, strdup(plugin)); } free(metadata); } g_list_free_full(plugin_list, free); return result; } #endif diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index afdfd01b02..76c7e5058e 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -1,1793 +1,1793 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_TIMEB_H # include #endif #define EXIT_REASON_MAX_LEN 128 GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; int interval; int start_delay; int timeout_orig; int call_id; int exec_rc; int lrmd_op_status; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *exit_reason; char *output; char *userdata_str; /* when set, this cmd should go through a container wrapper */ const char *isolation_wrapper; #ifdef HAVE_SYS_TIMEB_H /* recurring and systemd operations may involve more than one lrmd command * per operation, so they need info about original and most recent */ struct timeb t_first_run; /* Timestamp of when op first ran */ struct timeb t_run; /* Timestamp of when op most recently ran */ struct timeb t_first_queue; /* Timestamp of when op first was queued */ struct timeb t_queue; /* Timestamp of when op most recently was queued */ struct timeb t_rcchange; /* Timestamp of last rc change */ #endif int first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); static void log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time) { char pid_str[32] = { 0, }; int log_level = LOG_INFO; if (cmd->last_pid) { snprintf(pid_str, 32, "%d", cmd->last_pid); } if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } #ifdef HAVE_SYS_TIMEB_H do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc, exec_time, queue_time); #else do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc); #endif } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (safe_str_eq(action, "monitor") && (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_LSB) || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE) || safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SYSTEMD))) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); return rsc; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } static lrmd_cmd_t * create_lrmd_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); cmd->isolation_wrapper = g_hash_table_lookup(cmd->params, "CRM_meta_isolation_wrapper"); if (cmd->isolation_wrapper) { if (g_hash_table_lookup(cmd->params, "CRM_meta_isolation_instance") == NULL) { g_hash_table_insert(cmd->params, strdup("CRM_meta_isolation_instance"), strdup(rsc->rsc_id)); } if (rsc->provider) { g_hash_table_insert(cmd->params, strdup("CRM_meta_provider"), strdup(rsc->provider)); } g_hash_table_insert(cmd->params, strdup("CRM_meta_class"), strdup(rsc->class)); g_hash_table_insert(cmd->params, strdup("CRM_meta_type"), strdup(rsc->type)); } if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) { crm_debug("Setting flag to leave pid group on timeout and only kill action pid for %s_%s_%d", cmd->rsc_id, cmd->action, cmd->interval); cmd->service_flags |= SVC_ACTION_LEAVE_GROUP; } return cmd; } static lrmd_cmd_t * create_alert_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_ALERT, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = strdup("start"); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_ALERT_ID); cmd->params = xml2list(rsc_xml); return cmd; } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); free(cmd->exit_reason); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); return FALSE; } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } static gboolean merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { GListPtr gIter = NULL; lrmd_cmd_t * dup = NULL; gboolean dup_pending = FALSE; if (cmd->interval == 0) { return 0; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { dup_pending = TRUE; goto merge_dup; } } /* if dup is in recurring_ops list, that means it has already executed * and is in the interval loop. we can't just remove it in this case. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { goto merge_dup; } } return FALSE; merge_dup: /* This should not occur, if it does we need to investigate in the crmd * how something like this is possible */ crm_warn("Duplicate recurring op entry detected (%s_%s_%d), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); /* merge */ dup->first_notify_sent = 0; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* if we are waiting for the next interval, kick it off now */ if (dup_pending == TRUE) { g_source_remove(cmd->stonith_recurring_id); cmd->stonith_recurring_id = 0; stonith_recurring_op_helper(cmd); } } else if (dup_pending == FALSE) { /* if we've already handed this to the service lib, kick off an early execution */ services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); } free_lrmd_cmd(cmd); return TRUE; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { gboolean dup_processed = FALSE; CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); dup_processed = merge_recurring_duplicate(rsc, cmd); if (dup_processed) { /* duplicate recurring cmd found, cmds merged */ return; } /* crmd expects lrmd to automatically cancel recurring ops before rsc stops. */ if (rsc && safe_str_eq(cmd->action, "stop")) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static void send_reply(crm_client_t * client, int rc, uint32_t id, int call_id) { int send_rc = 0; xmlNode *reply = NULL; reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; if (client == NULL) { crm_err("Asked to send event to NULL client"); return; } else if (client->name == NULL) { crm_trace("Asked to send event to client with no name"); return; } if (lrmd_server_send_notify(client, update_msg) <= 0) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } } #ifdef HAVE_SYS_TIMEB_H /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or has time zero) */ static int time_diff_ms(struct timeb *now, struct timeb *old) { struct timeb local_now = { 0, }; if (now == NULL) { ftime(&local_now); now = &local_now; } if ((old == NULL) || (old->time == 0)) { return 0; } return difftime(now->time, old->time) * 1000 + now->millitm - old->millitm; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * /param[in] cmd LRMD command object to reset * * /note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { int exec_time = 0; int queue_time = 0; xmlNode *notify = NULL; #ifdef HAVE_SYS_TIMEB_H exec_time = time_diff_ms(NULL, &cmd->t_run); queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue); #endif log_finished(cmd, exec_time, queue_time); /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if (cmd->last_notify_rc == cmd->exec_rc && cmd->last_notify_op_status == cmd->lrmd_op_status) { /* only send changes */ return; } } cmd->first_notify_sent = 1; cmd->last_notify_rc = cmd->exec_rc; cmd->last_notify_op_status = cmd->lrmd_op_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); #ifdef HAVE_SYS_TIMEB_H crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time); crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time); crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { crm_client_t *client = crm_client_get_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else if (client_connections != NULL) { g_hash_table_foreach(client_connections, send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (client_connections != NULL) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); g_hash_table_foreach(client_connections, send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->lrmd_op_status = 0; cmd->last_pid = 0; memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); free(cmd->exit_reason); cmd->exit_reason = NULL; free(cmd->output); cmd->output = NULL; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } #if SUPPORT_HEARTBEAT static int pattern_matched(const char *pat, const char *str) { if (g_pattern_match_simple(pat, str)) { crm_debug("RA output matched stopped pattern [%s]", pat); return TRUE; } return FALSE; } static int hb2uniform_rc(const char *action, int rc, const char *stdout_data) { const char *stop_pattern[] = { "*stopped*", "*not*running*" }; const char *running_pattern[] = { "*running*", "*OK*" }; char *lower_std_output = NULL; int result; if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } /* Treat class heartbeat the same as class lsb. */ if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) { return services_get_ocf_exitcode(action, rc); } /* for status though, exit code is ignored, * and the stdout is scanned for specific strings */ if (stdout_data == NULL) { crm_warn("No status output from the (hb) resource agent, assuming stopped"); return PCMK_OCF_NOT_RUNNING; } lower_std_output = g_ascii_strdown(stdout_data, -1); if (pattern_matched(stop_pattern[0], lower_std_output) || pattern_matched(stop_pattern[1], lower_std_output)) { result = PCMK_OCF_NOT_RUNNING; } else if (pattern_matched(running_pattern[0], lower_std_output) || pattern_matched(running_pattern[1], stdout_data)) { /* "OK" is matched case sensitive */ result = PCMK_OCF_OK; } else { /* It didn't say it was running - must be stopped */ crm_debug("RA output did not match any pattern, assuming stopped"); result = PCMK_OCF_NOT_RUNNING; } free(lower_std_output); return result; } #endif static int ocf2uniform_rc(int rc) { if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) { return PCMK_OCF_UNKNOWN_ERROR; } return rc; } static int stonith2uniform_rc(const char *action, int rc) { if (rc == -ENODEV) { if (safe_str_eq(action, "stop")) { rc = PCMK_OCF_OK; } else if (safe_str_eq(action, "start")) { rc = PCMK_OCF_NOT_INSTALLED; } else { rc = PCMK_OCF_NOT_RUNNING; } } else if (rc != 0) { rc = PCMK_OCF_UNKNOWN_ERROR; } return rc; } #if SUPPORT_NAGIOS static int nagios2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } switch (rc) { case NAGIOS_STATE_OK: return PCMK_OCF_OK; case NAGIOS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case NAGIOS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case NAGIOS_STATE_WARNING: case NAGIOS_STATE_CRITICAL: case NAGIOS_STATE_UNKNOWN: case NAGIOS_STATE_DEPENDENT: default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } #endif static int get_uniform_rc(const char *standard, const char *action, int rc) { if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) { return ocf2uniform_rc(rc); } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) { return stonith2uniform_rc(action, rc); } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) { return rc; } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) { return rc; #if SUPPORT_NAGIOS } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) { return nagios2uniform_rc(action, rc); #endif } else { return services_get_ocf_exitcode(action, rc); } } static int action_get_uniform_rc(svc_action_t * action) { lrmd_cmd_t *cmd = action->cb_data; #if SUPPORT_HEARTBEAT if (safe_str_eq(action->standard, PCMK_RESOURCE_CLASS_HB)) { return hb2uniform_rc(cmd->action, action->rc, action->stdout_data); } #endif return get_uniform_rc(action->standard, cmd->action, action->rc); } void notify_of_new_client(crm_client_t *new_client) { crm_client_t *client = NULL; GHashTableIter iter; xmlNode *notify = NULL; char *key = NULL; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) { if (safe_str_eq(client->id, new_client->id)) { continue; } send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify); } free_xml(notify); } static char * parse_exit_reason(const char *output) { const char *cur = NULL; const char *last = NULL; char *reason = NULL; static int cookie_len = 0; char *eol = NULL; if (output == NULL) { return NULL; } if (!cookie_len) { cookie_len = strlen(PCMK_OCF_REASON_PREFIX); } cur = strstr(output, PCMK_OCF_REASON_PREFIX); for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { /* skip over the cookie delimiter string */ cur += cookie_len; last = cur; } if (last == NULL) { return NULL; } /* make our own copy */ reason = calloc(1, (EXIT_REASON_MAX_LEN+1)); CRM_ASSERT(reason); /* limit reason string size */ strncpy(reason, last, EXIT_REASON_MAX_LEN); /* truncate everything after a new line */ eol = strchr(reason, '\n'); if (eol != NULL) { *eol = '\0'; } return reason; } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; const char *rclass = NULL; bool goagain = false; if (!cmd) { crm_err("LRMD action (%s) completed does not match any known operations.", action->id); return; } #ifdef HAVE_SYS_TIMEB_H if (cmd->exec_rc != action->rc) { ftime(&cmd->t_rcchange); } #endif cmd->last_pid = action->pid; cmd->exec_rc = action_get_uniform_rc(action); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) { rclass = resources_find_service_class(rsc->class); } else if(rsc) { rclass = rsc->class; } if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) { if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) { /* systemd I curse thee! * * systemd returns from start actions after the start _begins_ * not after it completes. * * So we have to jump through a few hoops so that we don't * report 'complete' to the rest of pacemaker until, you know, * it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) { goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->real_action) { /* Ok, so this is the follow up monitor action to check if start actually completed */ if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) { goagain = true; } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) { goagain = true; } else { #ifdef HAVE_SYS_TIMEB_H int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc); cmd_original_times(cmd); #endif if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_NOT_RUNNING && safe_str_eq(cmd->real_action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; } } } } #if SUPPORT_NAGIOS if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) { if (safe_str_eq(cmd->action, "monitor") && cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) { /* Successfully executed --version for the nagios plugin */ cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) { goagain = true; } } #endif /* Wrapping this section in ifdef implies that systemd resources are not * fully supported on platforms without sys/timeb.h. Since timeb is * obsolete, we should eventually prefer a clock_gettime() implementation * (wrapped in its own ifdef) with timeb as a fallback. */ #ifdef HAVE_SYS_TIMEB_H if(goagain) { int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if(cmd->exec_rc == PCMK_OCF_OK) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if(cmd->exec_rc == PCMK_OCF_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left); cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; cmd->exec_rc = PCMK_OCF_TIMEOUT; cmd_original_times(cmd); } } #endif if (action->stderr_data) { cmd->output = strdup(action->stderr_data); cmd->exit_reason = parse_exit_reason(action->stderr_data); } else if (action->stdout_data) { cmd->output = strdup(action->stdout_data); } cmd_finalize(cmd, rsc); } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { int recurring = cmd->interval; lrmd_rsc_t *rsc = NULL; cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, rc); rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) { recurring = 0; /* do nothing */ } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) { /* Not registered == inactive */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (rc) { /* Attempt to map return codes to op status if possible */ switch (rc) { case -EPROTONOSUPPORT: cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED; break; case -ETIME: cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; break; default: /* TODO: This looks wrong. Status should be _DONE and exec_rc set to an error */ cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } } else { /* command successful */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; if (safe_str_eq(cmd->action, "start") && rsc) { rsc->stonith_started = 1; } } if (recurring && rsc) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { if (rsc->active) { cmd_list = g_list_append(cmd_list, rsc->active); } if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("STONITH connection failed, finalizing %d pending operations.", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } static int lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; int do_monitor = 0; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, -ENOTCONN); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; cmd_finalize(cmd, rsc); return -EUNATCH; } if (safe_str_eq(cmd->action, "start")) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* Stonith automatically registers devices from the IPC when changes occur, * but to avoid a possible race condition between stonith receiving the IPC update * and the lrmd requesting that resource, the lrmd still registers the device as well. * Stonith knows how to handle duplicate device registrations correctly. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); if (rc == 0) { do_monitor = 1; } } else if (safe_str_eq(cmd->action, "stop")) { rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id); rsc->stonith_started = 0; } else if (safe_str_eq(cmd->action, "monitor")) { if (cmd->interval) { do_monitor = 1; } else { rc = rsc->stonith_started ? 0 : -ENODEV; } } if (!do_monitor) { goto cleanup_stonith_exec; } rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); /* don't cleanup yet, we will find out the result of the monitor later */ if (rc > 0) { rsc->active = cmd; return rc; } else if (rc == 0) { rc = -1; } cleanup_stonith_exec: stonith_action_complete(cmd, rc); return rc; } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS) && safe_str_eq(cmd->action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; goto exec_done; } #endif if (cmd->params) { params_copy = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (params_copy != NULL) { g_hash_table_foreach(cmd->params, dup_attr, params_copy); } } if (safe_str_eq(rsc->class, PCMK_ALERT_CLASS)) { /* In the case of Alert, lrmd always set rsc->type from CRM_alert_path parameter. */ void *value_lookup = g_hash_table_lookup(params_copy, CRM_ALERT_KEY_PATH); if (value_lookup != NULL) { action = services_action_create_generic((char*)value_lookup, NULL); action->action = strdup(cmd->action); action->timeout = cmd->timeout; action->id = strdup(rsc->rsc_id); - action->alert_params = params_copy; + action->params = params_copy; value_lookup = g_hash_table_lookup(params_copy, CRM_ALERT_NODE_SEQUENCE); if (value_lookup != NULL) { action->sequence = crm_atoi(value_lookup, ""); } } } else if (cmd->isolation_wrapper) { g_hash_table_remove(params_copy, "CRM_meta_isolation_wrapper"); action = resources_action_create(rsc->rsc_id, PCMK_RESOURCE_CLASS_OCF, LRMD_ISOLATION_PROVIDER, cmd->isolation_wrapper, cmd->action, /*action will be normalized in wrapper*/ cmd->interval, cmd->timeout, params_copy, cmd->service_flags); } else { action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval, cmd->timeout, params_copy, cmd->service_flags); } if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; /* 'cmd' may not be valid after this point if * services_action_async() returned TRUE * * Upstart and systemd both synchronously determine monitor/status * results and call action_complete (which may free 'cmd') if necessary. */ if (services_action_async(action, action_complete)) { return TRUE; } cmd->exec_rc = action->rc; if(action->status != PCMK_LRM_OP_DONE) { cmd->lrmd_op_status = action->status; } else { cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef HAVE_SYS_TIMEB_H if (cmd->t_first_run.time == 0) { ftime(&cmd->t_first_run); } ftime(&cmd->t_run); #endif } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH); gIter = rsc->pending_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *reply = create_xml_node(NULL, "reply"); const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_PROTOCOL_VERSION, protocol_version); crm_xml_add_int(reply, F_LRMD_RC, -EPROTO); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); } crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_LRMD_CLIENTID, client->id); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); lrmd_server_send_reply(client, id, reply); if (crm_is_true(is_ipc_provider)) { /* this is a remote connection from a cluster nodes crmd */ #ifdef SUPPORT_REMOTE ipc_proxy_add_provider(client); #endif } free_xml(reply); return pcmk_ok; } static int process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && safe_str_eq(rsc->class, dup->class) && safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) { crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Added '%s' to the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); return rc; } static void process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int send_rc = 0; int call_id = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (!rsc_id) { rc = -ENODEV; goto get_rsc_done; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; goto get_rsc_done; } get_rsc_done: reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } send_rc = lrmd_server_send_reply(client, id, reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } free_xml(reply); } static int process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation still in progress: %p", rsc->active); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client, rsc); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int process_lrmd_alert_exec(crm_client_t * client, uint32_t id, xmlNode * request) { lrmd_rsc_t *alert = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, request, LOG_ERR); const char *alert_id = crm_element_value(alert_xml, F_LRMD_ALERT_ID); int call_id; if (!alert_id) { return -EINVAL; } alert = g_hash_table_lookup(rsc_list, alert_id); if (alert == NULL) { crm_info("Alert '%s' not found (%d active resources)", alert_id, g_hash_table_size(rsc_list)); return -ENODEV; } call_id = pcmk_ok; cmd = create_alert_cmd(request, client, alert); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(alert, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, int interval) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval == 0) { continue; } if (client_id && safe_str_neq(cmd->client_id, client_id)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); int interval = 0; crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval); } void process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) { #ifdef SUPPORT_REMOTE ipc_proxy_forward_client(client, request); #endif do_reply = 1; } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { rc = process_lrmd_signon(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) { process_lrmd_get_rsc_info(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) { rc = process_lrmd_rsc_exec(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) { rc = process_lrmd_rsc_cancel(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) { do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG); CRM_LOG_ASSERT(data != NULL); check_sbd_timeout(timeout); } else if (crm_str_eq(op, LRMD_OP_ALERT_EXEC, TRUE)) { rc = process_lrmd_alert_exec(client, id, request); do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown %s from %s", op, client->name); crm_log_xml_warn(request, "UnknownOp"); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { send_reply(client, rc, id, call_id); } if (do_notify) { send_generic_notify(rc, request); } }