diff --git a/include/crm/services.h b/include/crm/services.h index c06aae97a2..ca2c1079a1 100644 --- a/include/crm/services.h +++ b/include/crm/services.h @@ -1,350 +1,355 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /** * \file * \brief Services API * \ingroup core */ #ifndef __PCMK_SERVICES__ # define __PCMK_SERVICES__ # ifdef __cplusplus extern "C" { # endif # include # include # include # ifndef OCF_ROOT_DIR # define OCF_ROOT_DIR "/usr/lib/ocf" # endif # ifndef LSB_ROOT_DIR # define LSB_ROOT_DIR "/etc/init.d" # endif /* TODO: Autodetect these two ?*/ # ifndef SYSTEMCTL # define SYSTEMCTL "/bin/systemctl" # endif # ifndef SERVICE_SCRIPT # define SERVICE_SCRIPT "/sbin/service" # endif /* *INDENT-OFF* */ enum lsb_exitcode { PCMK_LSB_OK = 0, PCMK_LSB_UNKNOWN_ERROR = 1, PCMK_LSB_INVALID_PARAM = 2, PCMK_LSB_UNIMPLEMENT_FEATURE = 3, PCMK_LSB_INSUFFICIENT_PRIV = 4, PCMK_LSB_NOT_INSTALLED = 5, PCMK_LSB_NOT_CONFIGURED = 6, PCMK_LSB_NOT_RUNNING = 7, }; /* The return codes for the status operation are not the same for other * operatios - go figure */ enum lsb_status_exitcode { PCMK_LSB_STATUS_OK = 0, PCMK_LSB_STATUS_VAR_PID = 1, PCMK_LSB_STATUS_VAR_LOCK = 2, PCMK_LSB_STATUS_NOT_RUNNING = 3, PCMK_LSB_STATUS_NOT_INSTALLED = 4, }; /* Uniform exit codes * Everything is mapped to its OCF equivalent so that Pacemaker only deals with one set of codes */ enum ocf_exitcode { PCMK_OCF_OK = 0, PCMK_OCF_UNKNOWN_ERROR = 1, PCMK_OCF_INVALID_PARAM = 2, PCMK_OCF_UNIMPLEMENT_FEATURE = 3, PCMK_OCF_INSUFFICIENT_PRIV = 4, PCMK_OCF_NOT_INSTALLED = 5, PCMK_OCF_NOT_CONFIGURED = 6, PCMK_OCF_NOT_RUNNING = 7, /* End of overlap with LSB */ PCMK_OCF_RUNNING_MASTER = 8, PCMK_OCF_FAILED_MASTER = 9, /* 150-199 reserved for application use */ PCMK_OCF_EXEC_ERROR = 192, /* Generic problem invoking the agent */ PCMK_OCF_UNKNOWN = 193, /* State of the service is unknown - used for recording in-flight operations */ PCMK_OCF_SIGNAL = 194, PCMK_OCF_NOT_SUPPORTED = 195, PCMK_OCF_PENDING = 196, PCMK_OCF_CANCELLED = 197, PCMK_OCF_TIMEOUT = 198, PCMK_OCF_OTHER_ERROR = 199, /* Keep the same codes as PCMK_LSB */ }; enum op_status { PCMK_LRM_OP_PENDING = -1, PCMK_LRM_OP_DONE, PCMK_LRM_OP_CANCELLED, PCMK_LRM_OP_TIMEOUT, PCMK_LRM_OP_NOTSUPPORTED, PCMK_LRM_OP_ERROR, PCMK_LRM_OP_ERROR_HARD, PCMK_LRM_OP_ERROR_FATAL, PCMK_LRM_OP_NOT_INSTALLED, }; enum nagios_exitcode { NAGIOS_STATE_OK = 0, NAGIOS_STATE_WARNING = 1, NAGIOS_STATE_CRITICAL = 2, NAGIOS_STATE_UNKNOWN = 3, NAGIOS_STATE_DEPENDENT = 4, NAGIOS_INSUFFICIENT_PRIV = 100, NAGIOS_NOT_INSTALLED = 101, }; /* *INDENT-ON* */ typedef struct svc_action_private_s svc_action_private_t; typedef struct svc_action_s { char *id; char *rsc; char *action; int interval; char *standard; char *provider; char *agent; int timeout; GHashTable *params; int rc; int pid; int cancel; int status; int sequence; int expected_rc; char *stderr_data; char *stdout_data; /** * Data stored by the creator of the action. * * This may be used to hold data that is needed later on by a callback, * for example. */ void *cb_data; svc_action_private_t *opaque; } svc_action_t; /** * Get a list of files or directories in a given path * * \param[in] root full path to a directory to read * \param[in] files true to get a list of files, false for a list of directories * * \return a list of what was found. The list items are gchar *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *get_directory_list(const char *root, gboolean files, gboolean executable); /** * Get a list of services * * \return a list of services. The list items are gchar *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *services_list(void); /** * Get a list of providers * * \param[in] the standard for providers to check for (such as "ocf") * * \return a list of providers. The list items are gchar *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *resources_list_providers(const char *standard); /** * Get a list of resource agents * * \param[in] the standard for research agents to check for * (such as "ocf", "lsb", or "windows") * * \return a list of resource agents. The list items are gchar *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *resources_list_agents(const char *standard, const char *provider); /** * Get list of available standards * * \return a list of resource standards. The list items are char *. This list _must_ * be destroyed using g_list_free_full(list, free). */ GList *resources_list_standards(void); svc_action_t *services_action_create(const char *name, const char *action, int interval /* ms */ , int timeout /* ms */ ); /** * Create a resources action. * * \param[in] timeout the timeout in milliseconds * \param[in] interval how often to repeat this action, in milliseconds. * If this value is 0, only execute this action one time. * * \post After the call, 'params' is owned, and later free'd by the svc_action_t result */ svc_action_t *resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval /* ms */ , int timeout /* ms */ , GHashTable * params); +/** + * Kick a recurring action so it is scheduled immediately for re-execution + */ + gboolean services_action_kick(const char *name, const char *action, int interval /* ms */); + /** * Find the first class that can provide service::${agent} * * \param[in] agent which agent to search for * \return NULL, or the first class that provides the named agent */ const char *resources_find_service_class(const char *agent); /** * Utilize services API to execute an arbitrary command. * * This API has useful infrastructure in place to be able to run a command * in the background and get notified via a callback when the command finishes. * * \param[in] exec command to execute * \param[in] args arguments to the command, NULL terminated * * \return a svc_action_t object, used to pass to the execute function * (services_action_sync() or services_action_async()) and is * provided to the callback. */ svc_action_t *services_action_create_generic(const char *exec, const char *args[]); void services_action_free(svc_action_t * op); gboolean services_action_sync(svc_action_t * op); /** * Run an action asynchronously. * * \param[in] op services action data * \param[in] action_callback callback for when the action completes * * \retval TRUE succesfully started execution * \retval FALSE failed to start execution, no callback will be received */ gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)); gboolean services_action_cancel(const char *name, const char *action, int interval); static inline const char *services_lrm_status_str(enum op_status status) { switch (status) { case PCMK_LRM_OP_PENDING: return "pending"; case PCMK_LRM_OP_DONE:return "complete"; case PCMK_LRM_OP_CANCELLED:return "Cancelled"; case PCMK_LRM_OP_TIMEOUT:return "Timed Out"; case PCMK_LRM_OP_NOTSUPPORTED:return "NOT SUPPORTED"; case PCMK_LRM_OP_ERROR:return "Error"; case PCMK_LRM_OP_NOT_INSTALLED:return "Not installed"; default:return "UNKNOWN!"; }} static inline const char *services_ocf_exitcode_str(enum ocf_exitcode code) { switch (code) { case PCMK_OCF_OK: return "ok"; case PCMK_OCF_UNKNOWN_ERROR: return "unknown error"; case PCMK_OCF_INVALID_PARAM: return "invalid parameter"; case PCMK_OCF_UNIMPLEMENT_FEATURE: return "unimplemented feature"; case PCMK_OCF_INSUFFICIENT_PRIV: return "insufficient privileges"; case PCMK_OCF_NOT_INSTALLED: return "not installed"; case PCMK_OCF_NOT_CONFIGURED: return "not configured"; case PCMK_OCF_NOT_RUNNING: return "not running"; case PCMK_OCF_RUNNING_MASTER: return "master"; case PCMK_OCF_FAILED_MASTER: return "master (failed)"; case PCMK_OCF_SIGNAL: return "OCF_SIGNAL"; case PCMK_OCF_NOT_SUPPORTED: return "OCF_NOT_SUPPORTED"; case PCMK_OCF_PENDING: return "OCF_PENDING"; case PCMK_OCF_CANCELLED: return "OCF_CANCELLED"; case PCMK_OCF_TIMEOUT: return "OCF_TIMEOUT"; case PCMK_OCF_OTHER_ERROR: return "OCF_OTHER_ERROR"; default: return "unknown"; } } static inline enum ocf_exitcode services_get_ocf_exitcode(char *action, int lsb_exitcode) { if (action != NULL && strcmp("status", action) == 0) { switch (lsb_exitcode) { case PCMK_LSB_STATUS_OK: return PCMK_OCF_OK; case PCMK_LSB_STATUS_VAR_PID: return PCMK_OCF_NOT_RUNNING; case PCMK_LSB_STATUS_VAR_LOCK: return PCMK_OCF_NOT_RUNNING; case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_OCF_UNKNOWN_ERROR; default: return PCMK_OCF_UNKNOWN_ERROR; } } else if (lsb_exitcode > PCMK_LSB_NOT_RUNNING) { return PCMK_OCF_UNKNOWN_ERROR; } /* For non-status operations, the PCMK_LSB and PCMK_OCF share error code meaning * for rc <= 7 */ return (enum ocf_exitcode)lsb_exitcode; } # ifdef __cplusplus } # endif #endif /* __PCMK_SERVICES__ */ diff --git a/lib/services/services.c b/lib/services/services.c index 30762c6bd9..ba7117365a 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -1,605 +1,634 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include "services_private.h" #if SUPPORT_UPSTART # include #endif #if SUPPORT_SYSTEMD # include #endif /* TODO: Develop a rollover strategy */ static int operations = 0; GHashTable *recurring_actions = NULL; svc_action_t * services_action_create(const char *name, const char *action, int interval, int timeout) { return resources_action_create(name, "lsb", NULL, name, action, interval, timeout, NULL); } const char * resources_find_service_class(const char *agent) { /* Priority is: * - lsb * - systemd * - upstart */ int rc = 0; struct stat st; char *path = NULL; #ifdef LSB_ROOT_DIR rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent); if (rc > 0 && stat(path, &st) == 0) { free(path); return "lsb"; } free(path); #endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { return "systemd"; } #endif #if SUPPORT_UPSTART if (upstart_job_exists(agent)) { return "upstart"; } #endif return NULL; } svc_action_t * resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval, int timeout, GHashTable * params) { svc_action_t *op = NULL; /* * Do some up front sanity checks before we go off and * build the svc_action_t instance. */ if (crm_strlen_zero(name)) { crm_err("A service or resource action must have a name."); goto return_error; } if (crm_strlen_zero(standard)) { crm_err("A service action must have a valid standard."); goto return_error; } if (!strcasecmp(standard, "ocf") && crm_strlen_zero(provider)) { crm_err("An OCF resource action must have a provider."); goto return_error; } if (crm_strlen_zero(agent)) { crm_err("A service or resource action must have an agent."); goto return_error; } if (crm_strlen_zero(action)) { crm_err("A service or resource action must specify an action."); goto return_error; } if (safe_str_eq(action, "monitor") && (safe_str_eq(standard, "lsb") || safe_str_eq(standard, "service"))) { action = "status"; } /* * Sanity checks passed, proceed! */ op = calloc(1, sizeof(svc_action_t)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->rsc = strdup(name); op->action = strdup(action); op->interval = interval; op->timeout = timeout; op->standard = strdup(standard); op->agent = strdup(agent); op->sequence = ++operations; if (asprintf(&op->id, "%s_%s_%d", name, action, interval) == -1) { goto return_error; } if (strcasecmp(op->standard, "service") == 0) { const char *expanded = resources_find_service_class(op->agent); if(expanded) { crm_debug("Found a %s agent for %s/%s", expanded, op->rsc, op->agent); free(op->standard); op->standard = strdup(expanded); } else { crm_info("Cannot determine the standard for %s (%s)", op->rsc, op->agent); free(op->standard); op->standard = strdup("lsb"); } } if (strcasecmp(op->standard, "ocf") == 0) { op->provider = strdup(provider); op->params = params; params = NULL; if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(action); } else if (strcasecmp(op->standard, "lsb") == 0) { if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the LSB_ROOT_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); op->opaque->args[2] = NULL; #if SUPPORT_SYSTEMD } else if (strcasecmp(op->standard, "systemd") == 0) { op->opaque->exec = strdup("systemd-dbus"); #endif #if SUPPORT_UPSTART } else if (strcasecmp(op->standard, "upstart") == 0) { op->opaque->exec = strdup("upstart-dbus"); #endif } else if (strcasecmp(op->standard, "service") == 0) { op->opaque->exec = strdup(SERVICE_SCRIPT); op->opaque->args[0] = strdup(SERVICE_SCRIPT); op->opaque->args[1] = strdup(agent); op->opaque->args[2] = strdup(action); #if SUPPORT_NAGIOS } else if (strcasecmp(op->standard, "nagios") == 0) { int index = 0; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the NAGIOS_PLUGIN_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); index = 1; if (safe_str_eq(op->action, "monitor") && op->interval == 0) { /* Invoke --version for a nagios probe */ op->opaque->args[index] = strdup("--version"); index++; } else if (params) { GHashTableIter iter; char *key = NULL; char *value = NULL; static int args_size = sizeof(op->opaque->args) / sizeof(char *); g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) && index <= args_size - 3) { int len = 3; char *long_opt = NULL; if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) { continue; } len += strlen(key); long_opt = calloc(1, len); sprintf(long_opt, "--%s", key); long_opt[len - 1] = 0; op->opaque->args[index] = long_opt; op->opaque->args[index + 1] = strdup(value); index += 2; } } op->opaque->args[index] = NULL; #endif } else { crm_err("Unknown resource standard: %s", op->standard); services_action_free(op); op = NULL; } if(params) { g_hash_table_destroy(params); } return op; return_error: if(params) { g_hash_table_destroy(params); } services_action_free(op); return NULL; } svc_action_t * services_action_create_generic(const char *exec, const char *args[]) { svc_action_t *op; unsigned int cur_arg; op = calloc(1, sizeof(*op)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->opaque->exec = strdup(exec); op->opaque->args[0] = strdup(exec); for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) { op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); if (cur_arg == DIMOF(op->opaque->args) - 1) { crm_err("svc_action_t args list not long enough for '%s' execution request.", exec); break; } } return op; } void services_action_free(svc_action_t * op) { unsigned int i; if (op == NULL) { return; } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); } if (op->opaque->stderr_gsource) { mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } free(op->id); free(op->opaque->exec); for (i = 0; i < DIMOF(op->opaque->args); i++) { free(op->opaque->args[i]); } free(op->opaque); free(op->rsc); free(op->action); free(op->standard); free(op->agent); free(op->provider); free(op->stdout_data); free(op->stderr_data); if (op->params) { g_hash_table_destroy(op->params); op->params = NULL; } free(op); } gboolean cancel_recurring_action(svc_action_t * op) { crm_info("Cancelling operation %s", op->id); if (recurring_actions) { g_hash_table_remove(recurring_actions, op->id); } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); } return TRUE; } gboolean services_action_cancel(const char *name, const char *action, int interval) { svc_action_t *op = NULL; char id[512]; snprintf(id, sizeof(id), "%s_%s_%d", name, action, interval); if (!(op = g_hash_table_lookup(recurring_actions, id))) { return FALSE; } if (op->pid == 0) { cancel_recurring_action(op); op->status = PCMK_LRM_OP_CANCELLED; if (op->opaque->callback) { op->opaque->callback(op); } services_action_free(op); } else { crm_info("Cancelling op: %s will occur once operation completes", id); op->cancel = 1; } return TRUE; } +gboolean +services_action_kick(const char *name, const char *action, int interval /* ms */) +{ + svc_action_t * op = NULL; + char *id = NULL; + + if (asprintf(&id, "%s_%s_%d", name, action, interval) == -1) { + return FALSE; + } + + op = g_hash_table_lookup(recurring_actions, id); + free(id); + + if (op == NULL) { + return FALSE; + } + + if (op->pid) { + return TRUE; + } else { + if (op->opaque->repeat_timer) { + g_source_remove(op->opaque->repeat_timer); + } + recurring_action_timer(op); + return TRUE; + } + +} + /* add new recurring operation, check for duplicates. * - if duplicate found, return TRUE, immediately reschedule op. * - if no dup, return FALSE, inserve into recurring op list.*/ static gboolean handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_action_t *)) { svc_action_t * dup = NULL; if (recurring_actions == NULL) { recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL); return FALSE; } /* check for duplicates */ dup = g_hash_table_lookup(recurring_actions, op->id); if (dup && (dup != op)) { /* update user data */ if (op->opaque->callback) { dup->opaque->callback = op->opaque->callback; dup->cb_data = op->cb_data; op->cb_data = NULL; } /* immediately execute the next interval */ if (dup->pid != 0) { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); } recurring_action_timer(dup); } /* free the dup. */ services_action_free(op); return TRUE; } return FALSE; } gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) { if (action_callback) { op->opaque->callback = action_callback; } if (op->interval > 0) { if (handle_duplicate_recurring(op, action_callback) == TRUE) { /* entry rescheduled, dup freed */ return TRUE; } g_hash_table_replace(recurring_actions, op->id, op); } if (op->standard && strcasecmp(op->standard, "upstart") == 0) { #if SUPPORT_UPSTART return upstart_job_exec(op, FALSE); #endif } if (op->standard && strcasecmp(op->standard, "systemd") == 0) { #if SUPPORT_SYSTEMD return systemd_unit_exec(op, FALSE); #endif } return services_os_action_execute(op, FALSE); } gboolean services_action_sync(svc_action_t * op) { gboolean rc = TRUE; if (op == NULL) { crm_trace("No operation to execute"); return FALSE; } else if (op->standard && strcasecmp(op->standard, "upstart") == 0) { #if SUPPORT_UPSTART rc = upstart_job_exec(op, TRUE); #endif } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) { #if SUPPORT_SYSTEMD rc = systemd_unit_exec(op, TRUE); #endif } else { rc = services_os_action_execute(op, TRUE); } crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc); if (op->stdout_data) { crm_trace(" > stdout: %s", op->stdout_data); } if (op->stderr_data) { crm_trace(" > stderr: %s", op->stderr_data); } return rc; } GList * get_directory_list(const char *root, gboolean files, gboolean executable) { return services_os_get_directory_list(root, files, executable); } GList * services_list(void) { return resources_list_agents("lsb", NULL); } GList * resources_list_standards(void) { GList *standards = NULL; GList *agents = NULL; standards = g_list_append(standards, strdup("ocf")); standards = g_list_append(standards, strdup("lsb")); standards = g_list_append(standards, strdup("service")); #if SUPPORT_SYSTEMD agents = systemd_unit_listall(); #else agents = NULL; #endif if (agents) { standards = g_list_append(standards, strdup("systemd")); g_list_free_full(agents, free); } #if SUPPORT_UPSTART agents = upstart_job_listall(); #else agents = NULL; #endif if (agents) { standards = g_list_append(standards, strdup("upstart")); g_list_free_full(agents, free); } #if SUPPORT_NAGIOS agents = resources_os_list_nagios_agents(); if (agents) { standards = g_list_append(standards, strdup("nagios")); g_list_free_full(agents, free); } #endif return standards; } GList * resources_list_providers(const char *standard) { if (strcasecmp(standard, "ocf") == 0) { return resources_os_list_ocf_providers(); } return NULL; } GList * resources_list_agents(const char *standard, const char *provider) { if (standard == NULL || strcasecmp(standard, "service") == 0) { GList *tmp1; GList *tmp2; GList *result = resources_os_list_lsb_agents(); if (standard == NULL) { tmp1 = result; tmp2 = resources_os_list_ocf_agents(NULL); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } #if SUPPORT_SYSTEMD tmp1 = result; tmp2 = systemd_unit_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif #if SUPPORT_UPSTART tmp1 = result; tmp2 = upstart_job_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif return result; } else if (strcasecmp(standard, "ocf") == 0) { return resources_os_list_ocf_agents(provider); } else if (strcasecmp(standard, "lsb") == 0) { return resources_os_list_lsb_agents(); #if SUPPORT_SYSTEMD } else if (strcasecmp(standard, "systemd") == 0) { return systemd_unit_listall(); #endif #if SUPPORT_UPSTART } else if (strcasecmp(standard, "upstart") == 0) { return upstart_job_listall(); #endif #if SUPPORT_NAGIOS } else if (strcasecmp(standard, "nagios") == 0) { return resources_os_list_nagios_agents(); #endif } return NULL; } diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index d8215f094b..58068411a9 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -1,1346 +1,1413 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_TIMEB_H # include #endif GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; int interval; int start_delay; int timeout_orig; int call_id; int exec_rc; int lrmd_op_status; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; char *client_id; char *origin; char *rsc_id; char *action; char *output; char *userdata_str; #ifdef HAVE_SYS_TIMEB_H /* Timestamp of when op first ran */ struct timeb t_first_run; /* Timestamp of when op ran */ struct timeb t_run; /* Timestamp of when op was queued */ struct timeb t_queue; /* Timestamp of last rc change */ struct timeb t_rcchange; #endif int first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); static void log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time) { char pid_str[32] = { 0, }; int log_level = LOG_INFO; if (cmd->last_pid) { snprintf(pid_str, 32, "%d", cmd->last_pid); } if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } #ifdef HAVE_SYS_TIMEB_H do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc, exec_time, queue_time); #else do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc); #endif } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } +static const char * +normalize_action_name(lrmd_rsc_t * rsc, const char *action) +{ + if (safe_str_eq(action, "monitor") && + (safe_str_eq(rsc->class, "lsb") || + safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) { + return "status"; + } + return action; +} + static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode * msg, crm_client_t * client) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); return cmd; } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } free(cmd->origin); free(cmd->action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); #endif mainloop_set_trigger(rsc->work); return FALSE; } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } +static gboolean +merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) +{ + GListPtr gIter = NULL; + lrmd_cmd_t * dup = NULL; + gboolean dup_pending = FALSE; + + if (cmd->interval == 0) { + return 0; + } + + for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { + dup = gIter->data; + if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { + dup_pending = TRUE; + goto merge_dup; + } + } + + /* if dup is in recurring_ops list, that means it has already executed + * and is in the interval loop. we can't just remove it in this case. */ + for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { + dup = gIter->data; + if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) { + goto merge_dup; + } + } + + return FALSE; +merge_dup: + + /* merge */ + dup->first_notify_sent = 0; + free(dup->userdata_str); + dup->userdata_str = cmd->userdata_str; + cmd->userdata_str = NULL; + dup->call_id = cmd->call_id; + + if (safe_str_eq(rsc->class, "stonith")) { + /* if we are waiting for the next interval, kick it off now */ + if (dup_pending == TRUE) { + g_source_remove(cmd->stonith_recurring_id); + cmd->stonith_recurring_id = 0; + stonith_recurring_op_helper(cmd); + } + + } else if (dup_pending == FALSE) { + /* if we've already handed this to the service lib, kick off an early execution */ + services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval); + } + free_lrmd_cmd(cmd); + + return TRUE; +} + static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { + gboolean dup_processed = FALSE; CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); + + dup_processed = merge_recurring_duplicate(rsc, cmd); + if (dup_processed) { + /* duplicate recurring cmd found, cmds merged */ + return; + } + rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } - } static void send_reply(crm_client_t * client, int rc, uint32_t id, int call_id) { int send_rc = 0; xmlNode *reply = NULL; reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; if (client == NULL) { crm_err("Asked to send event to NULL client"); return; } else if (client->name == NULL) { crm_trace("Asked to send event to client with no name"); return; } if (lrmd_server_send_notify(client, update_msg) <= 0) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } } #ifdef HAVE_SYS_TIMEB_H static int time_diff_ms(struct timeb *now, struct timeb *old) { int sec = difftime(now->time, old->time); int ms = now->millitm - old->millitm; if (old->time == 0) { return 0; } return (sec * 1000) + ms; } #endif static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { int exec_time = 0; int queue_time = 0; xmlNode *notify = NULL; #ifdef HAVE_SYS_TIMEB_H struct timeb now = { 0, }; ftime(&now); exec_time = time_diff_ms(&now, &cmd->t_run); queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue); #endif log_finished(cmd, exec_time, queue_time); /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if (cmd->last_notify_rc == cmd->exec_rc && cmd->last_notify_op_status == cmd->lrmd_op_status) { /* only send changes */ return; } } cmd->first_notify_sent = 1; cmd->last_notify_rc = cmd->exec_rc; cmd->last_notify_op_status = cmd->lrmd_op_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); #ifdef HAVE_SYS_TIMEB_H crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time); crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time); crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2field((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { crm_client_t *client = crm_client_get_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else { g_hash_table_foreach(client_connections, send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); g_hash_table_foreach(client_connections, send_client_notify, notify); free_xml(notify); } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } send_cmd_complete_notify(cmd); /* crmd expects lrmd to automatically cancel recurring ops after rsc stops */ if (rsc && safe_str_eq(cmd->action, "stop")) { cancel_all_recurring(rsc, NULL); } if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd->lrmd_op_status = 0; cmd->last_pid = 0; memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); free(cmd->output); cmd->output = NULL; } } static int lsb2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } /* status has different return codes that everything else. */ if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) { if (rc > PCMK_LSB_NOT_RUNNING) { return PCMK_OCF_UNKNOWN_ERROR; } return rc; } switch (rc) { case PCMK_LSB_STATUS_OK: return PCMK_OCF_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_OCF_NOT_RUNNING; default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } static int ocf2uniform_rc(int rc) { if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) { return PCMK_OCF_UNKNOWN_ERROR; } return rc; } static int stonith2uniform_rc(const char *action, int rc) { if (rc == -ENODEV) { if (safe_str_eq(action, "stop")) { rc = PCMK_OCF_OK; } else if (safe_str_eq(action, "start")) { rc = PCMK_OCF_NOT_INSTALLED; } else { rc = PCMK_OCF_NOT_RUNNING; } } else if (rc != 0) { rc = PCMK_OCF_UNKNOWN_ERROR; } return rc; } #if SUPPORT_NAGIOS static int nagios2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } switch (rc) { case NAGIOS_STATE_OK: return PCMK_OCF_OK; case NAGIOS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case NAGIOS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case NAGIOS_STATE_WARNING: case NAGIOS_STATE_CRITICAL: case NAGIOS_STATE_UNKNOWN: case NAGIOS_STATE_DEPENDENT: default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } #endif static int get_uniform_rc(const char *standard, const char *action, int rc) { if (safe_str_eq(standard, "ocf")) { return ocf2uniform_rc(rc); } else if (safe_str_eq(standard, "stonith")) { return stonith2uniform_rc(action, rc); } else if (safe_str_eq(standard, "systemd")) { return rc; } else if (safe_str_eq(standard, "upstart")) { return rc; #if SUPPORT_NAGIOS } else if (safe_str_eq(standard, "nagios")) { return nagios2uniform_rc(action, rc); #endif } else { return lsb2uniform_rc(action, rc); } } void notify_of_new_client(crm_client_t *new_client) { crm_client_t *client = NULL; GHashTableIter iter; xmlNode *notify = NULL; char *key = NULL; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) { if (safe_str_eq(client->id, new_client->id)) { continue; } send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify); } free_xml(notify); } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; if (!cmd) { crm_err("LRMD action (%s) completed does not match any known operations.", action->id); return; } #ifdef HAVE_SYS_TIMEB_H if (cmd->exec_rc != action->rc) { ftime(&cmd->t_rcchange); } #endif cmd->last_pid = action->pid; cmd->exec_rc = get_uniform_rc(action->standard, cmd->action, action->rc); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (action->stdout_data) { cmd->output = strdup(action->stdout_data); } #if SUPPORT_NAGIOS if (rsc && safe_str_eq(rsc->class, "nagios")) { if (safe_str_eq(cmd->action, "monitor") && cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) { /* Successfully executed --version for the nagios plugin */ cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) { int time_sum = 0; int timeout_left = 0; int delay = cmd->timeout_orig / 10; # ifdef HAVE_SYS_TIMEB_H struct timeb now = { 0, }; ftime(&now); time_sum = time_diff_ms(&now, &cmd->t_first_run); timeout_left = cmd->timeout_orig - time_sum; if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; crm_notice ("%s %s failed (rc=%d): re-scheduling (time_sum=%dms, start_delay=%dms, timeout=%dms)", cmd->rsc_id, cmd->action, cmd->exec_rc, time_sum, cmd->start_delay, cmd->timeout); cmd->lrmd_op_status = 0; cmd->last_pid = 0; memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); free(cmd->output); cmd->output = NULL; rsc->active = NULL; schedule_lrmd_cmd(rsc, cmd); return; } # endif } } #endif cmd_finalize(cmd, rsc); } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { int recurring = cmd->interval; lrmd_rsc_t *rsc = NULL; cmd->exec_rc = get_uniform_rc("stonith", cmd->action, rc); rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) { recurring = 0; /* do nothing */ } else if (rc) { /* Attempt to map return codes to op status if possible */ switch (rc) { case -EPROTONOSUPPORT: cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED; break; case -ETIME: cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; break; default: cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } } else { /* command successful */ cmd->lrmd_op_status = PCMK_LRM_OP_DONE; if (safe_str_eq(cmd->action, "start") && rsc) { rsc->stonith_started = 1; } } if (recurring && rsc) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (safe_str_eq(rsc->class, "stonith")) { if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("STONITH connection failed, finalizing %d pending operations.", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } static int lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; int do_monitor = 0; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { cmd->exec_rc = get_uniform_rc("stonith", cmd->action, -ENOTCONN); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; cmd_finalize(cmd, rsc); return -EUNATCH; } if (safe_str_eq(cmd->action, "start")) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* Stonith automatically registers devices from the IPC when changes occur, * but to avoid a possible race condition between stonith receiving the IPC update * and the lrmd requesting that resource, the lrmd still registers the device as well. * Stonith knows how to handle duplicate device registrations correctly. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); if (rc == 0) { do_monitor = 1; } } else if (safe_str_eq(cmd->action, "stop")) { rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id); rsc->stonith_started = 0; } else if (safe_str_eq(cmd->action, "monitor")) { if (cmd->interval) { do_monitor = 1; } else { rc = rsc->stonith_started ? 0 : -ENODEV; } } if (!do_monitor) { goto cleanup_stonith_exec; } rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); /* don't cleanup yet, we will find out the result of the monitor later */ if (rc > 0) { rsc->active = cmd; return rc; } else if (rc == 0) { rc = -1; } cleanup_stonith_exec: stonith_action_complete(cmd, rc); return rc; } -static const char * -normalize_action_name(lrmd_rsc_t * rsc, const char *action) -{ - if (safe_str_eq(action, "monitor") && - (safe_str_eq(rsc->class, "lsb") || - safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) { - return "status"; - } - return action; -} - static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, strdup(key), strdup(value)); } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (safe_str_eq(rsc->class, "nagios") && safe_str_eq(cmd->action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; goto exec_done; } #endif if (cmd->params) { params_copy = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (params_copy != NULL) { g_hash_table_foreach(cmd->params, dup_attr, params_copy); } } action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval, cmd->timeout, params_copy); if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; /* 'cmd' may not be valid after this point if * services_action_async() returned TRUE * * Upstart and systemd both synchronously determine monitor/status * results and call action_complete (which may free 'cmd') if necessary. */ if (services_action_async(action, action_complete)) { return TRUE; } cmd->exec_rc = action->rc; if(action->status != PCMK_LRM_OP_DONE) { cmd->lrmd_op_status = action->status; } else { cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef HAVE_SYS_TIMEB_H if (cmd->t_first_run.time == 0) { ftime(&cmd->t_first_run); } ftime(&cmd->t_run); #endif } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (safe_str_eq(rsc->class, "stonith")) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = safe_str_eq(rsc->class, "stonith"); for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval); } } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *reply = create_xml_node(NULL, "reply"); const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (safe_str_neq(protocol_version, LRMD_PROTOCOL_VERSION)) { crm_xml_add_int(reply, F_LRMD_RC, -EPROTO); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); } crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_LRMD_CLIENTID, client->id); lrmd_server_send_reply(client, id, reply); if (crm_is_true(is_ipc_provider)) { /* this is a remote connection from a cluster nodes crmd */ #ifdef SUPPORT_REMOTE ipc_proxy_add_provider(client); #endif } free_xml(reply); return pcmk_ok; } static int process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && safe_str_eq(rsc->class, dup->class) && safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) { crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Added '%s' to the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); return rc; } static void process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int send_rc = 0; int call_id = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (!rsc_id) { rc = -ENODEV; goto get_rsc_done; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; goto get_rsc_done; } get_rsc_done: reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } send_rc = lrmd_server_send_reply(client, id, reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } free_xml(reply); } static int process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation still in progress: %p", rsc->active); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); + int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); + call_id = cmd->call_id; + + /* Don't reference cmd after handing it off to be scheduled. + * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); - return cmd->call_id; + return call_id; } static int cancel_op(const char *rsc_id, const char *action, int interval) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then its either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (safe_str_eq(rsc->class, "stonith")) { /* The service library does not handle stonith operations. * We have to handle recurring stonith opereations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval == 0) { continue; } if (client_id && safe_str_neq(cmd->client_id, client_id)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); int interval = 0; crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval); } void process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) { #ifdef SUPPORT_REMOTE ipc_proxy_forward_client(client, request); #endif do_reply = 1; } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { rc = process_lrmd_signon(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) { process_lrmd_get_rsc_info(client, id, request); } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) { rc = process_lrmd_rsc_exec(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) { rc = process_lrmd_rsc_cancel(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) { do_notify = 1; do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown %s from %s", op, client->name); crm_log_xml_warn(request, "UnknownOp"); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d, exit=%d", op, client->id, rc, do_reply, do_notify, exit); if (do_reply) { send_reply(client, rc, id, call_id); } if (do_notify) { send_generic_notify(rc, request); } }