diff --git a/extra/resources/Dummy b/extra/resources/Dummy index 2410c4d320..aeac24713e 100644 --- a/extra/resources/Dummy +++ b/extra/resources/Dummy @@ -1,207 +1,220 @@ #!/bin/sh # # # Dummy OCF RA. Does nothing but wait a few seconds, can be # configured to fail occassionally. # # Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée # All Rights Reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # # This program is distributed in the hope that it would be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # # Further, this software is distributed without any warranty that it is # free of the rightful claim of any third person regarding infringement # or the like. Any license provided herein, whether implied or # otherwise, applies only to this software file. Patent licenses, if # any, provided herein do not apply to combinations of this program with # other software, or any other product whatsoever. # # You should have received a copy of the GNU General Public License # along with this program; if not, write the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} . ${OCF_FUNCTIONS} : ${__OCF_ACTION=$1} ####################################################################### meta_data() { cat < 1.0 This is a Dummy Resource Agent. It does absolutely nothing except keep track of whether its running or not. Its purpose in life is for testing and to serve as a template for RA writers. NB: Please pay attention to the timeouts specified in the actions section below. They should be meaningful for the kind of resource the agent manages. They should be the minimum advised timeouts, but they shouldn't/cannot cover _all_ possible resource instances. So, try to be neither overly generous nor too stingy, but moderate. The minimum timeouts should never be below 10 seconds. Example stateless resource agent Location to store the resource state in. State file Fake attribute that can be changed to cause a reload Fake attribute that can be changed to cause a reload Number of seconds to sleep during operations. This can be used to test how the cluster reacts to operation timeouts. Operation sleep duration in seconds. END } ####################################################################### # don't exit on TERM, to test that lrmd makes sure that we do exit trap sigterm_handler TERM sigterm_handler() { ocf_log info "They use TERM to bring us down. No such luck." return } dummy_usage() { cat < * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include "services_private.h" #if SUPPORT_UPSTART # include #endif #if SUPPORT_SYSTEMD # include #endif /* TODO: Develop a rollover strategy */ static int operations = 0; GHashTable *recurring_actions = NULL; +/* ops waiting to run async because of conflicting active + * pending ops*/ +GList *blocked_ops = NULL; + +/* ops currently active (in-flight) */ +GList *inflight_ops = NULL; + svc_action_t * services_action_create(const char *name, const char *action, int interval, int timeout) { return resources_action_create(name, "lsb", NULL, name, action, interval, timeout, NULL, 0); } const char * resources_find_service_class(const char *agent) { /* Priority is: * - lsb * - systemd * - upstart */ int rc = 0; struct stat st; char *path = NULL; #ifdef LSB_ROOT_DIR rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent); if (rc > 0 && stat(path, &st) == 0) { free(path); return "lsb"; } free(path); #endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { return "systemd"; } #endif #if SUPPORT_UPSTART if (upstart_job_exists(agent)) { return "upstart"; } #endif return NULL; } svc_action_t * resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, int interval, int timeout, GHashTable * params, enum svc_action_flags flags) { svc_action_t *op = NULL; /* * Do some up front sanity checks before we go off and * build the svc_action_t instance. */ if (crm_strlen_zero(name)) { crm_err("A service or resource action must have a name."); goto return_error; } if (crm_strlen_zero(standard)) { crm_err("A service action must have a valid standard."); goto return_error; } if (!strcasecmp(standard, "ocf") && crm_strlen_zero(provider)) { crm_err("An OCF resource action must have a provider."); goto return_error; } if (crm_strlen_zero(agent)) { crm_err("A service or resource action must have an agent."); goto return_error; } if (crm_strlen_zero(action)) { crm_err("A service or resource action must specify an action."); goto return_error; } if (safe_str_eq(action, "monitor") && ( #if SUPPORT_HEARTBEAT safe_str_eq(standard, "heartbeat") || #endif safe_str_eq(standard, "lsb") || safe_str_eq(standard, "service"))) { action = "status"; } /* * Sanity checks passed, proceed! */ op = calloc(1, sizeof(svc_action_t)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->rsc = strdup(name); op->action = strdup(action); op->interval = interval; op->timeout = timeout; op->standard = strdup(standard); op->agent = strdup(agent); op->sequence = ++operations; op->flags = flags; if (asprintf(&op->id, "%s_%s_%d", name, action, interval) == -1) { goto return_error; } if (strcasecmp(op->standard, "service") == 0) { const char *expanded = resources_find_service_class(op->agent); if(expanded) { crm_debug("Found a %s agent for %s/%s", expanded, op->rsc, op->agent); free(op->standard); op->standard = strdup(expanded); } else { crm_info("Cannot determine the standard for %s (%s)", op->rsc, op->agent); free(op->standard); op->standard = strdup("lsb"); } CRM_ASSERT(op->standard); } if (strcasecmp(op->standard, "ocf") == 0) { op->provider = strdup(provider); op->params = params; params = NULL; if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(action); } else if (strcasecmp(op->standard, "lsb") == 0) { if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the LSB_ROOT_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); op->opaque->args[1] = strdup(op->action); op->opaque->args[2] = NULL; #if SUPPORT_HEARTBEAT } else if (strcasecmp(op->standard, "heartbeat") == 0) { int index; int param_num; char buf_tmp[20]; void *value_tmp; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the HB_RA_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); /* The "heartbeat" agent class only has positional arguments, * which we keyed by their decimal position number. */ param_num = 1; for (index = 1; index <= MAX_ARGC - 3; index++ ) { snprintf(buf_tmp, sizeof(buf_tmp), "%d", index); value_tmp = g_hash_table_lookup(params, buf_tmp); if (value_tmp == NULL) { /* maybe: strdup("") ?? * But the old lrmd did simply continue as well. */ continue; } op->opaque->args[param_num++] = strdup(value_tmp); } /* Add operation code as the last argument, */ /* and the teminating NULL pointer */ op->opaque->args[param_num++] = strdup(op->action); op->opaque->args[param_num] = NULL; #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(op->standard, "systemd") == 0) { op->opaque->exec = strdup("systemd-dbus"); #endif #if SUPPORT_UPSTART } else if (strcasecmp(op->standard, "upstart") == 0) { op->opaque->exec = strdup("upstart-dbus"); #endif } else if (strcasecmp(op->standard, "service") == 0) { op->opaque->exec = strdup(SERVICE_SCRIPT); op->opaque->args[0] = strdup(SERVICE_SCRIPT); op->opaque->args[1] = strdup(agent); op->opaque->args[2] = strdup(action); #if SUPPORT_NAGIOS } else if (strcasecmp(op->standard, "nagios") == 0) { int index = 0; if (op->agent[0] == '/') { /* if given an absolute path, use that instead * of tacking on the NAGIOS_PLUGIN_DIR path to the front */ op->opaque->exec = strdup(op->agent); } else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) { crm_err("Internal error: cannot create agent path"); goto return_error; } op->opaque->args[0] = strdup(op->opaque->exec); index = 1; if (safe_str_eq(op->action, "monitor") && op->interval == 0) { /* Invoke --version for a nagios probe */ op->opaque->args[index] = strdup("--version"); index++; } else if (params) { GHashTableIter iter; char *key = NULL; char *value = NULL; static int args_size = sizeof(op->opaque->args) / sizeof(char *); g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) && index <= args_size - 3) { int len = 3; char *long_opt = NULL; if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) { continue; } len += strlen(key); long_opt = calloc(1, len); sprintf(long_opt, "--%s", key); long_opt[len - 1] = 0; op->opaque->args[index] = long_opt; op->opaque->args[index + 1] = strdup(value); index += 2; } } op->opaque->args[index] = NULL; #endif } else { crm_err("Unknown resource standard: %s", op->standard); services_action_free(op); op = NULL; } if(params) { g_hash_table_destroy(params); } return op; return_error: if(params) { g_hash_table_destroy(params); } services_action_free(op); return NULL; } svc_action_t * services_action_create_generic(const char *exec, const char *args[]) { svc_action_t *op; unsigned int cur_arg; op = calloc(1, sizeof(*op)); op->opaque = calloc(1, sizeof(svc_action_private_t)); op->opaque->exec = strdup(exec); op->opaque->args[0] = strdup(exec); for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) { op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); if (cur_arg == DIMOF(op->opaque->args) - 1) { crm_err("svc_action_t args list not long enough for '%s' execution request.", exec); break; } } return op; } void services_action_cleanup(svc_action_t * op) { #if SUPPORT_DBUS if(op->opaque == NULL) { return; } if(op->opaque->timerid != 0) { crm_trace("Removing timer for call %s to %s", op->action, op->rsc); g_source_remove(op->opaque->timerid); op->opaque->timerid = 0; } if(op->opaque->pending) { crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc); if(dbus_pending_call_get_completed(op->opaque->pending)) { crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc); } dbus_pending_call_cancel(op->opaque->pending); dbus_pending_call_unref(op->opaque->pending); op->opaque->pending = NULL; } if (op->opaque->stderr_gsource) { mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } #endif } void services_action_free(svc_action_t * op) { unsigned int i; if (op == NULL) { return; } services_action_cleanup(op); if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } free(op->id); free(op->opaque->exec); for (i = 0; i < DIMOF(op->opaque->args); i++) { free(op->opaque->args[i]); } free(op->opaque); free(op->rsc); free(op->action); free(op->standard); free(op->agent); free(op->provider); free(op->stdout_data); free(op->stderr_data); if (op->params) { g_hash_table_destroy(op->params); op->params = NULL; } free(op); } gboolean cancel_recurring_action(svc_action_t * op) { crm_info("Cancelling operation %s", op->id); if (recurring_actions) { g_hash_table_remove(recurring_actions, op->id); } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } return TRUE; } gboolean services_action_cancel(const char *name, const char *action, int interval) { svc_action_t *op = NULL; char id[512]; snprintf(id, sizeof(id), "%s_%s_%d", name, action, interval); if (!(op = g_hash_table_lookup(recurring_actions, id))) { return FALSE; } /* Always kill the recurring timer */ cancel_recurring_action(op); if (op->pid == 0) { op->status = PCMK_LRM_OP_CANCELLED; if (op->opaque->callback) { op->opaque->callback(op); } + + blocked_ops = g_list_remove(blocked_ops, op); services_action_free(op); } else { crm_info("Cancelling in-flight op: performing early termination of %s (pid=%d)", id, op->pid); op->cancel = 1; if (mainloop_child_kill(op->pid) == FALSE) { /* even though the early termination failed, * the op will be marked as cancelled once it completes. */ crm_err("Termination of %s (pid=%d) failed", id, op->pid); return FALSE; } } return TRUE; } gboolean services_action_kick(const char *name, const char *action, int interval /* ms */) { svc_action_t * op = NULL; char *id = NULL; if (asprintf(&id, "%s_%s_%d", name, action, interval) == -1) { return FALSE; } op = g_hash_table_lookup(recurring_actions, id); free(id); if (op == NULL) { return FALSE; } if (op->pid) { return TRUE; } else { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(op); return TRUE; } } /* add new recurring operation, check for duplicates. * - if duplicate found, return TRUE, immediately reschedule op. * - if no dup, return FALSE, inserve into recurring op list.*/ static gboolean handle_duplicate_recurring(svc_action_t * op, void (*action_callback) (svc_action_t *)) { svc_action_t * dup = NULL; if (recurring_actions == NULL) { recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL); return FALSE; } /* check for duplicates */ dup = g_hash_table_lookup(recurring_actions, op->id); if (dup && (dup != op)) { /* update user data */ if (op->opaque->callback) { dup->opaque->callback = op->opaque->callback; dup->cb_data = op->cb_data; op->cb_data = NULL; } /* immediately execute the next interval */ if (dup->pid != 0) { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(dup); } /* free the dup. */ services_action_free(op); return TRUE; } return FALSE; } +static gboolean +action_async_helper(svc_action_t * op) { + gboolean res = FALSE; + + if (op->standard && strcasecmp(op->standard, "upstart") == 0) { +#if SUPPORT_UPSTART + res = upstart_job_exec(op, FALSE); +#endif + } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) { +#if SUPPORT_SYSTEMD + res = systemd_unit_exec(op); +#endif + } else { + res = services_os_action_execute(op, FALSE); + } + + /* keep track of ops that are in-flight to avoid collisions in the same namespace */ + if (res) { + inflight_ops = g_list_append(inflight_ops, op); + } + + return res; +} + gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) { op->synchronous = false; if (action_callback) { op->opaque->callback = action_callback; } if (op->interval > 0) { if (handle_duplicate_recurring(op, action_callback) == TRUE) { /* entry rescheduled, dup freed */ + /* exit early */ return TRUE; } g_hash_table_replace(recurring_actions, op->id, op); } - if (op->standard && strcasecmp(op->standard, "upstart") == 0) { -#if SUPPORT_UPSTART - return upstart_job_exec(op, FALSE); -#endif + + if (is_op_blocked(op->rsc)) { + blocked_ops = g_list_append(blocked_ops, op); + return TRUE; } - if (op->standard && strcasecmp(op->standard, "systemd") == 0) { -#if SUPPORT_SYSTEMD - return systemd_unit_exec(op); -#endif + + return action_async_helper(op); +} + + +static gboolean processing_blocked_ops = FALSE; + +gboolean +is_op_blocked(const char *rsc) +{ + GList *gIter = NULL; + svc_action_t *op = NULL; + + for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) { + op = gIter->data; + if (safe_str_eq(op->rsc, rsc)) { + return TRUE; + } } - return services_os_action_execute(op, FALSE); + + return FALSE; +} + +void +handle_blocked_ops(void) +{ + GList *executed_ops = NULL; + GList *gIter = NULL; + svc_action_t *op = NULL; + gboolean res = FALSE; + + if (processing_blocked_ops) { + /* avoid nested calling of this function */ + return; + } + + processing_blocked_ops = TRUE; + + /* n^2 operation here, but blocked ops are incredibly rare. this list + * will be empty 99% of the time. */ + for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) { + op = gIter->data; + if (is_op_blocked(op->rsc)) { + continue; + } + executed_ops = g_list_append(executed_ops, op); + res = action_async_helper(op); + if (res == FALSE) { + op->status = PCMK_LRM_OP_ERROR; + /* this can cause this function to be called recursively + * which is why we have processing_blocked_ops static variable */ + operation_finalize(op); + } + } + + for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) { + op = gIter->data; + blocked_ops = g_list_remove(blocked_ops, op); + } + g_list_free(executed_ops); + + processing_blocked_ops = FALSE; } gboolean services_action_sync(svc_action_t * op) { gboolean rc = TRUE; if (op == NULL) { crm_trace("No operation to execute"); return FALSE; } op->synchronous = true; if (op->standard && strcasecmp(op->standard, "upstart") == 0) { #if SUPPORT_UPSTART rc = upstart_job_exec(op, TRUE); #endif } else if (op->standard && strcasecmp(op->standard, "systemd") == 0) { #if SUPPORT_SYSTEMD rc = systemd_unit_exec(op); #endif } else { rc = services_os_action_execute(op, TRUE); } crm_trace(" > %s_%s_%d: %s = %d", op->rsc, op->action, op->interval, op->opaque->exec, op->rc); if (op->stdout_data) { crm_trace(" > stdout: %s", op->stdout_data); } if (op->stderr_data) { crm_trace(" > stderr: %s", op->stderr_data); } return rc; } GList * get_directory_list(const char *root, gboolean files, gboolean executable) { return services_os_get_directory_list(root, files, executable); } GList * services_list(void) { return resources_list_agents("lsb", NULL); } #if SUPPORT_HEARTBEAT static GList * resources_os_list_hb_agents(void) { return services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE); } #endif GList * resources_list_standards(void) { GList *standards = NULL; GList *agents = NULL; standards = g_list_append(standards, strdup("ocf")); standards = g_list_append(standards, strdup("lsb")); standards = g_list_append(standards, strdup("service")); #if SUPPORT_SYSTEMD agents = systemd_unit_listall(); #else agents = NULL; #endif if (agents) { standards = g_list_append(standards, strdup("systemd")); g_list_free_full(agents, free); } #if SUPPORT_UPSTART agents = upstart_job_listall(); #else agents = NULL; #endif if (agents) { standards = g_list_append(standards, strdup("upstart")); g_list_free_full(agents, free); } #if SUPPORT_NAGIOS agents = resources_os_list_nagios_agents(); if (agents) { standards = g_list_append(standards, strdup("nagios")); g_list_free_full(agents, free); } #endif #if SUPPORT_HEARTBEAT standards = g_list_append(standards, strdup("heartbeat")); #endif return standards; } GList * resources_list_providers(const char *standard) { if (strcasecmp(standard, "ocf") == 0) { return resources_os_list_ocf_providers(); } return NULL; } GList * resources_list_agents(const char *standard, const char *provider) { if (standard == NULL || strcasecmp(standard, "service") == 0) { GList *tmp1; GList *tmp2; GList *result = resources_os_list_lsb_agents(); if (standard == NULL) { tmp1 = result; tmp2 = resources_os_list_ocf_agents(NULL); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } #if SUPPORT_SYSTEMD tmp1 = result; tmp2 = systemd_unit_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif #if SUPPORT_UPSTART tmp1 = result; tmp2 = upstart_job_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif return result; } else if (strcasecmp(standard, "ocf") == 0) { return resources_os_list_ocf_agents(provider); } else if (strcasecmp(standard, "lsb") == 0) { return resources_os_list_lsb_agents(); #if SUPPORT_HEARTBEAT } else if (strcasecmp(standard, "heartbeat") == 0) { return resources_os_list_hb_agents(); #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(standard, "systemd") == 0) { return systemd_unit_listall(); #endif #if SUPPORT_UPSTART } else if (strcasecmp(standard, "upstart") == 0) { return upstart_job_listall(); #endif #if SUPPORT_NAGIOS } else if (strcasecmp(standard, "nagios") == 0) { return resources_os_list_nagios_agents(); #endif } return NULL; } diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c index a823976606..57669d9d1c 100644 --- a/lib/services/services_linux.c +++ b/lib/services/services_linux.c @@ -1,822 +1,829 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_SIGNALFD_H #include #endif #include "crm/crm.h" #include "crm/common/mainloop.h" #include "crm/services.h" #include "services_private.h" #if SUPPORT_CIBSECRETS # include "crm/common/cib_secrets.h" #endif +/* ops currently active (in-flight) */ +extern GList *inflight_ops; + static inline void set_fd_opts(int fd, int opts) { int flag; if ((flag = fcntl(fd, F_GETFL)) >= 0) { if (fcntl(fd, F_SETFL, flag | opts) < 0) { crm_err("fcntl() write failed"); } } else { crm_err("fcntl() read failed"); } } static gboolean svc_read_output(int fd, svc_action_t * op, bool is_stderr) { char *data = NULL; int rc = 0, len = 0; char buf[500]; static const size_t buf_read_len = sizeof(buf) - 1; if (fd < 0) { crm_trace("No fd for %s", op->id); return FALSE; } if (is_stderr && op->stderr_data) { len = strlen(op->stderr_data); data = op->stderr_data; crm_trace("Reading %s stderr into offset %d", op->id, len); } else if (is_stderr == FALSE && op->stdout_data) { len = strlen(op->stdout_data); data = op->stdout_data; crm_trace("Reading %s stdout into offset %d", op->id, len); } else { crm_trace("Reading %s %s", op->id, is_stderr?"stderr":"stdout", len); } do { rc = read(fd, buf, buf_read_len); if (rc > 0) { crm_trace("Got %d chars: %.80s", rc, buf); buf[rc] = 0; data = realloc_safe(data, len + rc + 1); len += sprintf(data + len, "%s", buf); } else if (errno != EINTR) { /* error or EOF * Cleanup happens in pipe_done() */ rc = FALSE; break; } } while (rc == buf_read_len || rc < 0); if (is_stderr) { op->stderr_data = data; } else { op->stdout_data = data; } return rc; } static int dispatch_stdout(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stdout_fd, op, FALSE); } static int dispatch_stderr(gpointer userdata) { svc_action_t *op = (svc_action_t *) userdata; return svc_read_output(op->opaque->stderr_fd, op, TRUE); } static void pipe_out_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; crm_trace("%p", op); op->opaque->stdout_gsource = NULL; if (op->opaque->stdout_fd > STDOUT_FILENO) { close(op->opaque->stdout_fd); } op->opaque->stdout_fd = -1; } static void pipe_err_done(gpointer user_data) { svc_action_t *op = (svc_action_t *) user_data; op->opaque->stderr_gsource = NULL; if (op->opaque->stderr_fd > STDERR_FILENO) { close(op->opaque->stderr_fd); } op->opaque->stderr_fd = -1; } static struct mainloop_fd_callbacks stdout_callbacks = { .dispatch = dispatch_stdout, .destroy = pipe_out_done, }; static struct mainloop_fd_callbacks stderr_callbacks = { .dispatch = dispatch_stderr, .destroy = pipe_err_done, }; static void set_ocf_env(const char *key, const char *value, gpointer user_data) { if (setenv(key, value, 1) != 0) { crm_perror(LOG_ERR, "setenv failed for key:%s and value:%s", key, value); } } static void set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) { char buffer[500]; snprintf(buffer, sizeof(buffer), "OCF_RESKEY_%s", (char *)key); set_ocf_env(buffer, value, user_data); } static void add_OCF_env_vars(svc_action_t * op) { if (!op->standard || strcasecmp("ocf", op->standard) != 0) { return; } if (op->params) { g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL); } set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL); set_ocf_env("OCF_RA_VERSION_MINOR", "0", NULL); set_ocf_env("OCF_ROOT", OCF_ROOT_DIR, NULL); set_ocf_env("OCF_EXIT_REASON_PREFIX", PCMK_OCF_REASON_PREFIX, NULL); if (op->rsc) { set_ocf_env("OCF_RESOURCE_INSTANCE", op->rsc, NULL); } if (op->agent != NULL) { set_ocf_env("OCF_RESOURCE_TYPE", op->agent, NULL); } /* Notes: this is not added to specification yet. Sept 10,2004 */ if (op->provider != NULL) { set_ocf_env("OCF_RESOURCE_PROVIDER", op->provider, NULL); } } gboolean recurring_action_timer(gpointer data) { svc_action_t *op = data; crm_debug("Scheduling another invocation of %s", op->id); /* Clean out the old result */ free(op->stdout_data); op->stdout_data = NULL; free(op->stderr_data); op->stderr_data = NULL; op->opaque->repeat_timer = 0; services_action_async(op, NULL); return FALSE; } /* Returns FALSE if 'op' should be free'd by the caller */ gboolean operation_finalize(svc_action_t * op) { int recurring = 0; if (op->interval) { if (op->cancel) { op->status = PCMK_LRM_OP_CANCELLED; cancel_recurring_action(op); } else { recurring = 1; op->opaque->repeat_timer = g_timeout_add(op->interval, recurring_action_timer, (void *)op); } } if (op->opaque->callback) { op->opaque->callback(op); } op->pid = 0; + inflight_ops = g_list_remove(inflight_ops, op); + + handle_blocked_ops(); + if (!recurring && op->synchronous == FALSE) { /* * If this is a recurring action, do not free explicitly. * It will get freed whenever the action gets cancelled. */ services_action_free(op); return TRUE; } services_action_cleanup(op); return FALSE; } static void operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { svc_action_t *op = mainloop_child_userdata(p); char *prefix = crm_strdup_printf("%s:%d", op->id, op->pid); mainloop_clear_child_userdata(p); op->status = PCMK_LRM_OP_DONE; CRM_ASSERT(op->pid == pid); crm_trace("%s %p %p", prefix, op->opaque->stderr_gsource, op->opaque->stdout_gsource); if (op->opaque->stderr_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stderr", prefix); dispatch_stderr(op); crm_trace("%s: %p", op->id, op->stderr_data); mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { /* Make sure we have read everything from the buffer. * Depending on the priority mainloop gives the fd, operation_finished * could occur before all the reads are done. Force the read now.*/ crm_trace("%s dispatching stdout", prefix); dispatch_stdout(op); crm_trace("%s: %p", op->id, op->stdout_data); mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } if (signo) { if (mainloop_child_timeout(p)) { crm_warn("%s - timed out after %dms", prefix, op->timeout); op->status = PCMK_LRM_OP_TIMEOUT; op->rc = PCMK_OCF_TIMEOUT; } else { do_crm_log_unlikely((op->cancel) ? LOG_INFO : LOG_WARNING, "%s - terminated with signal %d", prefix, signo); op->status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_SIGNAL; } } else { op->rc = exitcode; crm_debug("%s - exited with rc=%d", prefix, exitcode); } free(prefix); prefix = crm_strdup_printf("%s:%d:stderr", op->id, op->pid); crm_log_output(LOG_NOTICE, prefix, op->stderr_data); free(prefix); prefix = crm_strdup_printf("%s:%d:stdout", op->id, op->pid); crm_log_output(LOG_DEBUG, prefix, op->stdout_data); free(prefix); operation_finalize(op); } /*! * \internal * \brief Set operation rc and status per errno from stat(), fork() or execvp() * * \param[in,out] op Operation to set rc and status for * \param[in] error Value of errno after system call * * \return void */ static void services_handle_exec_error(svc_action_t * op, int error) { int rc_not_installed, rc_insufficient_priv, rc_exec_error; /* Mimic the return codes for each standard as that's what we'll convert back from in get_uniform_rc() */ if (safe_str_eq(op->standard, "lsb") && safe_str_eq(op->action, "status")) { rc_not_installed = PCMK_LSB_STATUS_NOT_INSTALLED; rc_insufficient_priv = PCMK_LSB_STATUS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_LSB_STATUS_UNKNOWN; #if SUPPORT_NAGIOS } else if (safe_str_eq(op->standard, "nagios")) { rc_not_installed = NAGIOS_NOT_INSTALLED; rc_insufficient_priv = NAGIOS_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; #endif } else { rc_not_installed = PCMK_OCF_NOT_INSTALLED; rc_insufficient_priv = PCMK_OCF_INSUFFICIENT_PRIV; rc_exec_error = PCMK_OCF_EXEC_ERROR; } switch (error) { /* see execve(2), stat(2) and fork(2) */ case ENOENT: /* No such file or directory */ case EISDIR: /* Is a directory */ case ENOTDIR: /* Path component is not a directory */ case EINVAL: /* Invalid executable format */ case ENOEXEC: /* Invalid executable format */ op->rc = rc_not_installed; op->status = PCMK_LRM_OP_NOT_INSTALLED; break; case EACCES: /* permission denied (various errors) */ case EPERM: /* permission denied (various errors) */ op->rc = rc_insufficient_priv; op->status = PCMK_LRM_OP_ERROR; break; default: op->rc = rc_exec_error; op->status = PCMK_LRM_OP_ERROR; } } static void action_launch_child(svc_action_t *op) { int lpc; /* SIGPIPE is ignored (which is different from signal blocking) by the gnutls library. * Depending on the libqb version in use, libqb may set SIGPIPE to be ignored as well. * We do not want this to be inherited by the child process. By resetting this the signal * to the default behavior, we avoid some potential odd problems that occur during OCF * scripts when SIGPIPE is ignored by the environment. */ signal(SIGPIPE, SIG_DFL); #if defined(HAVE_SCHED_SETSCHEDULER) if (sched_getscheduler(0) != SCHED_OTHER) { struct sched_param sp; memset(&sp, 0, sizeof(sp)); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { crm_perror(LOG_ERR, "Could not reset scheduling policy to SCHED_OTHER for %s", op->id); } } #endif if (setpriority(PRIO_PROCESS, 0, 0) == -1) { crm_perror(LOG_ERR, "Could not reset process priority to 0 for %s", op->id); } /* Man: The call setpgrp() is equivalent to setpgid(0,0) * _and_ compiles on BSD variants too * need to investigate if it works the same too. */ setpgid(0, 0); /* close all descriptors except stdin/out/err and channels to logd */ for (lpc = getdtablesize() - 1; lpc > STDERR_FILENO; lpc--) { close(lpc); } #if SUPPORT_CIBSECRETS if (replace_secret_params(op->rsc, op->params) < 0) { /* replacing secrets failed! */ if (safe_str_eq(op->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", op->rsc); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", op->rsc); _exit(PCMK_OCF_NOT_CONFIGURED); } } #endif /* Setup environment correctly */ add_OCF_env_vars(op); /* execute the RA */ execvp(op->opaque->exec, op->opaque->args); /* Most cases should have been already handled by stat() */ services_handle_exec_error(op, errno); _exit(op->rc); } static void action_synced_wait(svc_action_t * op, sigset_t mask) { #ifndef HAVE_SYS_SIGNALFD_H CRM_ASSERT(FALSE); #else int status = 0; int timeout = op->timeout; int sfd = -1; time_t start = -1; struct pollfd fds[3]; int wait_rc = 0; sfd = signalfd(-1, &mask, SFD_NONBLOCK); if (sfd < 0) { crm_perror(LOG_ERR, "signalfd() failed"); } fds[0].fd = op->opaque->stdout_fd; fds[0].events = POLLIN; fds[0].revents = 0; fds[1].fd = op->opaque->stderr_fd; fds[1].events = POLLIN; fds[1].revents = 0; fds[2].fd = sfd; fds[2].events = POLLIN; fds[2].revents = 0; crm_trace("Waiting for %d", op->pid); start = time(NULL); do { int poll_rc = poll(fds, 3, timeout); if (poll_rc > 0) { if (fds[0].revents & POLLIN) { svc_read_output(op->opaque->stdout_fd, op, FALSE); } if (fds[1].revents & POLLIN) { svc_read_output(op->opaque->stderr_fd, op, TRUE); } if (fds[2].revents & POLLIN) { struct signalfd_siginfo fdsi; ssize_t s; s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo)); if (s != sizeof(struct signalfd_siginfo)) { crm_perror(LOG_ERR, "Read from signal fd %d failed", sfd); } else if (fdsi.ssi_signo == SIGCHLD) { wait_rc = waitpid(op->pid, &status, WNOHANG); if (wait_rc < 0){ crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid); } else if (wait_rc > 0) { break; } } } } else if (poll_rc == 0) { timeout = 0; break; } else if (poll_rc < 0) { if (errno != EINTR) { crm_perror(LOG_ERR, "poll() failed"); break; } } timeout = op->timeout - (time(NULL) - start) * 1000; } while ((op->timeout < 0 || timeout > 0)); crm_trace("Child done: %d", op->pid); if (wait_rc <= 0) { int killrc = kill(op->pid, SIGKILL); op->rc = PCMK_OCF_UNKNOWN_ERROR; if (op->timeout > 0 && timeout <= 0) { op->status = PCMK_LRM_OP_TIMEOUT; crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout); } else { op->status = PCMK_LRM_OP_ERROR; } if (killrc && errno != ESRCH) { crm_err("kill(%d, KILL) failed: %d", op->pid, errno); } /* * From sigprocmask(2): * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. * * This makes it safe to skip WNOHANG here */ waitpid(op->pid, &status, 0); } else if (WIFEXITED(status)) { op->status = PCMK_LRM_OP_DONE; op->rc = WEXITSTATUS(status); crm_info("Managed %s process %d exited with rc=%d", op->id, op->pid, op->rc); } else if (WIFSIGNALED(status)) { int signo = WTERMSIG(status); op->status = PCMK_LRM_OP_ERROR; crm_err("Managed %s process %d exited with signal=%d", op->id, op->pid, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { crm_err("Managed %s process %d dumped core", op->id, op->pid); } #endif svc_read_output(op->opaque->stdout_fd, op, FALSE); svc_read_output(op->opaque->stderr_fd, op, TRUE); close(op->opaque->stdout_fd); close(op->opaque->stderr_fd); close(sfd); #endif } /* Returns FALSE if 'op' should be free'd by the caller */ gboolean services_os_action_execute(svc_action_t * op, gboolean synchronous) { int stdout_fd[2]; int stderr_fd[2]; sigset_t mask; sigset_t old_mask; struct stat st; if (pipe(stdout_fd) < 0) { crm_err("pipe() failed"); } if (pipe(stderr_fd) < 0) { crm_err("pipe() failed"); } /* Fail fast */ if(stat(op->opaque->exec, &st) != 0) { int rc = errno; crm_warn("Cannot execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!synchronous) { return operation_finalize(op); } return FALSE; } if (synchronous) { sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sigemptyset(&old_mask); if (sigprocmask(SIG_BLOCK, &mask, &old_mask) < 0) { crm_perror(LOG_ERR, "sigprocmask() failed"); } } op->pid = fork(); switch (op->pid) { case -1: { int rc = errno; close(stdout_fd[0]); close(stdout_fd[1]); close(stderr_fd[0]); close(stderr_fd[1]); crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); services_handle_exec_error(op, rc); if (!synchronous) { return operation_finalize(op); } return FALSE; } case 0: /* Child */ close(stdout_fd[0]); close(stderr_fd[0]); if (STDOUT_FILENO != stdout_fd[1]) { if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { crm_err("dup2() failed (stdout)"); } close(stdout_fd[1]); } if (STDERR_FILENO != stderr_fd[1]) { if (dup2(stderr_fd[1], STDERR_FILENO) != STDERR_FILENO) { crm_err("dup2() failed (stderr)"); } close(stderr_fd[1]); } action_launch_child(op); } /* Only the parent reaches here */ close(stdout_fd[1]); close(stderr_fd[1]); op->opaque->stdout_fd = stdout_fd[0]; set_fd_opts(op->opaque->stdout_fd, O_NONBLOCK); op->opaque->stderr_fd = stderr_fd[0]; set_fd_opts(op->opaque->stderr_fd, O_NONBLOCK); if (synchronous) { action_synced_wait(op, mask); if (sigismember(&old_mask, SIGCHLD) == 0) { if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) { crm_perror(LOG_ERR, "sigprocmask() to unblocked failed"); } } } else { crm_trace("Async waiting for %d - %s", op->pid, op->opaque->exec); mainloop_child_add_with_flags(op->pid, op->timeout, op->id, op, (op->flags & SVC_ACTION_LEAVE_GROUP) ? mainloop_leave_pid_group : 0, operation_finished); op->opaque->stdout_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stdout_fd, op, &stdout_callbacks); op->opaque->stderr_gsource = mainloop_add_fd(op->id, G_PRIORITY_LOW, op->opaque->stderr_fd, op, &stderr_callbacks); } return TRUE; } GList * services_os_get_directory_list(const char *root, gboolean files, gboolean executable) { GList *list = NULL; struct dirent **namelist; int entries = 0, lpc = 0; char buffer[PATH_MAX]; entries = scandir(root, &namelist, NULL, alphasort); if (entries <= 0) { return list; } for (lpc = 0; lpc < entries; lpc++) { struct stat sb; if ('.' == namelist[lpc]->d_name[0]) { free(namelist[lpc]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", root, namelist[lpc]->d_name); if (stat(buffer, &sb)) { continue; } if (S_ISDIR(sb.st_mode)) { if (files) { free(namelist[lpc]); continue; } } else if (S_ISREG(sb.st_mode)) { if (files == FALSE) { free(namelist[lpc]); continue; } else if (executable && (sb.st_mode & S_IXUSR) == 0 && (sb.st_mode & S_IXGRP) == 0 && (sb.st_mode & S_IXOTH) == 0) { free(namelist[lpc]); continue; } } list = g_list_append(list, strdup(namelist[lpc]->d_name)); free(namelist[lpc]); } free(namelist); return list; } GList * resources_os_list_lsb_agents(void) { return get_directory_list(LSB_ROOT_DIR, TRUE, TRUE); } GList * resources_os_list_ocf_providers(void) { return get_directory_list(OCF_ROOT_DIR "/resource.d", FALSE, TRUE); } GList * resources_os_list_ocf_agents(const char *provider) { GList *gIter = NULL; GList *result = NULL; GList *providers = NULL; if (provider) { char buffer[500]; snprintf(buffer, sizeof(buffer), "%s/resource.d/%s", OCF_ROOT_DIR, provider); return get_directory_list(buffer, TRUE, TRUE); } providers = resources_os_list_ocf_providers(); for (gIter = providers; gIter != NULL; gIter = gIter->next) { GList *tmp1 = result; GList *tmp2 = resources_os_list_ocf_agents(gIter->data); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } g_list_free_full(providers, free); return result; } #if SUPPORT_NAGIOS GList * resources_os_list_nagios_agents(void) { GList *plugin_list = NULL; GList *result = NULL; GList *gIter = NULL; plugin_list = get_directory_list(NAGIOS_PLUGIN_DIR, TRUE, TRUE); /* Make sure both the plugin and its metadata exist */ for (gIter = plugin_list; gIter != NULL; gIter = gIter->next) { const char *plugin = gIter->data; char *metadata = crm_strdup_printf(NAGIOS_METADATA_DIR "/%s.xml", plugin); struct stat st; if (stat(metadata, &st) == 0) { result = g_list_append(result, strdup(plugin)); } free(metadata); } g_list_free_full(plugin_list, free); return result; } #endif diff --git a/lib/services/services_private.h b/lib/services/services_private.h index f3094e39e3..183afb5d79 100644 --- a/lib/services/services_private.h +++ b/lib/services/services_private.h @@ -1,62 +1,66 @@ /* * Copyright (C) 2010 - 2011, Red Hat, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __MH_SERVICES_PRIVATE_H__ # define __MH_SERVICES_PRIVATE_H__ #if SUPPORT_DBUS # include #endif #define MAX_ARGC 255 struct svc_action_private_s { char *exec; char *args[MAX_ARGC]; guint repeat_timer; void (*callback) (svc_action_t * op); int stderr_fd; mainloop_io_t *stderr_gsource; int stdout_fd; mainloop_io_t *stdout_gsource; #if SUPPORT_DBUS DBusPendingCall* pending; unsigned timerid; #endif }; GList *services_os_get_directory_list(const char *root, gboolean files, gboolean executable); gboolean services_os_action_execute(svc_action_t * op, gboolean synchronous); GList *resources_os_list_lsb_agents(void); GList *resources_os_list_ocf_providers(void); GList *resources_os_list_ocf_agents(const char *provider); GList *resources_os_list_nagios_agents(void); gboolean cancel_recurring_action(svc_action_t * op); gboolean recurring_action_timer(gpointer data); gboolean operation_finalize(svc_action_t * op); +void handle_blocked_ops(void); + +gboolean is_op_blocked(const char *rsc); + #endif /* __MH_SERVICES_PRIVATE_H__ */ diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in index 118950eba7..750b174ede 100755 --- a/lrmd/regression.py.in +++ b/lrmd/regression.py.in @@ -1,1107 +1,1121 @@ #!/usr/bin/python # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import os import sys import subprocess import shlex import time # Where to find test binaries # Prefer the source tree if available build_dir="@abs_top_builddir@" test_dir=sys.path[0] new_path=os.environ['PATH'] if os.path.exists("%s/regression.py.in" % test_dir): print "Running tests from the source tree: %s (%s)" % (build_dir, test_dir) new_path = "%s/lrmd:%s" % (build_dir, new_path) # For lrmd, lrmd_test and pacemaker_remoted new_path = "%s/tools:%s" % (build_dir, new_path) # For crm_resource new_path = "%s/fencing:%s" % (build_dir, new_path) # For stonithd else: print "Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % test_dir new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) # For stonithd, lrmd, lrmd_test and pacemaker_remoted print new_path os.environ['PATH']=new_path def output_from_command(command, no_wait=0): test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) if no_wait == 0: test.wait() else: return 0 return test.communicate()[0].split("\n") class Test: def __init__(self, name, description, verbose = 0, tls = 0): self.name = name self.description = description self.cmds = [] if tls: self.daemon_location = "pacemaker_remoted" else: self.daemon_location = "lrmd" self.test_tool_location = "lrmd_test" self.verbose = verbose self.tls = tls self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = 0; self.lrmd_process = None self.stonith_process = None self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None): if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): ### make sure we are in full control here ### cmd = shlex.split("killall -q -9 stonithd lt-stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test pacemaker_remoted") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" if self.tls == 0: self.stonith_process = subprocess.Popen(shlex.split("stonithd -s")) if self.verbose: additional_args = additional_args + " -V" self.lrmd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/lrmd-regression.log" % (self.daemon_location, additional_args))) time.sleep(1) def clean_environment(self): if self.lrmd_process: self.lrmd_process.terminate() self.lrmd_process.wait() if self.verbose: print "Daemon output" f = open('/tmp/lrmd-regression.log', 'r') for line in f.readlines(): print line.strip() os.remove('/tmp/lrmd-regression.log') if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.lrmd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): self.__new_cmd(cmd, args, 0, "") def add_sys_cmd_no_wait(self, cmd, args): self.__new_cmd(cmd, args, 0, "", 1) def add_cmd_check_stdout(self, args, match, no_match = ""): self.__new_cmd(self.test_tool_location, args, 0, match, 0, no_match) def add_cmd(self, args): self.__new_cmd(self.test_tool_location, args, 0, "") def add_cmd_and_kill(self, killProc, args): self.__new_cmd(self.test_tool_location, args, 0, "", kill=killProc) def add_expected_fail_cmd(self, args): self.__new_cmd(self.test_tool_location, args, 1, "") def get_exitcode(self): return self.result_exitcode def print_result(self, filler): print "%s%s" % (filler, self.result_txt) def run_cmd(self, args): cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print "\n\nRunning: "+" ".join(cmd) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print "Also running: "+args['kill'] ### Typically the kill argument is used to detect some sort of ### failure. Without yeilding for a few seconds here the process ### launched earlier that is listening for the failure may not have time ### to connect to the lrmd. time.sleep(2) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return 0 output = test.communicate()[0] if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: test.returncode = -2 print "STDOUT string '%s' was not found in cmd output" % (args['stdout_match']) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: test.returncode = -2 print "STDOUT string '%s' was found in cmd output" % (args['stdout_negative_match']) args['cmd_output'] = output return test.returncode; def run(self): res = 0 i = 1 if self.tls and self.name.count("stonith") != 0: self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print self.result_txt return res self.start_environment() if self.verbose: print "\n--- START TEST - %s" % self.name self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = 0 for cmd in self.cmds: res = self.run_cmd(cmd) if res != cmd['expected_exitcode']: print cmd['cmd_output'] print "Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode']) self.result_txt = "FAILURE - '%s' failed at step %d. Command: lrmd_test %s" % (self.name, i, cmd['args']) self.result_exitcode = -1 break else: if self.verbose: print cmd['cmd_output'].strip() print "Step %d SUCCESS" % (i) i = i + 1 self.clean_environment() print self.result_txt if self.verbose: print "--- END TEST - %s\n" % self.name self.executed = 1 return res class Tests: def __init__(self, verbose = 0, tls = 0): self.tests = [] self.verbose = verbose self.tls = tls; self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.need_authkey = 0 self.action_timeout = " -t 5000 " if self.tls: self.rsc_classes.remove("stonith") if "systemd" in self.rsc_classes: # the lrmd_dummy_daemon requires this, we are importing it # here just to guarantee it is installed before allowing this # script to run. Otherwise, running without this import being # available will make all the systemd tests look like they fail, # which is really scary looking. I'd rather see the import fail. import systemd.daemon print "Testing "+repr(self.rsc_classes) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : "-c exec -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : "-c cancel -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc "+self.action_timeout+" -C systemd -T lrmd_dummy_daemon", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : "-c exec -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "systemd_cancel_line" : "-c cancel -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T lrmd_dummy_daemon", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : "-c exec -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "upstart_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "upstart_cancel_line" : "-c cancel -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : "-c exec -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : "-c cancel -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : "-c exec -r \"lsb_test_rsc\" -a status -i \"2000\" "+self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : "-c cancel -r \"lsb_test_rsc\" -a \"status\" -i \"2000\" -t \"3000\" ", "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "heartbeat_reg_line" : "-c register_rsc -r hb_test_rsc "+self.action_timeout+" -C heartbeat -T HBDummy", "heartbeat_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:hb_test_rsc action:none rc:ok op_status:complete\" ", "heartbeat_unreg_line" : "-c unregister_rsc -r \"hb_test_rsc\" "+self.action_timeout, "heartbeat_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:hb_test_rsc action:none rc:ok op_status:complete\"", "heartbeat_start_line" : "-c exec -r \"hb_test_rsc\" -a \"start\" -k 1 -v a -k 2 -v b "+self.action_timeout, "heartbeat_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:start rc:ok op_status:complete\" ", "heartbeat_stop_line" : "-c exec -r \"hb_test_rsc\" -a \"stop\" -k 1 -v a -k 2 -v b "+self.action_timeout, "heartbeat_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:stop rc:ok op_status:complete\" ", "heartbeat_monitor_line" : "-c exec -r \"hb_test_rsc\" -a status -k 1 -v a -k 2 -v b -i \"2000\" "+self.action_timeout, "heartbeat_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "heartbeat_cancel_line" : "-c cancel -r \"hb_test_rsc\" -a \"status\" -k 1 -v a -k 2 -v b -i \"2000\" -t \"3000\" ", "heartbeat_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc "+self.action_timeout+" -C stonith -P pacemaker -T fence_dummy_monitor", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : "-c exec -r \"stonith_test_rsc\" -a \"start\" -t 8000 ", "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : "-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : "-c cancel -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" -t \"3000\" ", "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): test = Test(name, description, self.verbose, self.tls) self.tests.append(test) return test def setup_test_environment(self): os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): self.need_authkey = 1 os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") ### Make fake systemd daemon and unit file ### dummy_daemon = """#!/bin/python import time, systemd.daemon time.sleep(3) systemd.daemon.notify("READY=1") while True: time.sleep(5) """ dummy_service_file = """ [Unit] Description=Dummy resource that takes a while to start [Service] Type=notify ExecStart=/usr/sbin/lrmd_dummy_daemon """ dummy_upstart_job = (""" description "Dummy service for regression tests" exec dd if=/dev/random of=/dev/null """) dummy_fence_sleep_agent = ("""#!/usr/bin/python import sys import time def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: time.sleep(30000) sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) dummy_fence_agent = ("""#!/usr/bin/python import sys def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: sys.exit(0) if line.count("metadata") > 0: print '' print ' dummy description.' print ' http://www.example.com' print ' ' print ' ' print ' ' print ' ' print ' Fencing Action' print ' ' print ' ' print ' ' print ' ' print ' Physical plug number or name of virtual machine' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print ' ' print '' sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) os.system("cat <<-END >>/etc/init/lrmd_dummy_daemon.conf\n%s\nEND" % (dummy_upstart_job)) os.system("cat <<-END >>/usr/sbin/lrmd_dummy_daemon\n%s\nEND" % (dummy_daemon)) os.system("cat <<-END >>/lib/systemd/system/lrmd_dummy_daemon.service\n%s\nEND" % (dummy_service_file)) os.system("chmod a+x /usr/sbin/lrmd_dummy_daemon") os.system("cat <<-END >>/usr/sbin/fence_dummy_sleep\n%s\nEND" % (dummy_fence_sleep_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_sleep") os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor\n%s\nEND" % (dummy_fence_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_monitor") if os.path.exists("%s/cts/LSBDummy" % build_dir): print "Using %s/cts/LSBDummy" % build_dir os.system("cp %s/cts/LSBDummy /etc/init.d/LSBDummy" % build_dir) if not os.path.exists("@OCF_RA_DIR@/pacemaker"): os.system("mkdir -p @OCF_RA_DIR@/pacemaker/") # Install helper OCF agents for ra in [ "Dummy", "Stateful", "ping" ]: os.system("cp %s/extra/resources/%s @OCF_RA_DIR@/pacemaker/%s" % (build_dir, ra, ra)) os.system("chmod a+x @OCF_RA_DIR@/pacemaker/%s" % (ra)) else: # Assume it's installed print "Using @datadir@/@PACKAGE@/tests/cts/LSBDummy" os.system("cp @datadir@/@PACKAGE@/tests/cts/LSBDummy /etc/init.d/LSBDummy") os.system("chmod a+x /etc/init.d/LSBDummy") os.system("ls -al /etc/init.d/LSBDummy") os.system("mkdir -p @CRM_CORE_DIR@/root") os.system("mkdir -p /etc/ha.d/resource.d") if os.path.exists("%s/cts/HBDummy" % build_dir): print "Using %s/cts/HBDummy" % build_dir os.system("cp %s/cts/HBDummy /etc/ha.d/resource.d/HBDummy" % build_dir) else: # Assume it's installed print "Using @datadir@/@PACKAGE@/tests/cts/HBDummy" os.system("cp @datadir@/@PACKAGE@/tests/cts/HBDummy /etc/ha.d/resource.d/HBDummy") os.system("chmod a+x /etc/ha.d/resource.d/HBDummy") os.system("ls -al /etc/ha.d/resource.d/HBDummy") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") def cleanup_test_environment(self): if self.need_authkey: os.system("rm -f /etc/pacemaker/authkey") os.system("rm -f /etc/init.d/LSBDummy") os.system("rm -f /etc/ha.d/resource.d/HBDummy") os.system("rm -f /lib/systemd/system/lrmd_dummy_daemon.service") os.system("rm -f /usr/sbin/lrmd_dummy_daemon") os.system("rm -f /usr/sbin/fence_dummy_monitor") os.system("rm -f /usr/sbin/fence_dummy_sleep") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") ### These are tests that should apply to all resource classes ### def build_generic_tests(self): common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### # Add the duplicate monitors. test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### These are complex tests that involve managing multiple resouces of different types ### def build_multi_rsc_tests(self): common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor is not being rescheduled #### for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### These are tests related to how the lrmd handles failures. ### def build_negative_tests(self): ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") # -t must be less than self.action_timeout test.add_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 1000 -w") # -t must be less than self.action_timeout test.add_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after lrmd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd("-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"600000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q stonithd lt-stonithd" ,"-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error\" -t 15000") test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T lrmd_dummy_daemon "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q lrmd_dummy_daemon", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T lrmd_dummy_daemon "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q dd", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-c cancel -r test_rsc -a \"monitor\" -i 1234 -t \"3000\" " ### interval is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a stop -i 100 -t \"3000\" " ### action name is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a monitor -i 3000 "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stress tests ### def build_stress_tests(self): timeout = "-t 20000" - iterations = 25 + iterations = 25 test = self.new_test("ocf_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T lrmd_dummy_daemon -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) + iterations = 9 + timeout = "-t 30000" + ### Verify recurring op in-flight collision is handled in series properly + test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") + test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) + test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) + for i in range(iterations): + test.add_cmd("-c exec -r test_rsc -a monitor %s -i 100%d -k op_sleep -v 2 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\"" % (timeout, i)) +# test.add_sys_cmd("sleep", "-al @CRM_RSCTMP_DIR@") + + test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) + test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) ### These are tests that target specific cases ### def build_custom_tests(self): + ### verify resource temporary folder is created and used by heartbeat agents. ### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_expected_fail_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000") test.add_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_expected_fail_cmd("-l " "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T lrmd_dummy_daemon "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T lrmd_dummy_daemon "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+" -n " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"3000\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"" ,"resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"" ,"resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_monitor\"", "resource-agent name=\"fence_dummy_monitor\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"lrmd_dummy_daemon\"" ,"resource-agent name=\"lrmd_dummy_daemon\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"lrmd_dummy_daemon\"" ,"resource-agent name=\"lrmd_dummy_daemon\"") if "heartbeat" in self.rsc_classes: test = self.new_test("get_heartbeat_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"heartbeat\" -T \"HBDummy\"" ,"resource-agent name='HBDummy'") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy_monitor") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy_monitor") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy_monitor") if "heartbeat" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C heartbeat", "HBDummy") test.add_cmd_check_stdout("-c list_agents -C heartbeat", "", "LSBDummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "HBDummy") ### should not exist def print_list(self): print "\n==== %d TESTS FOUND ====" % (len(self.tests)) print "%35s - %s" % ("TEST NAME", "TEST DESCRIPTION") print "%35s - %s" % ("--------------------", "--------------------") for test in self.tests: print "%35s - %s" % (test.name, test.description) print "==== END OF LIST ====\n" def run_single(self, name): for test in self.tests: if test.name == name: test.run() break; def run_tests_matching(self, pattern): for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): for test in self.tests: test.run() def exit(self): for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: sys.exit(-1) sys.exit(0); def print_results(self): failures = 0; success = 0; print "\n\n======= FINAL RESULTS ==========" print "\n--- FAILURE RESULTS:" for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print " None" print "\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures) class TestOptions: def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['pacemaker-remote'] = 0 def build_options(self, argv): args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-R" or args[i] == "--pacemaker-remote": self.options['pacemaker-remote'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): print "usage: " + sys.argv[0] + " [options]" print "If no options are provided, all tests will run" print "Options:" print "\t [--help | -h] Show usage" print "\t [--list-tests | -l] Print out all registered tests." print "\t [--run-only | -r 'testname'] Run a specific test" print "\t [--verbose | -V] Verbose output" print "\t [--pacemaker-remote | -R Test pacemaker-remote binary instead of lrmd." print "\t [--run-only-pattern | -p 'string'] Run only tests containing the string value" print "\n\tExample: Run only the test 'start_top'" print "\t\t python ./regression.py --run-only start_stop" print "\n\tExample: Run only the tests with the string 'systemd' present in them" print "\t\t python ./regression.py --run-only-pattern systemd" def main(argv): o = TestOptions() o.build_options(argv) tests = Tests(o.options['verbose'], o.options['pacemaker-remote']) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.build_stress_tests() tests.setup_test_environment() print "Starting ..." if o.options['list-tests']: tests.print_list() elif o.options['show-usage']: o.show_usage() elif o.options['run-only-pattern'] != "": tests.run_tests_matching(o.options['run-only-pattern']) tests.print_results() elif o.options['run-only'] != "": tests.run_single(o.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__=="__main__": main(sys.argv)