diff --git a/cib/io.c b/cib/io.c index b6a58a1694..babed88b32 100644 --- a/cib/io.c +++ b/cib/io.c @@ -1,483 +1,483 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern const char *cib_root; crm_trigger_t *cib_writer = NULL; extern int cib_status; int write_cib_contents(gpointer p); static void cib_rename(const char *old) { int new_fd; char *new = crm_strdup_printf("%s/cib.auto.XXXXXX", cib_root); crm_err("Archiving unusable file %s as %s", old, new); umask(S_IWGRP | S_IWOTH | S_IROTH); if ((new_fd = mkstemp(new) < 0) || (rename(old, new) < 0)) { crm_perror(LOG_ERR, "Couldn't rename %s as %s", old, new); crm_err("Disabling disk writes and continuing"); cib_writes_enabled = FALSE; } if (new_fd > 0) { close(new_fd); } free(new); } /* * It is the callers responsibility to free the output of this function */ static xmlNode * retrieveCib(const char *filename, const char *sigfile) { xmlNode *root = NULL; crm_info("Reading cluster configuration file %s (digest: %s)", filename, sigfile); switch (cib_file_read_and_verify(filename, sigfile, &root)) { case -pcmk_err_cib_corrupt: crm_warn("Continuing but %s will NOT be used.", filename); break; case -pcmk_err_cib_modified: /* Archive the original files so the contents are not lost */ crm_warn("Continuing but %s will NOT be used.", filename); cib_rename(filename); cib_rename(sigfile); break; } return root; } /* * for OSs without support for direntry->d_type, like Solaris */ #ifndef DT_UNKNOWN # define DT_UNKNOWN 0 # define DT_FIFO 1 # define DT_CHR 2 # define DT_DIR 4 # define DT_BLK 6 # define DT_REG 8 # define DT_LNK 10 # define DT_SOCK 12 # define DT_WHT 14 #endif /*DT_UNKNOWN*/ static int cib_archive_filter(const struct dirent * a) { int rc = 0; /* Looking for regular files (d_type = 8) starting with 'cib-' and not ending in .sig */ struct stat s; char *a_path = crm_strdup_printf("%s/%s", cib_root, a->d_name); if(stat(a_path, &s) != 0) { rc = errno; crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_strerror(rc), rc); rc = 0; } else if ((s.st_mode & S_IFREG) != S_IFREG) { unsigned char dtype; #ifdef HAVE_STRUCT_DIRENT_D_TYPE dtype = a->d_type; #else switch (s.st_mode & S_IFMT) { case S_IFREG: dtype = DT_REG; break; case S_IFDIR: dtype = DT_DIR; break; case S_IFCHR: dtype = DT_CHR; break; case S_IFBLK: dtype = DT_BLK; break; case S_IFLNK: dtype = DT_LNK; break; case S_IFIFO: dtype = DT_FIFO; break; case S_IFSOCK: dtype = DT_SOCK; break; default: dtype = DT_UNKNOWN; break; } #endif crm_trace("%s - wrong type (%d)", a->d_name, dtype); } else if(strstr(a->d_name, "cib-") != a->d_name) { crm_trace("%s - wrong prefix", a->d_name); } else if (crm_ends_with_ext(a->d_name, ".sig")) { crm_trace("%s - wrong suffix", a->d_name); } else { crm_debug("%s - candidate", a->d_name); rc = 1; } free(a_path); return rc; } static int cib_archive_sort(const struct dirent ** a, const struct dirent **b) { /* Order by creation date - most recently created file first */ int rc = 0; struct stat buf; time_t a_age = 0; time_t b_age = 0; char *a_path = crm_strdup_printf("%s/%s", cib_root, a[0]->d_name); char *b_path = crm_strdup_printf("%s/%s", cib_root, b[0]->d_name); if(stat(a_path, &buf) == 0) { a_age = buf.st_ctime; } if(stat(b_path, &buf) == 0) { b_age = buf.st_ctime; } free(a_path); free(b_path); if(a_age > b_age) { rc = 1; } else if(a_age < b_age) { rc = -1; } crm_trace("%s (%lu) vs. %s (%lu) : %d", a[0]->d_name, (unsigned long)a_age, b[0]->d_name, (unsigned long)b_age, rc); return rc; } xmlNode * readCibXmlFile(const char *dir, const char *file, gboolean discard_status) { struct dirent **namelist = NULL; int lpc = 0; char *sigfile = NULL; char *filename = NULL; const char *name = NULL; const char *value = NULL; const char *validation = NULL; const char *use_valgrind = getenv("PCMK_valgrind_enabled"); xmlNode *root = NULL; xmlNode *status = NULL; if (!crm_is_writable(dir, file, CRM_DAEMON_USER, NULL, FALSE)) { cib_status = -EACCES; return NULL; } filename = crm_concat(dir, file, '/'); sigfile = crm_concat(filename, "sig", '.'); cib_status = pcmk_ok; root = retrieveCib(filename, sigfile); free(filename); free(sigfile); if (root == NULL) { crm_warn("Primary configuration corrupt or unusable, trying backups in %s", cib_root); lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort); if (lpc < 0) { crm_perror(LOG_NOTICE, "scandir(%s) failed", cib_root); } } while (root == NULL && lpc > 1) { crm_debug("Testing %d candidates", lpc); lpc--; filename = crm_strdup_printf("%s/%s", cib_root, namelist[lpc]->d_name); sigfile = crm_concat(filename, "sig", '.'); crm_info("Reading cluster configuration file %s (digest: %s)", filename, sigfile); if (cib_file_read_and_verify(filename, sigfile, &root) < 0) { crm_warn("Continuing but %s will NOT be used.", filename); } else { crm_notice("Continuing with last valid configuration archive: %s", filename); } free(namelist[lpc]); free(filename); free(sigfile); } free(namelist); if (root == NULL) { root = createEmptyCib(0); crm_warn("Continuing with an empty configuration."); } if (cib_writes_enabled && use_valgrind) { if (crm_is_true(use_valgrind) || strstr(use_valgrind, "cib")) { cib_writes_enabled = FALSE; crm_err("*** Disabling disk writes to avoid confusing Valgrind ***"); } } status = find_xml_node(root, XML_CIB_TAG_STATUS, FALSE); if (discard_status && status != NULL) { /* strip out the status section if there is one */ free_xml(status); status = NULL; } if (status == NULL) { create_xml_node(root, XML_CIB_TAG_STATUS); } /* Do this before schema validation happens */ /* fill in some defaults */ name = XML_ATTR_GENERATION_ADMIN; value = crm_element_value(root, name); if (value == NULL) { crm_warn("No value for %s was specified in the configuration.", name); crm_warn("The recommended course of action is to shutdown," " run crm_verify and fix any errors it reports."); crm_warn("We will default to zero and continue but may get" " confused about which configuration to use if" " multiple nodes are powered up at the same time."); crm_xml_add_int(root, name, 0); } name = XML_ATTR_GENERATION; value = crm_element_value(root, name); if (value == NULL) { crm_xml_add_int(root, name, 0); } name = XML_ATTR_NUMUPDATES; value = crm_element_value(root, name); if (value == NULL) { crm_xml_add_int(root, name, 0); } // Unset (DC should set appropriate value) xml_remove_prop(root, XML_ATTR_DC_UUID); if (discard_status) { crm_log_xml_trace(root, "[on-disk]"); } validation = crm_element_value(root, XML_ATTR_VALIDATION); if (validate_xml(root, NULL, TRUE) == FALSE) { crm_err("CIB does not validate with %s", crm_str(validation)); cib_status = -pcmk_err_schema_validation; } else if (validation == NULL) { int version = 0; update_validation(&root, &version, 0, FALSE, FALSE); if (version > 0) { crm_notice("Enabling %s validation on" " the existing (sane) configuration", get_schema_name(version)); } else { crm_err("CIB does not validate with any known schema"); cib_status = -pcmk_err_schema_validation; } } return root; } /* * The caller should never free the return value */ xmlNode * get_the_CIB(void) { return the_cib; } gboolean uninitializeCib(void) { xmlNode *tmp_cib = the_cib; if (tmp_cib == NULL) { crm_debug("The CIB has already been deallocated."); return FALSE; } the_cib = NULL; crm_debug("Deallocating the CIB."); free_xml(tmp_cib); crm_debug("The CIB has been deallocated."); return TRUE; } /* * This method will free the old CIB pointer on success and the new one * on failure. */ int activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op) { if (new_cib) { xmlNode *saved_cib = the_cib; CRM_ASSERT(new_cib != saved_cib); the_cib = new_cib; free_xml(saved_cib); if (cib_writes_enabled && cib_status == pcmk_ok && to_disk) { crm_debug("Triggering CIB write for %s op", op); mainloop_set_trigger(cib_writer); } return pcmk_ok; } crm_err("Ignoring invalid CIB"); if (the_cib) { crm_warn("Reverting to last known CIB"); } else { crm_crit("Could not write out new CIB and no saved version to revert to"); } return -ENODATA; } static void cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { if (signo) { crm_notice("Disk write process terminated with signal %d (pid=%d, core=%d)", signo, pid, core); } else { do_crm_log(exitcode == 0 ? LOG_TRACE : LOG_ERR, "Disk write process exited (pid=%d, rc=%d)", pid, exitcode); } if (exitcode != 0 && cib_writes_enabled) { crm_err("Disabling disk writes after write failure"); cib_writes_enabled = FALSE; } mainloop_trigger_complete(cib_writer); } int write_cib_contents(gpointer p) { int exit_rc = pcmk_ok; xmlNode *cib_local = NULL; /* Make a copy of the CIB to write (possibly in a forked child) */ if (p) { /* Synchronous write out */ cib_local = copy_xml(p); } else { int pid = 0; int bb_state = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0); /* Turn it off before the fork() to avoid: * - 2 processes writing to the same shared mem * - the child needing to disable it * (which would close it from underneath the parent) * This way, the shared mem files are already closed */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); pid = fork(); if (pid < 0) { crm_perror(LOG_ERR, "Disabling disk writes after fork failure"); cib_writes_enabled = FALSE; return FALSE; } if (pid) { /* Parent */ mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete); if (bb_state == QB_LOG_STATE_ENABLED) { /* Re-enable now that it it safe */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); } return -1; /* -1 means 'still work to do' */ } - /* A-synchronous write out after a fork() */ + /* Asynchronous write-out after a fork() */ - /* In theory we can scribble on "the_cib" here and not affect the parent - * But lets be safe anyway + /* In theory, we can scribble on the_cib here and not affect the parent, + * but let's be safe anyway. */ cib_local = copy_xml(the_cib); } /* Write the CIB */ exit_rc = cib_file_write_with_digest(cib_local, cib_root, "cib.xml"); /* A nonzero exit code will cause further writes to be disabled */ free_xml(cib_local); if (p == NULL) { crm_exit_t exit_code = CRM_EX_OK; switch (exit_rc) { case pcmk_ok: exit_code = CRM_EX_OK; break; case pcmk_err_cib_modified: exit_code = CRM_EX_DIGEST; // Existing CIB doesn't match digest break; case pcmk_err_cib_backup: // Existing CIB couldn't be backed up case pcmk_err_cib_save: // New CIB couldn't be saved exit_code = CRM_EX_CANTCREAT; break; default: exit_code = CRM_EX_ERROR; break; } /* Use _exit() because exit() could affect the parent adversely */ _exit(exit_code); } return exit_rc; } diff --git a/cts/CM_common.py b/cts/CM_common.py index 6f4856b18f..f85d6c3114 100755 --- a/cts/CM_common.py +++ b/cts/CM_common.py @@ -1,486 +1,486 @@ '''CTS: Cluster Testing System: Cluster Manager Common Class This was originally the cluster manager class for the Heartbeat stack. It is retained for use as a base class by other cluster manager classes. It could be merged into the ClusterManager class directly, but this is easier. ''' __copyright__ = ''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import sys from cts.CTSvars import * from cts.CTS import * from cts.CIB import * from cts.CTStests import AuditResource from cts.watcher import LogWatcher class crm_common(ClusterManager): def __init__(self, Environment, randseed=None, name=None): ClusterManager.__init__(self, Environment, randseed=randseed) self.fastfail = 0 self.cib_installed = 0 self.config = None self.cluster_monitor = 0 self.use_short_names = 1 if self.Env["DoBSC"]: del self.templates["Pat:They_stopped"] self._finalConditions() self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} self.CibFactory = ConfigFactory(self) self.cib = self.CibFactory.createConfig(self.Env["Schema"]) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end - '''Return list of errors which are known and very noisey should be ignored''' + """ Return a list of known error messages that should be ignored """ return PatternSelector().get_patterns(self.name, "BadNewsIgnore") def install_config(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("Node %s is not up." % node) return None if not node in self.CIBsync and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Only install the CIB on the first node, all the other ones will pick it up from there if self.cib_installed == 1: return None self.cib_installed = 1 if self.Env["CIBfilename"] == None: self.log("Installing Generated CIB on node %s" % (node)) self.cib.install(node) else: self.log("Installing CIB (%s) on node %s" % (self.Env["CIBfilename"], node)) if 0 != self.rsh.cp(self.Env["CIBfilename"], "root@" + (self.templates["CIBfile"] % node)): raise ValueError("Can not scp file to %s %d"%(node)) self.rsh(node, "chown "+CTSvars.CRM_DAEMON_USER+" "+CTSvars.CRM_CONFIG_DIR+"/cib.xml") def prepare(self): '''Finish the Initialization process. Prepare to test...''' self.partitions_expected = 1 for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.unisolate_node(node) self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)") watchpats.append(self.templates["Pat:NonDC_started"] % node) watchpats.append(self.templates["Pat:DC_started"] % node) idle_watch = LogWatcher(self.Env["LogFileName"], watchpats, "ClusterIdle", hosts=[node], kind=self.Env["LogWatcher"]) idle_watch.setwatch() out = self.rsh(node, self.templates["StatusCmd"]%node, 1) self.debug("Node %s status: '%s'" %(node, out)) if not out or str.find(out, 'ok') < 0: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" % (node, "down", self.ShouldBeStatus[node])) self.ShouldBeStatus[node] = "down" return 0 if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s: %s" % (node, "up", self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node] = "up" # check the output first - because syslog-ng loses messages if str.find(out, 'S_NOT_DC') != -1: # Up and stable return 2 if str.find(out, 'S_IDLE') != -1: # Up and stable return 2 # fall back to syslog-ng and wait if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" % (node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" % (node)) return None def partition_stable(self, nodes, timeout=None): watchpats = [ ] watchpats.append("Current ping state: S_IDLE") watchpats.append(self.templates["Pat:DC_IDLE"]) self.debug("Waiting for cluster stability...") if timeout == None: timeout = self.Env["DeadTime"] if len(nodes) < 3: self.debug("Cluster is inactive") return 1 idle_watch = LogWatcher(self.Env["LogFileName"], watchpats, "ClusterStable", timeout, hosts=nodes.split(), kind=self.Env["LogWatcher"]) idle_watch.setwatch() for node in nodes.split(): # have each node dump its current state self.rsh(node, self.templates["StatusCmd"] % node, 1) ret = idle_watch.look() while ret: self.debug(ret) for node in nodes.split(): if re.search(node, ret): return 1 ret = idle_watch.look() self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout)) return None def cluster_stable(self, timeout=None, double_check=False): partitions = self.find_partitions() for partition in partitions: if not self.partition_stable(partition, timeout): return None if double_check: # Make sure we are really stable and that all resources, # including those that depend on transient node attributes, # are started if they were going to be time.sleep(5) for partition in partitions: if not self.partition_stable(partition, timeout): return None return 1 def is_node_dc(self, node, status_line=None): rc = 0 if not status_line: status_line = self.rsh(node, self.templates["StatusCmd"]%node, 1) if not status_line: rc = 0 elif str.find(status_line, 'S_IDLE') != -1: rc = 1 elif str.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif str.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif str.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif str.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 return rc def active_resources(self, node): # [SM].* {node} matches Started, Slave, Master # Stopped wont be matched as it wont include {node} (rc, output) = self.rsh(node, """crm_resource -c""", None) resources = [] for line in output: if re.search("^Resource", line): tmp = AuditResource(self, line) if tmp.type == "primitive" and tmp.host == node: resources.append(tmp.id) return resources def ResourceLocation(self, rid): ResourceNodes = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": cmd = self.templates["RscRunning"] % (rid) (rc, lines) = self.rsh(node, cmd, None) if rc == 127: self.log("Command '%s' failed. Binary or pacemaker-cts package not installed?" % cmd) for line in lines: self.log("Output: "+line) elif rc == 0: ResourceNodes.append(node) return ResourceNodes def find_partitions(self): ccm_partitions = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": partition = self.rsh(node, self.templates["PartitionCmd"], 1) if not partition: self.log("no partition details for %s" % node) elif len(partition) > 2: nodes = partition.split() nodes.sort() partition = ' '.join(nodes) found = 0 for a_partition in ccm_partitions: if partition == a_partition: found = 1 if found == 0: self.debug("Adding partition from %s: %s" % (node, partition)) ccm_partitions.append(partition) else: self.debug("Partition '%s' from %s is consistent with existing entries" % (partition, node)) else: self.log("bad partition details for %s" % node) else: self.debug("Node %s is down... skipping" % node) self.debug("Found partitions: %s" % repr(ccm_partitions) ) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == "up": quorum = self.rsh(node, self.templates["QuorumCmd"], 1) if str.find(quorum, "1") != -1: return 1 elif str.find(quorum, "0") != -1: return 0 else: self.debug("WARN: Unexpected quorum test result from " + node + ":" + quorum) return 0 def Components(self): complist = [] common_ignore = [ "Pending action:", "(ERROR|error): crm_log_message_adv:", "(ERROR|error): MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "Action A_RECOVER .* not supported", "(ERROR|error): stonithd_op_result_ready: not signed on", "pingd.*(ERROR|error): send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", r": Performing A_EXIT_1 - forcefully exiting the CRMd", r"Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", ] stonith_ignore = [ r"Updating failcount for child_DoFencing", r"(ERROR|error).*: Sign-in failed: triggered a retry", "lrmd.*(ERROR|error): stonithd_receive_ops_result failed.", ] stonith_ignore.extend(common_ignore) ccm = Process(self, "ccm", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "crmd.*Action A_RECOVER .* not supported", r"crmd.*: Input I_TERMINATE .*from do_recover", r"crmd.*: Could not recover from internal error", "crmd.*I_ERROR.*crmd_cib_connection_destroy", # these status numbers are likely wrong now r"crmd.*exited with status 2", r"attrd.*exited with status 1", r"cib.*exited with status 2", # Not if it was fenced # "A new node joined the cluster", # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", # "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE", # "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = common_ignore) cib = Process(self, "cib", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "Lost connection to the CIB service", "Connection to the CIB terminated...", r"crmd.*: Input I_TERMINATE .*from do_recover", "crmd.*I_ERROR.*crmd_cib_connection_destroy", r"crmd.*: Could not recover from internal error", # these status numbers are likely wrong now r"crmd.*exited with status 2", r"attrd.*exited with status 1", ], badnews_ignore = common_ignore) lrmd = Process(self, "lrmd", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "LRM Connection failed", "crmd.*I_ERROR.*lrm_connection_destroy", "State transition S_STARTING -> S_PENDING", r"crmd.*: Input I_TERMINATE .*from do_recover", r"crmd.*: Could not recover from internal error", # this status number is likely wrong now r"crmd.*exited with status 2", ], badnews_ignore = common_ignore) crmd = Process(self, "crmd", triggersreboot=self.fastfail, pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", "State transition .* S_IDLE", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = common_ignore) pengine = Process(self, "pengine", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", r"crmd.*: Input I_TERMINATE .*from do_recover", r"crmd.*: Could not recover from internal error", r"crmd.*CRIT.*: Connection to the Policy Engine failed", "crmd.*I_ERROR.*save_cib_contents", # this status number is likely wrong now r"crmd.*exited with status 2", ], badnews_ignore = common_ignore, dc_only=1) if self.Env["DoFencing"] == 1 : complist.append(Process(self, "stoniths", triggersreboot=self.fastfail, dc_pats = [ r"crmd.*CRIT.*: Fencing daemon connection failed", "Attempting connection to fencing daemon", ], badnews_ignore = stonith_ignore)) if self.fastfail == 0: ccm.pats.extend([ # these status numbers are likely wrong now r"attrd.*exited with status 1", r"cib.*exited with status 2", r"crmd.*exited with status 2", ]) cib.pats.extend([ # these status numbers are likely wrong now r"attrd.*exited with status 1", r"crmd.*exited with status 2", ]) lrmd.pats.extend([ # these status numbers are likely wrong now r"crmd.*exited with status 2", ]) complist.append(ccm) complist.append(cib) complist.append(lrmd) complist.append(crmd) complist.append(pengine) return complist def StandbyStatus(self, node): out=self.rsh(node, self.templates["StandbyQueryCmd"] % node, 1) if not out: return "off" out = out[:-1] self.debug("Standby result: "+out) return out # status == "on" : Enter Standby mode # status == "off": Enter Active mode def SetStandbyMode(self, node, status): current_status = self.StandbyStatus(node) cmd = self.templates["StandbyCmd"] % (node, status) ret = self.rsh(node, cmd) return True def AddDummyRsc(self, node, rid): rsc_xml = """ ' '""" % (rid, rid) constraint_xml = """ ' ' """ % (rid, node, node, rid) self.rsh(node, self.templates['CibAddXml'] % (rsc_xml)) self.rsh(node, self.templates['CibAddXml'] % (constraint_xml)) def RemoveDummyRsc(self, node, rid): constraint = "\"//rsc_location[@rsc='%s']\"" % (rid) rsc = "\"//primitive[@id='%s']\"" % (rid) self.rsh(node, self.templates['CibDelXpath'] % constraint) self.rsh(node, self.templates['CibDelXpath'] % rsc) ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': pass diff --git a/cts/benchmark/clubench.in b/cts/benchmark/clubench.in index 1b64755e6e..9194505cac 100644 --- a/cts/benchmark/clubench.in +++ b/cts/benchmark/clubench.in @@ -1,195 +1,195 @@ #!/bin/sh # PROG=`basename $0` DIR=`dirname $0` SSHOPTS="-l root -o PasswordAuthentication=no -o ConnectTimeout=5" msg() { echo $@ >&2 } usage() { echo "usage: $0 " echo " dir: working directory (with the control file)" exit 0 } [ $# -eq 0 ] && usage WORKDIR=$1 test -d "$WORKDIR" || usage CTSCTRL=~/.cts CTRL=$WORKDIR/control CSV=$WORKDIR/bench.csv STATS=$WORKDIR/bench.stats test -f $CTRL && . $CTRL @datadir@/@PACKAGE@/tests/cts/cluster_test 500 || { msg "cluster_test failed" exit 1 } test -f $CTSCTRL || { msg no CTS control file $CTSCTRL exit 1 } . $CTSCTRL : ${CTS_logfacility:=local7} : ${CTS_stack:=corosync} : ${CTS_logfile:="@CRM_LOG_DIR@/ha-log-bench"} : ${CTS_adv:="--schema pacemaker-1.2 --clobber-cib -r"} : ${RUNS:=3} : ${CTSTESTS:="--benchmark"} : ${CTSDIR:="@datadir@/@PACKAGE@/tests/cts"} [ "$CTS_node_list" ] || { msg no node list specified exit 1 } case "$CTS_stack" in corosync) CRM_REPORT_OPTS="--corosync";; *) msg "$CTS_stack: cluster stack not recognized"; exit 1;; esac CTSOPTS="--stack $CTS_stack --at-boot $CTS_boot $CTS_adv" CTSOPTS="$CTSOPTS --facility $CTS_logfacility --logfile $CTS_logfile" if [ "x$CTS_stonith" != "x" ]; then CTSOPTS="$CTSOPTS --stonith-type $CTS_stonith" [ "x$CTS_stonith_args" != "x" ] && - CTSOPTS="$CTSOPTS --stonitha-params \"$CTS_stonith_args\"" + CTSOPTS="$CTSOPTS --stonith-params \"$CTS_stonith_args\"" else CTSOPTS="$CTSOPTS --stonith 0" fi CTSOPTS="$CTSOPTS $CTSTESTS" fibonacci() { local limit=$1 local n=2 prev=1 tmp_n while [ $n -le $limit ]; do echo $n tmp_n=$n n=$((n+prev)) prev=$tmp_n done [ $prev -ne $limit ] && echo $limit } [ "$SERIES" ] || SERIES=$(fibonacci `echo $CTS_node_list | wc -w`) get_nodes() { local c_nodes c_nodes=`echo $CTS_node_list | awk -v n=$1 ' { for( i=1; i<=NF; i++ ) node[cnt++]=$i } END{for( i=0; i $odir/ctsrun.out 2>&1 & ctspid=$! tail -f $odir/ctsrun.out & tailpid=$! wait $ctspid kill $tailpid >/dev/null 2>&1 } bench_re='CTS:.*runtime:' diginfo() { local d v local ctsdir=$1 local s="$2" filter=$3 ( cd $ctsdir for r in [0-9]*.tar.bz2; do tar xjf $r d=`basename $r .tar.bz2` for v in `grep $bench_re $d/ha-log.txt | eval $filter`; do s="$s,$v" done rm -r $d done echo $s ) } printheader() { diginfo $1 "" "awk '{print \$(NF-2)}'" } printstats() { diginfo $1 "$clusize" "awk '{print \$(NF)}'" } printmedians() { local f=$1 local s="$clusize" local middle=$((RUNS/2 + 1)) set `head -1 $f | sed 's/,/ /g'` local cols=$# local i v for i in `seq 2 $cols`; do v=`awk -v i=$i -F, '{print $i}' < $f | sort -n | head -$middle | tail -1` s="$s,$v" done echo $s } rm -f $CSV tmpf=`mktemp` test -f "$tmpf" || { msg "can't create temporary file" exit 1 } trap "rm -f $tmpf" 0 for clusize in $SERIES; do nodes=`get_nodes $clusize` outdir=$WORKDIR/$clusize rm -rf $outdir mkdir -p $outdir rm -f $tmpf node_cleanup for i in `seq $RUNS`; do > $CTS_logfile mkdir -p $outdir/$i runcts $outdir/$i mkreports $outdir/$i printstats $outdir/$i >> $tmpf done [ -f "$CSV" ] || printheader $outdir/1 > $CSV printmedians $tmpf >> $CSV cat $tmpf >> $STATS msg "Statistics for $clusize-node cluster saved" done msg "Tests done for series $SERIES, output in $CSV and $STATS" diff --git a/cts/cluster_test b/cts/cluster_test index 4d59c8222d..0cc8c26afc 100755 --- a/cts/cluster_test +++ b/cts/cluster_test @@ -1,166 +1,166 @@ #!/bin/bash if [ -e ~/.cts ]; then . ~/.cts fi anyAsked=0 [ $# -lt 1 ] || CTS_numtests=$1 die() { echo "$@"; exit 1; } if [ -z "$CTS_asked_once" ]; then anyAsked=1 echo "This script should only be executed on the test master." echo "The test master will remotely execute the actions required by the tests and should not be part of the cluster itself." read -p "Is this host intended to be the test master? (yN) " doUnderstand [ "$doUnderstand" = "y" ] \ || die "This script must be executed on the test master" fi if [ -z "$CTS_node_list" ]; then anyAsked=1 read -p "Please list your cluster nodes (eg. node1 node2 node3): " CTS_node_list else echo "Beginning test of cluster: $CTS_node_list" fi if [ -z "$CTS_stack" ]; then anyAsked=1 read -p "Which cluster stack are you using? ([corosync]): " CTS_stack [ -n "$CTS_stack" ] || CTS_stack=corosync else echo "Using the $CTS_stack cluster stack" fi [ "${CTS_node_list}" = "${CTS_node_list/$HOSTNAME/}" ] \ || die "This script must be executed on the test master and the test master cannot be part of the cluster" printf "+ Bootstraping ssh... " if [ -z "$SSH_AUTH_SOCK" ]; then printf "\n + Initializing SSH " eval "$(ssh-agent)" echo " + Adding identities..." ssh-add rc=$? if [ $rc -ne 0 ]; then echo " -- No identities added" printf "\nThe ability to open key-based 'ssh' connections (as the user 'root') is required to use CTS.\n" read -p " - Do you want this program to help you create one? (yN) " auto_fix if [ "$auto_fix" = "y" ]; then ssh-keygen -t dsa ssh-add else die "Please run 'ssh-keygen -t dsa' to create a new key" fi fi else echo "OK" fi test_ok=1 printf "+ Testing ssh configuration... " for n in $CTS_node_list; do ssh -l root -o PasswordAuthentication=no -o ConnectTimeout=5 "$n" /bin/true rc=$? if [ $rc -ne 0 ]; then echo " - connection to $n failed" test_ok=0 fi done if [ $test_ok -eq 0 ]; then printf "\nThe ability to open key-based 'ssh' connections (as the user 'root') is required to use CTS.\n" read -p " - Do you want this program to help you with such a setup? (yN) " auto_fix if [ "$auto_fix" = "y" ]; then # XXX are we picking the most suitable identity? privKey=$(ssh-add -L | head -n1 | cut -d" " -f3) sshCopyIdOpts="-o User=root" [ -z "$privKey" ] || sshCopyIdOpts+=" -i \"${privKey}.pub\"" for n in $CTS_node_list; do eval "ssh-copy-id $sshCopyIdOpts \"${n}\"" \ || die "Attempt to 'ssh-copy-id $sshCopyIdOpts \"$n\"' failed" done else die "Please install one of your SSH public keys to root's account on all cluster nodes" fi fi echo "OK" if [ -z "$CTS_logfile" ]; then anyAsked=1 read -p " + Where does/should syslog store logs from remote hosts? (/var/log/messages) " CTS_logfile [ -n "$CTS_logfile" ] || CTS_logfile=/var/log/messages fi [ -e "$CTS_logfile" ] || die "$CTS_logfile doesn't exist" if [ -z "$CTS_logfacility" ]; then anyAsked=1 read -p " + Which log facility does the cluster use? (daemon) " CTS_logfacility [ -n "$CTS_logfacility" ] || CTS_logfacility=daemon fi if [ -z "$CTS_boot" ]; then read -p "+ Is the cluster software started automatically when a node boots? [yN] " CTS_boot if [ -z "$CTS_boot" ]; then CTS_boot=0 else case $CTS_boot in 1|y|Y) CTS_boot=1;; *) CTS_boot=0;; esac fi fi if [ -z "$CTS_numtests" ]; then read -p "+ How many test iterations should be performed? (500) " CTS_numtests [ -n "$CTS_numtests" ] || CTS_numtests=500 fi if [ -z "$CTS_asked_once" ]; then anyAsked=1 read -p "+ What type of STONITH agent do you use? (none) " CTS_stonith [ -z "$CTS_stonith" ] \ || read -p "+ List any STONITH agent parameters (eq. device_host=switch.power.com): " CTS_stonith_args [ -n "$CTS_adv" ] \ || read -p "+ (Advanced) Any extra CTS parameters? (none) " CTS_adv fi [ $anyAsked -eq 0 ] \ || read -p "+ Save values to ~/.cts for next time? (yN) " doSave if [ "$doSave" = "y" ]; then cat > ~/.cts <<-EOF # CTS Test data CTS_stack="$CTS_stack" CTS_node_list="$CTS_node_list" CTS_logfile="$CTS_logfile" CTS_logport="$CTS_logport" CTS_logfacility="$CTS_logfacility" CTS_asked_once=1 CTS_adv="$CTS_adv" CTS_stonith="$CTS_stonith" CTS_stonith_args="$CTS_stonith_args" CTS_boot="$CTS_boot" EOF fi cts_extra="" if [ -n "$CTS_stonith" ]; then cts_extra="$cts_extra --stonith-type $CTS_stonith" [ -z "$CTS_stonith_args" ] \ - || cts_extra="$cts_extra --stonitha-params \"$CTS_stonith_args\"" + || cts_extra="$cts_extra --stonith-params \"$CTS_stonith_args\"" else cts_extra="$cts_extra --stonith 0" echo " - Testing a cluster without STONITH is like a blunt pencil... pointless" fi printf "\nAll set to go for %d iterations!\n" "$CTS_numtests" [ $anyAsked -ne 0 ] \ || echo "+ To use a different configuration, remove ~/.cts and re-run cts (or edit it manually)." echo Now paste the following command into this shell: echo "python `dirname "$0"`/CTSlab.py -L \"$CTS_logfile\" --syslog-facility \"$CTS_logfacility\" --no-unsafe-tests --stack \"$CTS_stack\" $CTS_adv --at-boot \"$CTS_boot\" $cts_extra \"$CTS_numtests\" --nodes \"$CTS_node_list\"" diff --git a/cts/cts-lrmd.in b/cts/cts-lrmd.in index ada5d819f2..6ca7f414df 100644 --- a/cts/cts-lrmd.in +++ b/cts/cts-lrmd.in @@ -1,1285 +1,1285 @@ #!@PYTHON@ """ Regression tests for Pacemaker's lrmd """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright (C) 2012-2018 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import stat import sys import subprocess import shlex import time # Where to find test binaries # Prefer the source tree if available BUILD_DIR = "@abs_top_builddir@" TEST_DIR = sys.path[0] SBIN_DIR = "@sbindir@" SYSTEMD_UNIT_DIR = "@systemdunitdir@" # File permissions for executable scripts we create EXECMODE = stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH # These values must be kept in sync with include/crm/crm.h class CrmExit: OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-lrmd.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) new_path = "%s/lrmd:%s" % (BUILD_DIR, new_path) # For lrmd, lrmd_test and pacemaker_remoted new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For crm_resource new_path = "%s/fencing:%s" % (BUILD_DIR, new_path) # For stonithd else: print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) # For stonithd, lrmd, lrmd_test and pacemaker_remoted print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Run a command, and return its standard output. """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") def write_file(filename, contents, executable=False): """ Create a file. """ f = io.open(filename, "w+") f.write(contents) f.close() if executable: os.chmod(filename, EXECMODE) class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, exit_code): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, exit_code): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single lrmd regression test """ def __init__(self, name, description, verbose=0, tls=0): self.name = name self.description = description self.cmds = [] if tls: self.daemon_location = "pacemaker_remoted" else: self.daemon_location = "lrmd" self.test_tool_location = "lrmd_test" self.verbose = verbose self.tls = tls self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK self.lrmd_process = None self.stonith_process = None self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): """ Prepare the host for running a test """ ### make sure we are in full control here ### cmd = shlex.split("killall -q -9 stonithd lt-stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test pacemaker_remoted") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" if self.tls == 0: self.stonith_process = subprocess.Popen(shlex.split("stonithd -s")) if self.verbose: additional_args = additional_args + " -V" self.lrmd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/lrmd-regression.log" % (self.daemon_location, additional_args))) time.sleep(1) def clean_environment(self): """ Clean up the host after running a test """ if self.lrmd_process: self.lrmd_process.terminate() self.lrmd_process.wait() if self.verbose: print("Daemon output") logfile = io.open('/tmp/lrmd-regression.log', 'rt', errors='replace') for line in logfile: print(line.strip().encode('utf-8', 'replace')) os.remove('/tmp/lrmd-regression.log') if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.lrmd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "") def add_cmd_check_stdout(self, args, match, no_match=""): """ Add a command with expected output to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, match, 0, no_match) def add_cmd(self, args): """ Add an lrmd_test command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "") def add_cmd_and_kill(self, kill_proc, args): """ Add an lrmd_test command and system command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "", kill=kill_proc) def add_expected_fail_cmd(self, args, exitcode=CrmExit.ERROR): """ Add an lrmd_test command to be executed as part of this test and expected to fail """ self.__new_cmd(self.test_tool_location, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) ### Typically, the kill argument is used to detect some sort of ### failure. Without yielding for a few seconds here, the process ### launched earlier that is listening for the failure may not have ### time to connect to the lrmd. time.sleep(2) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = pipe_output(test) args['cmd_output'] = output if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: raise OutputNotFoundError(output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: raise OutputFoundError(output) def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. """ res = 0 i = 1 if self.tls and self.name.count("stonith") != 0: self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print(self.result_txt) return res self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print(cmd['cmd_output']) print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print(cmd['cmd_output'].strip()) print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all lrmd regression tests """ def __init__(self, verbose=0, tls=0): self.tests = [] self.verbose = verbose self.tls = tls self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.need_authkey = 0 self.action_timeout = " -t 9000 " if self.tls: self.rsc_classes.remove("stonith") if "systemd" in self.rsc_classes: try: # This code doesn't need this import, but lrmd_dummy_daemon does, # so ensure the dependency is available rather than cause all # systemd tests to fail. import systemd.daemon except ImportError: print("Fatal error: python systemd bindings not found. Is package installed?", file=sys.stderr) sys.exit(CrmExit.ERROR) print("Testing resource classes", repr(self.rsc_classes)) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : "-c exec -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : "-c cancel -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc "+self.action_timeout+" -C systemd -T lrmd_dummy_daemon", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : "-c exec -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, # not sure why this one takes so much longer "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" -t 12000 ", "systemd_cancel_line" : "-c cancel -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T lrmd_dummy_daemon", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : "-c exec -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "upstart_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "upstart_cancel_line" : "-c cancel -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : "-c exec -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : "-c cancel -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : "-c exec -r \"lsb_test_rsc\" -a status -i \"2000\" "+self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : "-c cancel -r \"lsb_test_rsc\" -a \"status\" -i \"2000\" -t \"6000\" ", "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc "+self.action_timeout+" -C stonith -P pacemaker -T fence_dummy_monitor", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : "-c exec -r \"stonith_test_rsc\" -a \"start\" -t 8000 ", "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : "-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : "-c cancel -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): """ Create a named test """ test = Test(name, description, self.verbose, self.tls) self.tests.append(test) return test def setup_test_environment(self): """ Prepare the host before executing any tests """ os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): self.need_authkey = 1 os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") ### Make fake systemd daemon and unit file ### dummy_daemon = """#!@PYTHON@ import time, systemd.daemon time.sleep(3) systemd.daemon.notify("READY=1") while True: time.sleep(5) """ dummy_service_file = """ [Unit] Description=Dummy resource that takes a while to start [Service] Type=notify ExecStart=""" + SBIN_DIR + """/lrmd_dummy_daemon """ dummy_upstart_job = (""" description "Dummy service for regression tests" exec dd if=/dev/random of=/dev/null """) dummy_fence_sleep_agent = ("""#!@PYTHON@ import sys import time def main(): for line in sys.stdin: if line.count("monitor") > 0: time.sleep(30000) sys.exit(0) sys.exit(1) if __name__ == "__main__": main() """) dummy_fence_agent = ("""#!/bin/sh while read line; do case ${line} in *monitor*) exit 0;; *metadata*) echo '' echo ' dummy description.' echo ' http://www.example.com' echo ' ' echo ' ' echo ' ' echo ' ' echo ' Fencing Action' echo ' ' echo ' ' echo ' ' echo ' ' echo ' Physical plug number or name of virtual machine' echo ' ' echo ' ' echo ' ' echo ' ' echo ' ' echo ' ' echo ' ' echo ' ' echo '' exit 0;; esac exit 1 done """) if os.path.isdir("/etc/init"): write_file("/etc/init/lrmd_dummy_daemon.conf", dummy_upstart_job); write_file(SBIN_DIR + "/lrmd_dummy_daemon", dummy_daemon, executable=True); if os.path.isdir(SYSTEMD_UNIT_DIR): write_file(SYSTEMD_UNIT_DIR + "/lrmd_dummy_daemon.service", dummy_service_file); write_file(SBIN_DIR + "/fence_dummy_sleep", dummy_fence_sleep_agent, executable=True); write_file(SBIN_DIR + "/fence_dummy_monitor", dummy_fence_agent, executable=True); if os.path.exists("%s/cts/LSBDummy" % BUILD_DIR): os.system("cp %s/cts/LSBDummy /etc/init.d/LSBDummy" % BUILD_DIR) if not os.path.exists("@OCF_RA_DIR@/pacemaker"): os.system("mkdir -p @OCF_RA_DIR@/pacemaker/") # Install helper OCF agents for agent in ["Dummy", "Stateful", "ping"]: os.system("cp %s/extra/resources/%s @OCF_RA_DIR@/pacemaker/%s" % (BUILD_DIR, agent, agent)) os.system("chmod a+x @OCF_RA_DIR@/pacemaker/%s" % (agent)) else: # Assume it's installed os.system("cp @datadir@/@PACKAGE@/tests/cts/LSBDummy /etc/init.d/LSBDummy") os.system("chmod a+x /etc/init.d/LSBDummy") os.system("mkdir -p @CRM_CORE_DIR@/root") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") def cleanup_test_environment(self): """ Clean up the host after executing desired tests """ if self.need_authkey: os.system("rm -f /etc/pacemaker/authkey") os.system("rm -f /etc/init.d/LSBDummy") os.system("rm -f " + SYSTEMD_UNIT_DIR + "/lrmd_dummy_daemon.service") os.system("rm -f " + SBIN_DIR + "/lrmd_dummy_daemon") os.system("rm -f " + SBIN_DIR + "/fence_dummy_monitor") os.system("rm -f " + SBIN_DIR + "/fence_dummy_sleep") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") def build_generic_tests(self): """ Register tests that apply to all resource classes """ common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # Add the duplicate monitors test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_multi_rsc_tests(self): """ Register complex tests that involve managing multiple resouces of different types """ common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: ### If this fails, that means the monitor is not being rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_negative_tests(self): """ Register tests related to how the lrmd handles failures """ ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") # -t must be less than self.action_timeout test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 1000 -w") # -t must be less than self.action_timeout test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after lrmd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd('-c exec -r "stonith_test_rsc" -a "monitor" -i "600000" -l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" ' + self.action_timeout) test.add_cmd_and_kill("killall -9 -q stonithd lt-stonithd", '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error" -t 15000') test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r "test_rsc" -a "monitor" -i "100" ' + self.action_timeout + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" -t 6000') test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r "test_rsc" -a "monitor" -i "100" ' + self.action_timeout + ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ') test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout + '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"') ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T lrmd_dummy_daemon "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q lrmd_dummy_daemon", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T lrmd_dummy_daemon "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q dd", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-c cancel -r test_rsc -a \"monitor\" -i 1234 -t \"6000\" " ### interval is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a stop -i 100 -t \"6000\" " ### action name is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a monitor -i 6000 "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") def build_stress_tests(self): """ Register stress tests """ timeout = "-t 20000" iterations = 25 test = self.new_test("ocf_stress", "Verify OCF agent handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T lrmd_dummy_daemon -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) iterations = 9 timeout = "-t 30000" ### Verify recurring op in-flight collision is handled in series properly test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) for i in range(iterations): test.add_cmd("-c exec -r test_rsc -a monitor %s -i 100%d -k op_sleep -v 2 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\"" % (timeout, i)) test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) def build_custom_tests(self): """ Register tests that target specific cases """ ### verify resource temporary folder is created and used by OCF agents. ### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000", CrmExit.TIMEOUT) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000", CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T lrmd_dummy_daemon "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T lrmd_dummy_daemon "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+" -n " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"", "resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"", "resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_monitor\"", "resource-agent name=\"fence_dummy_monitor\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"lrmd_dummy_daemon\"", "resource-agent name=\"lrmd_dummy_daemon\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"lrmd_dummy_daemon\"", "resource-agent name=\"lrmd_dummy_daemon\"") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy_monitor") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy_monitor") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy_monitor") def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['pacemaker-remote'] = 0 def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-R" or args[i] == "--pacemaker-remote": self.options['pacemaker-remote'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--pacemaker-remote | -R Test pacemaker-remote binary instead of lrmd.") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") - print("\n\tExample: Run only the test 'start_top'") + print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run lrmd regression tests as specified by arguments """ update_path() opts = TestOptions() opts.build_options(argv) tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote']) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.build_stress_tests() tests.setup_test_environment() print("Starting ...") if opts.options['list-tests']: tests.print_list() elif opts.options['show-usage']: opts.show_usage() elif opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/cts/cts-pengine.in b/cts/cts-pengine.in index a5dbf37bfc..ccc4be1481 100644 --- a/cts/cts-pengine.in +++ b/cts/cts-pengine.in @@ -1,1260 +1,1260 @@ #!/bin/bash # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # USAGE_TEXT="Usage: cts-pengine [] Options: --help Display this text, then exit -V, --verbose Display any differences from expected output --run TEST Run only single specified test -b, --binary PATH Specify path to crm_simulate -i, --io-dir PATH Specify path to regression test data directory -v, --valgrind Run all commands under valgrind --valgrind-dhat Run all commands under valgrind with heap analyzer --valgrind-skip-output If running under valgrind, don't display output" SBINDIR="@sbindir@" BUILDDIR="@abs_top_builddir@" CRM_SCHEMA_DIRECTORY="@CRM_SCHEMA_DIRECTORY@" test_home=$(dirname $(readlink -e $0)) io_dir="$test_home/pengine" failed="$test_home/.regression.failed.diff" test_binary= single_test= verbose=0 num_failed=0 num_tests=0 VALGRIND_CMD="" VALGRIND_OPTS="-q --gen-suppressions=all --log-file=%q{valgrind_output} --time-stamp=yes --trace-children=no --show-reachable=no --leak-check=full --num-callers=20 --suppressions=$test_home/valgrind-pcmk.suppressions" VALGRIND_DHAT_OPTS="--tool=exp-dhat --log-file=%q{valgrind_output} --time-stamp=yes --trace-children=no --show-top-n=100 --num-callers=4" diff_opts="--ignore-all-space --ignore-blank-lines -u -N" # These constants must track crm_exit_t values CRM_EX_OK=0 CRM_EX_ERROR=1 CRM_EX_NOT_INSTALLED=5 CRM_EX_USAGE=64 CRM_EX_NOINPUT=66 EXITCODE=$CRM_EX_OK function info() { printf "$*\n" } function error() { printf " * ERROR: $*\n" } function failed() { printf " * FAILED: $*\n" } function show_test() { name=$1; shift printf " Test %-25s $*\n" "$name:" } info "Test home is:\t$test_home" while [ $# -gt 0 ] ; do case "$1" in -V|--verbose) verbose=1 shift ;; -v|--valgrind) export G_SLICE=always-malloc VALGRIND_CMD="valgrind $VALGRIND_OPTS" shift ;; --valgrind-dhat) VALGRIND_CMD="valgrind $VALGRIND_DHAT_OPTS" shift ;; --valgrind-skip-output) VALGRIND_SKIP_OUTPUT=1 shift ;; --run) single_test="$2" shift 2 ;; -b|--binary) test_binary="$2" shift 2 ;; -i|--io-dir) io_dir="$2" shift 2 ;; --help) echo "$USAGE_TEXT" exit $CRM_EX_OK ;; *) error "unknown option: $1" exit $CRM_EX_USAGE ;; esac done if [ -z "$PCMK_schema_directory" ]; then if [ -d "$BUILDDIR/xml" ]; then export PCMK_schema_directory="$BUILDDIR/xml" elif [ -d "$CRM_SCHEMA_DIRECTORY" ]; then export PCMK_schema_directory="$CRM_SCHEMA_DIRECTORY" fi fi if [ -z "$test_binary" ]; then if [ -x "$BUILDDIR/tools/crm_simulate" ]; then test_binary="$BUILDDIR/tools/crm_simulate" elif [ -x "$SBINDIR/crm_simulate" ]; then test_binary="$SBINDIR/crm_simulate" fi fi if [ ! -x "$test_binary" ]; then error "Test binary $test_binary not found" exit $CRM_EX_NOT_INSTALLED fi info "Test binary is:\t$test_binary" if [ -n "$PCMK_schema_directory" ]; then info "Schema home is:\t$PCMK_schema_directory" fi if [ "x$VALGRIND_CMD" != "x" ]; then info "Activating memory testing with valgrind"; fi info " " test_cmd="$VALGRIND_CMD $test_binary" #echo $test_cmd if [ `whoami` != root ]; then declare -x CIB_shadow_dir=/tmp fi do_test() { did_fail=0 expected_rc=0 num_tests=$(( $num_tests + 1 )) base=$1; shift name=$1; shift input=$io_dir/${base}.xml output=$io_dir/${base}.out expected=$io_dir/${base}.exp dot_png=$io_dir/${base}.png dot_expected=$io_dir/${base}.dot dot_output=$io_dir/${base}.pe.dot scores=$io_dir/${base}.scores score_output=$io_dir/${base}.scores.pe stderr_expected=$io_dir/${base}.stderr stderr_output=$io_dir/${base}.stderr.pe summary=$io_dir/${base}.summary summary_output=$io_dir/${base}.summary.pe valgrind_output=$io_dir/${base}.valgrind export valgrind_output if [ "x$1" = "x--rc" ]; then expected_rc=$2 shift; shift; fi show_test "$base" "$name" if [ ! -f $input ]; then error "No input"; did_fail=1 num_failed=$(( $num_failed + 1 )) return $CRM_EX_NOINPUT; fi if [ "$create_mode" != "true" -a ! -f $expected ]; then error "no stored output"; return $CRM_EX_NOINPUT; fi # ../admin/crm_verify -X $input if [ ! -z "$single_test" ]; then echo CIB_shadow_dir=$io_dir $test_cmd -x $input -D $dot_output -G $output -S $* CIB_shadow_dir=$io_dir $test_cmd -x $input -D $dot_output -G $output -S $* 2>&1 | tee $summary_output else CIB_shadow_dir=$io_dir $test_cmd -x $input -S &> $summary_output fi CIB_shadow_dir=$io_dir $test_cmd -x $input -D $dot_output -G $output -SQ -s $* 2> $stderr_output > $score_output rc=$? if [ $rc -ne $expected_rc ]; then failed "Test returned: $rc"; did_fail=1 echo "CIB_shadow_dir=$io_dir $test_cmd -x $input -D $dot_output -G $output -SQ -s $*" fi if [ -z "$VALGRIND_SKIP_OUTPUT" ]; then if [ -s "${valgrind_output}" ]; then error "Valgrind reported errors"; did_fail=1 cat ${valgrind_output} fi rm -f ${valgrind_output} fi if [ -s core ]; then error "Core-file detected: core.${base}"; did_fail=1 rm -f $test_home/core.$base mv core $test_home/core.$base fi if [ -e "$stderr_expected" ]; then diff $diff_opts $stderr_expected $stderr_output >/dev/null rc2=$? if [ $rc2 -ne 0 ]; then failed "stderr changed"; diff $diff_opts $stderr_expected $stderr_output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 fi elif [ -s "$stderr_output" ]; then error "Output was written to stderr" did_fail=1 cat $stderr_output fi rm -f $stderr_output if [ ! -s $output ]; then error "No graph produced"; did_fail=1 num_failed=$(( $num_failed + 1 )) rm -f $output return $CRM_EX_ERROR; fi if [ ! -s $dot_output ]; then error "No dot-file summary produced"; did_fail=1 num_failed=$(( $num_failed + 1 )) rm -f $output return $CRM_EX_ERROR; else echo "digraph \"g\" {" > $dot_output.sort LC_ALL=POSIX sort -u $dot_output | grep -v -e ^}$ -e digraph >> $dot_output.sort echo "}" >> $dot_output.sort mv -f $dot_output.sort $dot_output fi if [ ! -s $score_output ]; then error "No allocation scores produced"; did_fail=1 num_failed=$(( $num_failed + 1 )) rm $output return $CRM_EX_ERROR; else LC_ALL=POSIX sort $score_output > $score_output.sorted mv -f $score_output.sorted $score_output fi if [ "$create_mode" = "true" ]; then cp "$output" "$expected" cp "$dot_output" "$dot_expected" cp "$score_output" "$scores" cp "$summary_output" "$summary" info " Created expected outputs" fi diff $diff_opts $summary $summary_output >/dev/null rc2=$? if [ $rc2 -ne 0 ]; then failed "summary changed"; diff $diff_opts $summary $summary_output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 fi diff $diff_opts $dot_expected $dot_output >/dev/null rc=$? if [ $rc -ne 0 ]; then failed "dot-file summary changed"; diff $diff_opts $dot_expected $dot_output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 else rm -f $dot_output fi sed -i -e 's/crm_feature_set="[^"]*"//' -e 's/batch-limit="[0-9]*"//' $expected $output diff $diff_opts $expected $output >/dev/null rc2=$? if [ $rc2 -ne 0 ]; then failed "xml-file changed"; diff $diff_opts $expected $output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 fi diff $diff_opts $scores $score_output >/dev/null rc=$? if [ $rc -ne 0 ]; then failed "scores-file changed"; diff $diff_opts $scores $score_output 2>/dev/null >> $failed echo "" >> $failed did_fail=1 fi rm -f $output $score_output $summary_output if [ $did_fail -eq 1 ]; then num_failed=$(( $num_failed + 1 )) return $CRM_EX_ERROR fi return $CRM_EX_OK } function test_results { if [ $num_failed -ne 0 ]; then if [ -s "$failed" ]; then if [ $verbose -eq 1 ]; then error "Results of $num_failed failed tests (out of $num_tests)...." cat $failed else error "Results of $num_failed failed tests (out of $num_tests) are in $failed...." error "Use $0 -V to display them automatically." fi else error "$num_failed (of $num_tests) tests failed (no diff results)" rm $failed fi EXITCODE=$CRM_EX_ERROR fi } # zero out the error log > $failed if [ -n "$single_test" ]; then do_test $single_test "Single shot" $* TEST_RC=$? cat $failed exit $TEST_RC fi DO_VERSIONED_TESTS=0 create_mode="true" info Generating test outputs for these tests... # do_test file description info Done. echo "" info Performing the following tests from $io_dir create_mode="false" echo "" do_test simple1 "Offline " do_test simple2 "Start " do_test simple3 "Start 2 " do_test simple4 "Start Failed" do_test simple6 "Stop Start " do_test simple7 "Shutdown " #do_test simple8 "Stonith " #do_test simple9 "Lower version" #do_test simple10 "Higher version" do_test simple11 "Priority (ne)" do_test simple12 "Priority (eq)" do_test simple8 "Stickiness" echo "" do_test group1 "Group " do_test group2 "Group + Native " do_test group3 "Group + Group " do_test group4 "Group + Native (nothing)" do_test group5 "Group + Native (move) " do_test group6 "Group + Group (move) " do_test group7 "Group colocation" do_test group13 "Group colocation (cant run)" do_test group8 "Group anti-colocation" do_test group9 "Group recovery" do_test group10 "Group partial recovery" do_test group11 "Group target_role" do_test group14 "Group stop (graph terminated)" do_test group15 "-ve group colocation" do_test bug-1573 "Partial stop of a group with two children" do_test bug-1718 "Mandatory group ordering - Stop group_FUN" do_test bug-lf-2613 "Move group on failure" do_test bug-lf-2619 "Move group on clone failure" do_test group-fail "Ensure stop order is preserved for partially active groups" do_test group-unmanaged "No need to restart r115 because r114 is unmanaged" do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails" do_test group-dependents "Account for the location preferences of things colocated with a group" echo "" do_test rsc_dep1 "Must not " do_test rsc_dep3 "Must " do_test rsc_dep5 "Must not 3 " do_test rsc_dep7 "Must 3 " do_test rsc_dep10 "Must (but cant)" do_test rsc_dep2 "Must (running) " do_test rsc_dep8 "Must (running : alt) " do_test rsc_dep4 "Must (running + move)" do_test asymmetric "Asymmetric - require explicit location constraints" echo "" do_test orphan-0 "Orphan ignore" do_test orphan-1 "Orphan stop" do_test orphan-2 "Orphan stop, remove failcount" echo "" do_test params-0 "Params: No change" do_test params-1 "Params: Changed" do_test params-2 "Params: Resource definition" do_test params-4 "Params: Reload" do_test params-5 "Params: Restart based on probe digest" do_test novell-251689 "Resource definition change + target_role=stopped" do_test bug-lf-2106 "Restart all anonymous clone instances after config change" do_test params-6 "Params: Detect reload in previously migrated resource" do_test nvpair-id-ref "Support id-ref in nvpair with optional name" do_test not-reschedule-unneeded-monitor "Do not reschedule unneeded monitors while resource definitions have changed" do_test reload-becomes-restart "Cancel reload if restart becomes required" echo "" do_test target-0 "Target Role : baseline" do_test target-1 "Target Role : master" do_test target-2 "Target Role : invalid" echo "" do_test base-score "Set a node's default score for all nodes" echo "" do_test date-1 "Dates" -t "2005-020" do_test date-2 "Date Spec - Pass" -t "2005-020T12:30" do_test date-3 "Date Spec - Fail" -t "2005-020T11:30" do_test origin "Timing of recurring operations" -t "2014-05-07 00:28:00" do_test probe-0 "Probe (anon clone)" do_test probe-1 "Pending Probe" do_test probe-2 "Correctly re-probe cloned groups" do_test probe-3 "Probe (pending node)" do_test probe-4 "Probe (pending node + stopped resource)" do_test standby "Standby" do_test comments "Comments" echo "" do_test one-or-more-0 "Everything starts" do_test one-or-more-1 "Nothing starts because of A" do_test one-or-more-2 "D can start because of C" do_test one-or-more-3 "D cannot start because of B and C" do_test one-or-more-4 "D cannot start because of target-role" do_test one-or-more-5 "Start A and F even though C and D are stopped" do_test one-or-more-6 "Leave A running even though B is stopped" do_test one-or-more-7 "Leave A running even though C is stopped" do_test bug-5140-require-all-false "Allow basegrp:0 to stop" do_test clone-require-all-1 "clone B starts node 3 and 4" do_test clone-require-all-2 "clone B remains stopped everywhere" do_test clone-require-all-3 "clone B stops everywhere because A stops everywhere" do_test clone-require-all-4 "clone B remains on node 3 and 4 with only one instance of A remaining." do_test clone-require-all-5 "clone B starts on node 1 3 and 4" do_test clone-require-all-6 "clone B remains active after shutting down instances of A" do_test clone-require-all-7 "clone A and B both start at the same time. all instances of A start before B." do_test clone-require-all-no-interleave-1 "C starts everywhere after A and B" do_test clone-require-all-no-interleave-2 "C starts on nodes 1, 2, and 4 with only one active instance of B" do_test clone-require-all-no-interleave-3 "C remains active when instance of B is stopped on one node and started on another." do_test one-or-more-unrunnable-instances "Avoid dependencies on instances that won't ever be started" echo "" do_test order1 "Order start 1 " do_test order2 "Order start 2 " do_test order3 "Order stop " do_test order4 "Order (multiple) " do_test order5 "Order (move) " do_test order6 "Order (move w/ restart) " do_test order7 "Order (mandatory) " do_test order-optional "Order (score=0) " do_test order-required "Order (score=INFINITY) " do_test bug-lf-2171 "Prevent group start when clone is stopped" do_test order-clone "Clone ordering should be able to prevent startup of dependent clones" do_test order-sets "Ordering for resource sets" do_test order-serialize "Serialize resources without inhibiting migration" do_test order-serialize-set "Serialize a set of resources without inhibiting migration" do_test clone-order-primitive "Order clone start after a primitive" do_test clone-order-16instances "Verify ordering of 16 cloned resources" do_test order-optional-keyword "Order (optional keyword)" do_test order-mandatory "Order (mandatory keyword)" do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones" do_test ordered-set-basic-startup "Constraint set with default order settings." do_test ordered-set-natural "Allow natural set ordering" do_test order-wrong-kind "Order (error)" echo "" do_test coloc-loop "Colocation - loop" do_test coloc-many-one "Colocation - many-to-one" do_test coloc-list "Colocation - many-to-one with list" do_test coloc-group "Colocation - groups" do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation" do_test coloc-attr "Colocation based on node attributes" do_test coloc-negative-group "Negative colocation with a group" do_test coloc-intra-set "Intra-set colocation" do_test bug-lf-2435 "Colocation sets with a negative score" do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependent must stop" do_test coloc_fp_logic "Verify floating point calculations in colocation are working" do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc." do_test colo_slave_w_native "cl#5070 - Verify promotion order is affected when colocating slave to native rsc." do_test anti-colocation-order "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node" do_test anti-colocation-master "Organize order of actions for master resources in anti-colocations" do_test anti-colocation-slave "Organize order of actions for slave resources in anti-colocations" do_test enforce-colo1 "Always enforce B with A INFINITY." do_test complex_enforce_colo "Always enforce B with A INFINITY. (make sure heat-engine stops)" echo "" do_test rsc-sets-seq-true "Resource Sets - sequential=false" do_test rsc-sets-seq-false "Resource Sets - sequential=true" do_test rsc-sets-clone "Resource Sets - Clone" do_test rsc-sets-master "Resource Sets - Master" do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)" #echo "" #do_test agent1 "version: lt (empty)" #do_test agent2 "version: eq " #do_test agent3 "version: gt " echo "" do_test attrs1 "string: eq (and) " do_test attrs2 "string: lt / gt (and)" do_test attrs3 "string: ne (or) " do_test attrs4 "string: exists " do_test attrs5 "string: not_exists " do_test attrs6 "is_dc: true " do_test attrs7 "is_dc: false " do_test attrs8 "score_attribute " do_test per-node-attrs "Per node resource parameters" echo "" do_test mon-rsc-1 "Schedule Monitor - start" do_test mon-rsc-2 "Schedule Monitor - move " do_test mon-rsc-3 "Schedule Monitor - pending start " do_test mon-rsc-4 "Schedule Monitor - move/pending start" echo "" do_test rec-rsc-0 "Resource Recover - no start " do_test rec-rsc-1 "Resource Recover - start " do_test rec-rsc-2 "Resource Recover - monitor " do_test rec-rsc-3 "Resource Recover - stop - ignore" do_test rec-rsc-4 "Resource Recover - stop - block " do_test rec-rsc-5 "Resource Recover - stop - fence " do_test rec-rsc-6 "Resource Recover - multiple - restart" do_test rec-rsc-7 "Resource Recover - multiple - stop " do_test rec-rsc-8 "Resource Recover - multiple - block " do_test rec-rsc-9 "Resource Recover - group/group" do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor" do_test stop-failure-no-quorum "Stop failure without quorum" do_test stop-failure-no-fencing "Stop failure without fencing available" do_test stop-failure-with-fencing "Stop failure with fencing available" do_test multiple-active-block-group "Support of multiple-active=block for resource groups" do_test multiple-monitor-one-failed "Consider resource failed if any of the configured monitor operations failed" echo "" do_test quorum-1 "No quorum - ignore" do_test quorum-2 "No quorum - freeze" do_test quorum-3 "No quorum - stop " do_test quorum-4 "No quorum - start anyway" do_test quorum-5 "No quorum - start anyway (group)" do_test quorum-6 "No quorum - start anyway (clone)" do_test bug-cl-5212 "No promotion with no-quorum-policy=freeze" do_test suicide-needed-inquorate "no-quorum-policy=suicide: suicide necessary" do_test suicide-not-needed-initial-quorum "no-quorum-policy=suicide: suicide not necessary at initial quorum" do_test suicide-not-needed-never-quorate "no-quorum-policy=suicide: suicide not necessary if never quorate" do_test suicide-not-needed-quorate "no-quorum-policy=suicide: suicide necessary if quorate" echo "" do_test rec-node-1 "Node Recover - Startup - no fence" do_test rec-node-2 "Node Recover - Startup - fence " do_test rec-node-3 "Node Recover - HA down - no fence" do_test rec-node-4 "Node Recover - HA down - fence " do_test rec-node-5 "Node Recover - CRM down - no fence" do_test rec-node-6 "Node Recover - CRM down - fence " do_test rec-node-7 "Node Recover - no quorum - ignore " do_test rec-node-8 "Node Recover - no quorum - freeze " do_test rec-node-9 "Node Recover - no quorum - stop " do_test rec-node-10 "Node Recover - no quorum - stop w/fence" do_test rec-node-11 "Node Recover - CRM down w/ group - fence " do_test rec-node-12 "Node Recover - nothing active - fence " do_test rec-node-13 "Node Recover - failed resource + shutdown - fence " do_test rec-node-15 "Node Recover - unknown lrm section" do_test rec-node-14 "Serialize all stonith's" echo "" do_test multi1 "Multiple Active (stop/start)" echo "" do_test migrate-begin "Normal migration" do_test migrate-success "Completed migration" do_test migrate-partial-1 "Completed migration, missing stop on source" do_test migrate-partial-2 "Successful migrate_to only" do_test migrate-partial-3 "Successful migrate_to only, target down" do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from" do_test bug-5186-partial-migrate "Handle partial migration when src node loses membership" do_test migrate-fail-2 "Failed migrate_from" do_test migrate-fail-3 "Failed migrate_from + stop on source" do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-5 "Failed migrate_from + stop on source and target" do_test migrate-fail-6 "Failed migrate_to" do_test migrate-fail-7 "Failed migrate_to + stop on source" do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-9 "Failed migrate_to + stop on source and target" do_test migrate-stop "Migration in a stopping stack" do_test migrate-start "Migration in a starting stack" do_test migrate-stop_start "Migration in a restarting stack" do_test migrate-stop-complex "Migration in a complex stopping stack" do_test migrate-start-complex "Migration in a complex starting stack" do_test migrate-stop-start-complex "Migration in a complex moving stack" do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown" do_test migrate-1 "Migrate (migrate)" do_test migrate-2 "Migrate (stable)" do_test migrate-3 "Migrate (failed migrate_to)" do_test migrate-4 "Migrate (failed migrate_from)" do_test novell-252693 "Migration in a stopping stack" do_test novell-252693-2 "Migration in a starting stack" do_test novell-252693-3 "Non-Migration in a starting and stopping stack" do_test bug-1820 "Migration in a group" do_test bug-1820-1 "Non-migration in a group" do_test migrate-5 "Primitive migration with a clone" do_test migrate-fencing "Migration after Fencing" do_test migrate-both-vms "Migrate two VMs that have no colocation" do_test migration-behind-migrating-remote "Migrate resource behind migrating remote connection" do_test 1-a-then-bm-move-b "Advanced migrate logic. A then B. migrate B." do_test 2-am-then-b-move-a "Advanced migrate logic, A then B, migrate A without stopping B" do_test 3-am-then-bm-both-migrate "Advanced migrate logic. A then B. migrate both" do_test 4-am-then-bm-b-not-migratable "Advanced migrate logic, A then B, B not migratable" do_test 5-am-then-bm-a-not-migratable "Advanced migrate logic. A then B. move both, a not migratable" do_test 6-migrate-group "Advanced migrate logic, migrate a group" do_test 7-migrate-group-one-unmigratable "Advanced migrate logic, migrate group mixed with allow-migrate true/false" do_test 8-am-then-bm-a-migrating-b-stopping "Advanced migrate logic, A then B, A migrating, B stopping" do_test 9-am-then-bm-b-migrating-a-stopping "Advanced migrate logic, A then B, B migrate, A stopping" do_test 10-a-then-bm-b-move-a-clone "Advanced migrate logic, A clone then B, migrate B while stopping A" do_test 11-a-then-bm-b-move-a-clone-starting "Advanced migrate logic, A clone then B, B moving while A is start/stopping" do_test a-promote-then-b-migrate "A promote then B start. migrate B" do_test a-demote-then-b-migrate "A demote then B stop. migrate B" if [ $DO_VERSIONED_TESTS -eq 1 ]; then do_test migrate-versioned "Disable migration for versioned resources" fi #echo "" #do_test complex1 "Complex " do_test bug-lf-2422 "Dependency on partially active group - stop ocfs:*" echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test clone-anon-failcount "Merge failcounts for anonymous clones" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" do_test inc2 "Incarnation silent restart, stop, move" do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)" do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)" do_test inc7 "Clone colocation" do_test inc8 "Clone anti-colocation" do_test inc9 "Non-unique clone" do_test inc10 "Non-unique clone (stop)" do_test inc11 "Primitive colocation with clones" do_test inc12 "Clone shutdown" do_test cloned-group "Make sure only the correct number of cloned groups are started" do_test cloned-group-stop "Ensure stopping qpidd also stops glance and cinder" do_test clone-no-shuffle "Don't prioritize allocation of instances that must be moved" do_test clone-max-zero "Orphan processing with clone-max=0" do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" do_test bug-lf-2160 "Don't shuffle clones due to colocation" do_test bug-lf-2213 "clone-node-max enforcement for cloned groups" do_test bug-lf-2153 "Clone ordering constraints" do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable" do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone" do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)" do_test clone-colocate-instance-2 "Colocation with a specific clone instance" do_test clone-order-instance "Ordering with specific clone instances" do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation" do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" do_test bug-lf-2544 "Balanced clone placement" do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0" do_test bug-lf-2574 "Avoid clone shuffle" do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start" do_test bug-cl-5168 "Don't shuffle clones" do_test bug-cl-5170 "Prevent clone from starting with on-fail=block" do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block" do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)" do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)" do_test rebalance-unique-clones "Rebalance unique clone instances with no stickiness" echo "" do_test cloned_start_one "order first clone then clone... first clone_min=2" do_test cloned_start_two "order first clone then clone... first clone_min=2" do_test cloned_stop_one "order first clone then clone... first clone_min=2" do_test cloned_stop_two "order first clone then clone... first clone_min=2" do_test clone_min_interleave_start_one "order first clone then clone... first clone_min=2 and then has interleave=true" do_test clone_min_interleave_start_two "order first clone then clone... first clone_min=2 and then has interleave=true" do_test clone_min_interleave_stop_one "order first clone then clone... first clone_min=2 and then has interleave=true" do_test clone_min_interleave_stop_two "order first clone then clone... first clone_min=2 and then has interleave=true" do_test clone_min_start_one "order first clone then primitive... first clone_min=2" do_test clone_min_start_two "order first clone then primitive... first clone_min=2" do_test clone_min_stop_all "order first clone then primitive... first clone_min=2" do_test clone_min_stop_one "order first clone then primitive... first clone_min=2" do_test clone_min_stop_two "order first clone then primitive... first clone_min=2" echo "" do_test unfence-startup "Clean unfencing" do_test unfence-definition "Unfencing when the agent changes" do_test unfence-parameters "Unfencing when the agent parameters changes" do_test unfence-device "Unfencing when a cluster has only fence devices" echo "" do_test master-0 "Stopped -> Slave" do_test master-1 "Stopped -> Promote" do_test master-2 "Stopped -> Promote : notify" do_test master-3 "Stopped -> Promote : master location" do_test master-4 "Started -> Promote : master location" do_test master-5 "Promoted -> Promoted" do_test master-6 "Promoted -> Promoted (2)" do_test master-7 "Promoted -> Fenced" do_test master-8 "Promoted -> Fenced -> Moved" do_test master-9 "Stopped + Promotable + No quorum" do_test master-10 "Stopped -> Promotable : notify with monitor" do_test master-11 "Stopped -> Promote : colocation" do_test novell-239082 "Demote/Promote ordering" do_test novell-239087 "Stable master placement" do_test master-12 "Promotion based solely on rsc_location constraints" do_test master-13 "Include preferences of colocated resources when placing master" do_test master-demote "Ordering when actions depends on demoting a slave resource" do_test master-ordering "Prevent resources from starting that need a master" do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" do_test master-group "Promotion of cloned groups" do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily" do_test master-failed-demote "Don't retry failed demote actions" do_test master-failed-demote-2 "Don't retry failed demote actions (notify=false)" do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does" do_test master-reattach "Re-attach to a running master" do_test master-allow-start "Don't include master score if it would prevent allocation" do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints" do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly" do_test master-role "Prevent target-role from promoting more than master-max instances" do_test bug-lf-2358 "Master-Master anti-colocation" do_test master-promotion-constraint "Mandatory master colocation constraints" do_test unmanaged-master "Ensure role is preserved for unmanaged resources" do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters" do_test master-demote-2 "Demote does not clear past failure" do_test master-move "Move master based on failure of colocated group" do_test master-probed-score "Observe the promotion score of probed resources" do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint" do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint" do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by order constraint" do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint" do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion." do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive" do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score" do_test master-demote-block "Block promotion if demote fails with on-fail=block" do_test master-dependent-ban "Don't stop instances from being active because a dependent is banned from that host" do_test master-stop "Stop instances due to location constraint with role=Started" do_test master-partially-demoted-group "Allow partially demoted group to finish demoting" do_test bug-cl-5213 "Ensure role colocation with -INFINITY is enforced" do_test bug-cl-5219 "Allow unrelated resources with a common colocation target to remain promoted" do_test master-asymmetrical-order "Fix the behaviors of multi-state resources with asymmetrical ordering" do_test master-notify "Master promotion with notifies" do_test master-score-startup "Use permanent master scores without LRM history" do_test failed-demote-recovery "Recover resource in slave role after demote fails" do_test failed-demote-recovery-master "Recover resource in master role after demote fails" echo "" do_test history-1 "Correctly parse stateful-1 resource state" echo "" do_test managed-0 "Managed (reference)" do_test managed-1 "Not managed - down " do_test managed-2 "Not managed - up " do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource" do_test bug-5028-detach "Ensure detach still works" do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged " do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged " do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged " do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged " do_test unmanaged-block-restart "Block restart of resources if any dependent resource in a group is unmanaged" echo "" do_test interleave-0 "Interleave (reference)" do_test interleave-1 "coloc - not interleaved" do_test interleave-2 "coloc - interleaved " do_test interleave-3 "coloc - interleaved (2)" do_test interleave-pseudo-stop "Interleaved clone during stonith" do_test interleave-stop "Interleaved clone during stop" do_test interleave-restart "Interleaved clone during dependency restart" echo "" do_test notify-0 "Notify reference" do_test notify-1 "Notify simple" do_test notify-2 "Notify simple, confirm" do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" echo "" do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1" do_test 696 "OSDL #696 - CRM starts stonith RA without monitor" do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop" do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3" do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1" do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id" do_test 829 "OSDL #829" do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted" do_test 994-2 "OSDL #994 - with a dependent resource" do_test 1360 "OSDL #1360 - Clone stickiness" do_test 1484 "OSDL #1484 - on_fail=stop" do_test 1494 "OSDL #1494 - Clone stability" do_test unrunnable-1 "Unrunnable" do_test unrunnable-2 "Unrunnable 2" do_test stonith-0 "Stonith loop - 1" do_test stonith-1 "Stonith loop - 2" do_test stonith-2 "Stonith loop - 3" do_test stonith-3 "Stonith startup" do_test stonith-4 "Stonith node state" do_test bug-1572-1 "Recovery of groups depending on master/slave" do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted" do_test bug-1685 "Depends-on-master ordering" do_test bug-1822 "Don't promote partially active groups" do_test bug-pm-11 "New resource added to a m/s group" do_test bug-pm-12 "Recover only the failed portion of a cloned group" do_test bug-n-387749 "Don't shuffle clone instances" do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node" do_test bug-lf-1920 "Correctly handle probes that find active resources" do_test bnc-515172 "Location constraint with multiple expressions" do_test colocate-primitive-with-clone "Optional colocation with a clone" do_test use-after-free-merge "Use-after-free in native_merge_weights" do_test bug-lf-2551 "STONITH ordering for stop" do_test bug-lf-2606 "Stonith implies demote" do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults" do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering" do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false" do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false" do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false" do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts." do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false" do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false." do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false." do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false" do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true" do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources." do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases" do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload" do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change." do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart" do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed." do_test failcount "Ensure failcounts are correctly expired" do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present" do_test per-op-failcount "Ensure per-operation failcount is handled and not passed to fence agent" do_test on-fail-ignore "Ensure on-fail=ignore works even beyond migration-threshold" do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart" do_test monitor-onfail-stop "bug-5058 - Monitor failure wiht on-fail set to stop" do_test bug-5059 "No need to restart p_stateful1:*" do_test bug-5069-op-enabled "Test on-fail=ignore with failure when monitor is enabled." do_test bug-5069-op-disabled "Test on-fail-ignore with failure when monitor is disabled." do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections" do_test expire-non-blocked-failure "Ignore failure-timeout only if the failed operation has on-fail=block" do_test asymmetrical-order-move "Respect asymmetrical ordering when trying to move resources" do_test start-then-stop-with-unfence "Avoid graph loop with start-then-stop constraint plus unfencing" do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc." do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith." do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group" do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group" do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)." do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)" do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group." do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs." do_test probe-timeout "cl#5099 - Default probe timeout" do_test concurrent-fencing "Allow performing fencing operations in parallel" echo "" do_test systemhealth1 "System Health () #1" do_test systemhealth2 "System Health () #2" do_test systemhealth3 "System Health () #3" do_test systemhealthn1 "System Health (None) #1" do_test systemhealthn2 "System Health (None) #2" do_test systemhealthn3 "System Health (None) #3" do_test systemhealthm1 "System Health (Migrate On Red) #1" do_test systemhealthm2 "System Health (Migrate On Red) #2" do_test systemhealthm3 "System Health (Migrate On Red) #3" do_test systemhealtho1 "System Health (Only Green) #1" do_test systemhealtho2 "System Health (Only Green) #2" do_test systemhealtho3 "System Health (Only Green) #3" do_test systemhealthp1 "System Health (Progessive) #1" do_test systemhealthp2 "System Health (Progessive) #2" do_test systemhealthp3 "System Health (Progessive) #3" echo "" do_test utilization "Placement Strategy - utilization" do_test minimal "Placement Strategy - minimal" do_test balanced "Placement Strategy - balanced" echo "" do_test placement-stickiness "Optimized Placement Strategy - stickiness" do_test placement-priority "Optimized Placement Strategy - priority" do_test placement-location "Optimized Placement Strategy - location" do_test placement-capacity "Optimized Placement Strategy - capacity" echo "" do_test utilization-order1 "Utilization Order - Simple" do_test utilization-order2 "Utilization Order - Complex" do_test utilization-order3 "Utilization Order - Migrate" -do_test utilization-order4 "Utilization Order - Live Mirgration (bnc#695440)" +do_test utilization-order4 "Utilization Order - Live Migration (bnc#695440)" do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3" do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)" do_test load-stopped-loop-2 "cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering" echo "" do_test colocated-utilization-primitive-1 "Colocated Utilization - Primitive" do_test colocated-utilization-primitive-2 "Colocated Utilization - Choose the most capable node" do_test colocated-utilization-group "Colocated Utilization - Group" do_test colocated-utilization-clone "Colocated Utilization - Clone" do_test utilization-check-allowed-nodes "Only check the capacities of the nodes that can run the resource" echo "" do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources" do_test node-maintenance-1 "cl#5128 - Node maintenance" do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)" do_test shutdown-maintenance-node "Do not fence a maintenance node if it shuts down cleanly" do_test rsc-maintenance "Per-resource maintenance" echo "" do_test not-installed-agent "The resource agent is missing" do_test not-installed-tools "Something the resource agent needs is missing" echo "" do_test stopped-monitor-00 "Stopped Monitor - initial start" do_test stopped-monitor-01 "Stopped Monitor - failed started" do_test stopped-monitor-02 "Stopped Monitor - started multi-up" do_test stopped-monitor-03 "Stopped Monitor - stop started" do_test stopped-monitor-04 "Stopped Monitor - failed stop" do_test stopped-monitor-05 "Stopped Monitor - start unmanaged" do_test stopped-monitor-06 "Stopped Monitor - unmanaged multi-up" do_test stopped-monitor-07 "Stopped Monitor - start unmanaged multi-up" do_test stopped-monitor-08 "Stopped Monitor - migrate" do_test stopped-monitor-09 "Stopped Monitor - unmanage started" do_test stopped-monitor-10 "Stopped Monitor - unmanaged started multi-up" do_test stopped-monitor-11 "Stopped Monitor - stop unmanaged started" do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")" do_test stopped-monitor-20 "Stopped Monitor - initial stop" do_test stopped-monitor-21 "Stopped Monitor - stopped single-up" do_test stopped-monitor-22 "Stopped Monitor - stopped multi-up" do_test stopped-monitor-23 "Stopped Monitor - start stopped" do_test stopped-monitor-24 "Stopped Monitor - unmanage stopped" do_test stopped-monitor-25 "Stopped Monitor - unmanaged stopped multi-up" do_test stopped-monitor-26 "Stopped Monitor - start unmanaged stopped" do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role="Started")" do_test stopped-monitor-30 "Stopped Monitor - new node started" do_test stopped-monitor-31 "Stopped Monitor - new node stopped" echo"" do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)" do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)" do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)" do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)" do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)" do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)" do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)" do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)" do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)" do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)" do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)" do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)" do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)" do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)" do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)" do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)" do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)" do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)" do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)" do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)" do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)" do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)" do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)" do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)" do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)" do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)" do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)" do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)" do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)" do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)" do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)" do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)" do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)" do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)" do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)" do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)" do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)" do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)" do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)" do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)" do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)" do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)" do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)" do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)" do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)" do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)" do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)" do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)" do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)" do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)" do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)" do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)" do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)" do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)" do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)" do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)" do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)" do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)" do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)" do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)" do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)" do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)" do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)" do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)" do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)" do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)" do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)" do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)" do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)" do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)" do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)" do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)" do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)" do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)" do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)" do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)" do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)" do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)" do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)" do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)" do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)" do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)" do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)" do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)" do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)" do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)" do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)" do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)" do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)" do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)" do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)" do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)" do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)" echo "" do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)" do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)" do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)" do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)" do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)" do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)" do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)" do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)" do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)" do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)" do_test cluster-specific-params "Cluster-specific instance attributes based on rules" do_test site-specific-params "Site-specific instance attributes based on rules" echo "" do_test template-1 "Template - 1" do_test template-2 "Template - 2" do_test template-3 "Template - 3 (merge operations)" do_test template-coloc-1 "Template - Colocation 1" do_test template-coloc-2 "Template - Colocation 2" do_test template-coloc-3 "Template - Colocation 3" do_test template-order-1 "Template - Order 1" do_test template-order-2 "Template - Order 2" do_test template-order-3 "Template - Order 3" do_test template-ticket "Template - Ticket" do_test template-rsc-sets-1 "Template - Resource Sets 1" do_test template-rsc-sets-2 "Template - Resource Sets 2" do_test template-rsc-sets-3 "Template - Resource Sets 3" do_test template-rsc-sets-4 "Template - Resource Sets 4" do_test template-clone-primitive "Cloned primitive from template" do_test template-clone-group "Cloned group from template" do_test location-sets-templates "Resource sets and templates - Location" do_test tags-coloc-order-1 "Tags - Colocation and Order (Simple)" do_test tags-coloc-order-2 "Tags - Colocation and Order (Resource Sets with Templates)" do_test tags-location "Tags - Location" do_test tags-ticket "Tags - Ticket" echo "" do_test container-1 "Container - initial" do_test container-2 "Container - monitor failed" do_test container-3 "Container - stop failed" do_test container-4 "Container - reached migration-threshold" do_test container-group-1 "Container in group - initial" do_test container-group-2 "Container in group - monitor failed" do_test container-group-3 "Container in group - stop failed" do_test container-group-4 "Container in group - reached migration-threshold" do_test container-is-remote-node "Place resource within container when container is remote-node" do_test bug-rh-1097457 "Kill user defined container/contents ordering" do_test bug-cl-5247 "Graph loop when recovering m/s resource in a container" do_test bundle-order-startup "Bundle startup ordering" do_test bundle-order-partial-start "Bundle startup ordering when some dependancies are already running" do_test bundle-order-partial-start-2 "Bundle startup ordering when some dependancies and the container are already running" do_test bundle-order-stop "Bundle stop ordering" do_test bundle-order-partial-stop "Bundle startup ordering when some dependancies are already stopped" do_test bundle-order-stop-on-remote "Stop nested resource after bringing up the connection" do_test bundle-order-startup-clone "Prevent startup because bundle isn't promoted" do_test bundle-order-startup-clone-2 "Bundle startup with clones" do_test bundle-order-stop-clone "Stop bundle because clone is stopping" do_test bundle-nested-colocation "Colocation of nested connection resources" do_test bundle-order-fencing "Order pseudo bundle fencing after parent node fencing if both are happening" do_test bundle-probe-order-1 "order 1" do_test bundle-probe-order-2 "order 2" do_test bundle-probe-order-3 "order 3" do_test bundle-probe-remotes "Ensure remotes get probed too" echo "" do_test whitebox-fail1 "Fail whitebox container rsc." do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection." do_test whitebox-fail3 "Failed containers should not run nested on remote nodes." do_test whitebox-start "Start whitebox container with resources assigned to it" do_test whitebox-stop "Stop whitebox container with resources assigned to it" do_test whitebox-move "Move whitebox container with resources assigned to it" do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource" do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established" do_test whitebox-ms-ordering-move "Stop/Start cycle within a moving container" do_test whitebox-orphaned "Properly shutdown orphaned whitebox container" do_test whitebox-orphan-ms "Properly tear down orphan ms resources on remote-nodes" do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start." do_test whitebox-migrate1 "Migrate both container and connection resource" do_test whitebox-imply-stop-on-fence "imply stop action on container node rsc when host node is fenced" do_test whitebox-nested-group "Verify guest remote-node works nested in a group" do_test guest-node-host-dies "Verify guest node is recovered if host goes away" echo "" do_test remote-startup-probes "Baremetal remote-node startup probes" do_test remote-startup "Startup a newly discovered remote-nodes with no status." do_test remote-fence-unclean "Fence unclean baremetal remote-node" do_test remote-fence-unclean2 "Fence baremetal remote-node after cluster node fails and connection can not be recovered" do_test remote-fence-unclean-3 "Probe failed remote nodes (triggers fencing)" do_test remote-move "Move remote-node connection resource" do_test remote-disable "Disable a baremetal remote-node" do_test remote-probe-disable "Probe then stop a baremetal remote-node" do_test remote-orphaned "Properly shutdown orphaned connection resource" do_test remote-orphaned2 "verify we can handle orphaned remote connections with active resources on the remote" do_test remote-recover "Recover connection resource after cluster-node fails." do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section" do_test remote-partial-migrate "Make sure partial migrations are handled before ops on the remote node." do_test remote-partial-migrate2 "Make sure partial migration target is prefered for remote connection." do_test remote-recover-fail "Make sure start failure causes fencing if rsc are active on remote." do_test remote-start-fail "Make sure a start failure does not result in fencing if no active resources are on remote." do_test remote-unclean2 "Make monitor failure always results in fencing, even if no rsc are active on remote." do_test remote-fence-before-reconnect "Fence before clearing recurring monitor failure" do_test remote-recovery "Recover remote connections before attempting demotion" do_test remote-recover-connection "Optimistically recovery of only the connection" do_test remote-recover-all "Fencing when the connection has no home" do_test remote-recover-no-resources "Fencing when the connection has no home and no active resources" do_test remote-recover-unknown "Fencing when the connection has no home and the remote has no operation history" do_test remote-reconnect-delay "Waiting for remote reconnect interval to expire" do_test remote-connection-unrecoverable "Remote connection host must be fenced, with connection unrecoverable" echo "" do_test resource-discovery "Exercises resource-discovery location constraint option." do_test rsc-discovery-per-node "Disable resource discovery per node" if [ $DO_VERSIONED_TESTS -eq 1 ]; then echo "" do_test versioned-resources "Start resources with #ra-version rules" do_test restart-versioned "Restart resources on #ra-version change" do_test reload-versioned "Reload resources on #ra-version change" echo "" do_test versioned-operations-1 "Use #ra-version to configure operations of native resources" do_test versioned-operations-2 "Use #ra-version to configure operations of stonith resources" do_test versioned-operations-3 "Use #ra-version to configure operations of master/slave resources" do_test versioned-operations-4 "Use #ra-version to configure operations of groups of the resources" fi echo "" test_results exit $EXITCODE diff --git a/cts/cts-stonithd.in b/cts/cts-stonithd.in index f05ea2f3e2..8a1a8793be 100644 --- a/cts/cts-stonithd.in +++ b/cts/cts-stonithd.in @@ -1,1427 +1,1427 @@ #!@PYTHON@ """ Regression tests for Pacemaker's stonithd """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright (C) 2012-2018 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import sys import subprocess import shlex import time FENCE_DUMMY = "@datadir@/@PACKAGE@/tests/cts/fence_dummy" # These values must be kept in sync with include/crm/crm.h class CrmExit: OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Execute command and return its standard output """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") def localname(): """ Return the uname of the local host """ our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] else: our_uname = "localhost" return our_uname def killall(process): """ Kill all instances of a process """ cmd = shlex.split("killall -9 -q %s" % process) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, exit_code): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, exit_code): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single test """ def __init__(self, name, description, verbose=0, with_cpg=0): self.name = name self.description = description self.cmds = [] self.verbose = verbose self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK if with_cpg: self.stonith_options = "-c" self.enable_corosync = 1 else: self.stonith_options = "-s" self.enable_corosync = 0 self.stonith_process = None self.stonith_output = "" self.stonith_patterns = [] self.negative_stonith_patterns = [] self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, } ) def start_environment(self): """ Prepare the host for executing a test """ # Make sure we are in full control killall("pacemakerd") killall("stonithd") if self.verbose: self.stonith_options = self.stonith_options + " -V" print("Starting stonithd with %s" % self.stonith_options) if os.path.exists("/tmp/stonith-regression.log"): os.remove('/tmp/stonith-regression.log') cmd = "@CRM_DAEMON_DIR@/stonithd %s -l /tmp/stonith-regression.log" % self.stonith_options self.stonith_process = subprocess.Popen(shlex.split(cmd)) time.sleep(1) def clean_environment(self): """ Clean up the host after executing a test """ if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.stonith_output = "" self.stonith_process = None logfile = io.open('/tmp/stonith-regression.log', 'rt') for line in logfile.readlines(): self.stonith_output = self.stonith_output + line if self.verbose: print("Daemon Output Start") print(self.stonith_output) print("Daemon Output End") os.remove('/tmp/stonith-regression.log') def add_stonith_log_pattern(self, pattern): """ Add a log pattern to expect from this test """ self.stonith_patterns.append(pattern) def add_stonith_neg_log_pattern(self, pattern): """ Add a log pattern that should not occur with this test """ self.negative_stonith_patterns.append(pattern) def add_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "") def add_cmd_no_wait(self, cmd, args): """ Add a simple command to be executed (without waiting) as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "", 1) def add_cmd_check_stdout(self, cmd, args, match, no_match=""): """ Add a simple command with expected output to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, match, 0, no_match) def add_expected_fail_cmd(self, cmd, args, exitcode=CrmExit.ERROR): """ Add a command to be executed as part of this test and expected to fail """ self.__new_cmd(cmd, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = pipe_output(test, stderr=True) if self.verbose: print(output) if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: raise OutputNotFoundError(output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: raise OutputFoundError(output) def count_negative_matches(self, outline): """ Return 1 if a line matches patterns that shouldn't have occurred """ count = 0 for line in self.negative_stonith_patterns: if outline.count(line): count = 1 if self.verbose: print("This pattern should not have matched = '%s" % (line)) return count def match_stonith_patterns(self): """ Check test output for expected patterns """ negative_matches = 0 cur = 0 pats = self.stonith_patterns total_patterns = len(self.stonith_patterns) if len(self.stonith_patterns) == 0 and len(self.negative_stonith_patterns) == 0: return for line in self.stonith_output.split("\n"): negative_matches = negative_matches + self.count_negative_matches(line) if len(pats) == 0: continue cur = -1 for pat in pats: cur = cur + 1 if line.count(pats[cur]): del pats[cur] break if len(pats) > 0 or negative_matches: if self.verbose: for pat in pats: print("Pattern Not Matched = '%s'" % pat) msg = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." self.result_txt = msg % (self.name, len(pats), total_patterns, negative_matches) self.result_exitcode = CrmExit.ERROR def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. """ res = 0 i = 1 self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() if self.result_exitcode == CrmExit.OK: self.match_stonith_patterns() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all fencing regression tests """ def __init__(self, verbose=0): self.tests = [] self.verbose = verbose self.autogen_corosync_cfg = 0 if not os.path.exists("/etc/corosync/corosync.conf"): self.autogen_corosync_cfg = 1 def new_test(self, name, description, with_cpg=0): """ Create a named test """ test = Test(name, description, self.verbose, with_cpg) self.tests.append(test) return test def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def start_corosync(self): """ Start the corosync process """ if self.verbose: print("Starting corosync") test = subprocess.Popen("corosync", stdout=subprocess.PIPE) test.wait() time.sleep(10) def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_cpg_only(self): """ Run all corosync-enabled tests """ for test in self.tests: if test.enable_corosync: test.run() def run_no_cpg(self): """ Run all standalone tests """ for test in self.tests: if not test.enable_corosync: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) def build_api_sanity_tests(self): """ Register tests to verify basic API usage """ verbose_arg = "" if self.verbose: verbose_arg = "-V" test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-t %s" % (verbose_arg)) test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-m %s" % (verbose_arg)) def build_custom_timeout_tests(self): """ Register tests to verify custom timeout usage """ # custom timeout without topology test = self.new_test("cpg_custom_timeout_1", "Verify per device timeouts work as expected without using topology.", 1) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4"') test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4 = 7 test.add_stonith_log_pattern("Total timeout set to 7") # custom timeout _WITH_ topology test = self.new_test("cpg_custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology.", 1) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4000"') test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2") test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4000 = 4003 test.add_stonith_log_pattern("Total timeout set to 4003") def build_fence_merge_tests(self): """ Register tests to verify when fence operations should be merged """ ### Simple test that overlapping fencing operations get merged test = self.new_test("cpg_custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### one merger will happen test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur test = self.new_test("cpg_custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur with topologies used test = self.new_test("cpg_custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") def build_fence_no_merge_tests(self): """ Register tests to verify when fence operations should not be merged """ test = self.new_test("cpg_custom_no_merge", "Verify differing fencing operations are not merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") test.add_stonith_neg_log_pattern("Merging stonith action off for node node3 originating from client") def build_standalone_tests(self): """ Register a grab bag of tests that can be executed in standalone or corosync mode """ test_types = [ { "prefix" : "standalone", "use_cpg" : 0, }, { "prefix" : "cpg", "use_cpg" : 1, }, ] # test what happens when all devices timeout for test_type in test_types: test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") if test_type["use_cpg"] == 1: test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", CrmExit.TIMEOUT) test.add_stonith_log_pattern("Total timeout set to 6") else: test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", CrmExit.ERROR) test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ") # test what happens when multiple devices can fence a node, but the first device fails. for test_type in test_types: test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2") if test_type["use_cpg"] == 1: test.add_stonith_log_pattern("Total timeout set to 6") # simple topology test for one device for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_simple" % test_type["prefix"], "Verify all fencing devices at a level are used.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("Total timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # add topology, delete topology, verify fencing still works for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_add_remove" % test_type["prefix"], "Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-d node3 -i 1") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("Total timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level has multiple devices. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_device_fails" % test_type["prefix"], "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 40") test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level fails. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 3") test.add_stonith_log_pattern("Total timeout set to 18") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # test what happens when the first fencing level had devices that no one has registered for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], "Verify topology can continue with missing devices.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 2") # Test what happens if multiple fencing levels are defined, and then the first one is removed. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_removal" % test_type["prefix"], "Verify level removal works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit. test.add_cmd("stonith_admin", "-d node3 -i 2") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 8") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_neg_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # Test targeting a topology level by node name pattern. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_pattern" % test_type["prefix"], "Verify targeting topology by node name pattern works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", """-R true -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1 node2 node3" """) test.add_cmd("stonith_admin", """-r '@node.*' -i 1 -v true""") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test allowing commas and semicolons as delimiters in pcmk_host_list for test_type in test_types: test = self.new_test("%s_host_list_delimiters" % test_type["prefix"], "Verify commas and semicolons can be used as pcmk_host_list delimiters", test_type["use_cpg"]) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1,node2,node3" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=pcmk1;pcmk2;pcmk3" """) test.add_cmd("stonith_admin", "stonith_admin -F node2 -t 2") test.add_cmd("stonith_admin", "stonith_admin -F pcmk3 -t 2") test.add_stonith_log_pattern("for host 'node2' with device 'true1' returned: 0") test.add_stonith_log_pattern("for host 'pcmk3' with device 'true2' returned: 0") # test the stonith builds the correct list of devices that can fence a node. for test_type in test_types: test = self.new_test("%s_list_devices" % test_type["prefix"], "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1") # simple test of device monitor for test_type in test_types: test = self.new_test("%s_monitor" % test_type["prefix"], "Verify device is reachable", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-Q false1") test.add_expected_fail_cmd("stonith_admin", "-Q true2", CrmExit.ERROR) # Verify monitor occurs for duration of timeout period on failure for test_type in test_types: test = self.new_test("%s_monitor_timeout" % test_type["prefix"], "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '-R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", CrmExit.ERROR) test.add_stonith_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries for test_type in test_types: test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '-R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15", CrmExit.ERROR) test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test for test_type in test_types: test = self.new_test("%s_register" % test_type["prefix"], "Verify devices can be registered and un-registered", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", CrmExit.ERROR) # simple reboot test for test_type in test_types: test = self.new_test("%s_reboot" % test_type["prefix"], "Verify devices can be rebooted", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-B node3 -t 2") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", CrmExit.ERROR) # test fencing history. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_fence_history" % test_type["prefix"], "Verify last fencing operation is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2 -V") test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "") # simple test of dynamic list query for test_type in test_types: test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # fence using dynamic list query for test_type in test_types: test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V") # simple test of query using status action for test_type in test_types: test = self.new_test("%s_status_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # test what happens when no reboot action is advertised for test_type in test_types: test = self.new_test("%s_no_reboot_support" % test_type["prefix"], "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-B node1 -t 5 -V") test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") # make sure reboot is used when reboot action is advertised for test_type in test_types: test = self.new_test("%s_with_reboot_support" % test_type["prefix"], "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-B node1 -t 5 -V") test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") def build_nodeid_tests(self): """ Register tests that use a corosync node id """ our_uname = localname() ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("cpg_supply_nodeid", "Verify nodeid is given when fence agent has nodeid as parameter", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("cpg_do_not_supply_nodeid", "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) # use a host name that won't be in corosync.conf test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=regr-test\"") test.add_cmd("stonith_admin", "-F regr-test -t 3") test.add_stonith_neg_log_pattern("For stonith action (off) for victim regr-test, adding nodeid") ### verify nodeid use doesn't explode standalone mode test = self.new_test("standalone_do_not_supply_nodeid", "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_neg_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) def build_unfence_tests(self): """ Register tests that verify unfencing """ our_uname = localname() ### verify unfencing using automatic unfencing test = self.new_test("cpg_unfence_required_1", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) # both devices should be executed test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") ### verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("cpg_unfence_required_2", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=fail" -o "pcmk_host_list=%s"' % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), CrmExit.ERROR) ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_3", "Verify require unfencing on all devices even when at different topology levels", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_4", "Verify all required devices are executed even with topology levels fail.", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true3 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true4 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false3 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false4 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)") def build_unfence_on_target_tests(self): """ Register tests that verify unfencing that runs on the target """ our_uname = localname() ### verify unfencing using on_target device test = self.new_test("cpg_unfence_on_target_1", "Verify unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify failure of unfencing using on_target device test = self.new_test("cpg_unfence_on_target_2", "Verify failure unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", CrmExit.ERROR) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology test = self.new_test("cpg_unfence_on_target_3", "Verify unfencing with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology fails when victim node doesn't exist test = self.new_test("cpg_unfence_on_target_4", "Verify unfencing failure with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2") test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", CrmExit.ERROR) test.add_stonith_log_pattern("(on) to be executed on the target node") def build_remap_tests(self): """ Register tests that verify remapping of reboots to off-on """ test = self.new_test("cpg_remap_simple", "Verify sequential topology reboot is remapped to all-off-then-all-on", 1) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=1" -o "pcmk_reboot_timeout=10" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=2" -o "pcmk_reboot_timeout=20" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_stonith_log_pattern("Total timeout set to 3 for peer's fencing of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") # fence_dummy sets "on" as an on_target action test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test = self.new_test("cpg_remap_automatic", "Verify remapped topology reboot skips automatic 'on'", 1) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test.add_stonith_neg_log_pattern("perform op 'node_fake on' with") test.add_stonith_neg_log_pattern("'on' failure") test = self.new_test("cpg_remap_complex_1", "Verify remapped topology reboot in second level works if non-remapped first level fails", 1) test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'false1'") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test = self.new_test("cpg_remap_complex_2", "Verify remapped topology reboot failure in second level proceeds to third level", 1) test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true3 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", "-r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'false1'") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'false2'") test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'true2'") test.add_stonith_neg_log_pattern("node_fake with true3") def setup_environment(self, use_corosync): """ Prepare the host before executing any tests """ if self.autogen_corosync_cfg and use_corosync: corosync_conf = (""" totem { version: 2 crypto_cipher: none crypto_hash: none nodeid: 101 secauth: off interface { ttl: 1 ringnumber: 0 mcastport: 6666 mcastaddr: 226.94.1.1 bindnetaddr: 127.0.0.1 } } logging { debug: off fileline: off to_syslog: no to_stderr: no syslog_facility: daemon timestamp: on to_logfile: yes logfile: @CRM_LOG_DIR@/corosync.test.log logfile_priority: info } """) os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf)) if use_corosync: ### make sure we are in control ### killall("corosync") self.start_corosync() os.system("cp %s @sbindir@/fence_dummy" % FENCE_DUMMY) # modifies dummy agent to do require unfencing os.system("sed 's/on_target=/automatic=/g' %s > @sbindir@/fence_dummy_auto_unfence" % FENCE_DUMMY) os.system("chmod 711 @sbindir@/fence_dummy_auto_unfence") # modifies dummy agent to not advertise reboot os.system("sed 's/^.*.*//g' %s > @sbindir@/fence_dummy_no_reboot" % FENCE_DUMMY) os.system("chmod 711 @sbindir@/fence_dummy_no_reboot") def cleanup_environment(self, use_corosync): """ Clean up the host after executing desired tests """ if use_corosync: killall("corosync") if self.verbose and os.path.exists('@CRM_LOG_DIR@/corosync.test.log'): print("Corosync output") logfile = io.open('@CRM_LOG_DIR@/corosync.test.log', 'rt') for line in logfile.readlines(): print(line.strip()) os.remove('@CRM_LOG_DIR@/corosync.test.log') if self.autogen_corosync_cfg: os.system("rm -f /etc/corosync/corosync.conf") os.system("rm -f @sbindir@/fence_dummy") os.system("rm -f @sbindir@/fence_dummy_auto_unfence") os.system("rm -f @sbindir@/fence_dummy_no_reboot") class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['cpg-only'] = 0 self.options['no-cpg'] = 0 self.options['show-usage'] = 0 def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-n" or args[i] == "--no-cpg": self.options['no-cpg'] = 1 elif args[i] == "-c" or args[i] == "--cpg-only": self.options['cpg-only'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--cpg-only | -c] Only run tests that require corosync.") print("\t [--no-cpg | -n] Only run tests that do not require corosync") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") - print("\n\tExample: Run only the test 'start_top'") + print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run fencing regression tests as specified by arguments """ opts = TestOptions() opts.build_options(argv) use_corosync = 1 tests = Tests(opts.options['verbose']) tests.build_standalone_tests() tests.build_custom_timeout_tests() tests.build_api_sanity_tests() tests.build_fence_merge_tests() tests.build_fence_no_merge_tests() tests.build_unfence_tests() tests.build_unfence_on_target_tests() tests.build_nodeid_tests() tests.build_remap_tests() if opts.options['list-tests']: tests.print_list() sys.exit(CrmExit.OK) elif opts.options['show-usage']: opts.show_usage() sys.exit(CrmExit.OK) print("Starting ...") if opts.options['no-cpg']: use_corosync = 0 tests.setup_environment(use_corosync) if opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() elif opts.options['no-cpg']: tests.run_no_cpg() tests.print_results() elif opts.options['cpg-only']: tests.run_cpg_only() tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment(use_corosync) tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/include/crm/common/util.h b/include/crm/common/util.h index 09252665d2..f3fc4390f0 100644 --- a/include/crm/common/util.h +++ b/include/crm/common/util.h @@ -1,185 +1,185 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_COMMON_UTIL__H # define CRM_COMMON_UTIL__H #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Utility functions * \ingroup core */ # include # include # include # include # include # include # include # include # include # define ONLINESTATUS "online" // Status of an online client # define OFFLINESTATUS "offline" // Status of an offline client /* public Pacemaker Remote functions (from remote.c) */ int crm_default_remote_port(void); /* public string functions (from strings.c) */ char *crm_itoa_stack(int an_int, char *buf, size_t len); gboolean crm_is_true(const char *s); int crm_str_to_boolean(const char *s, int *ret); int crm_parse_int(const char *text, const char *default_text); char * crm_strip_trailing_newline(char *str); gboolean crm_str_eq(const char *a, const char *b, gboolean use_case); gboolean safe_str_neq(const char *a, const char *b); gboolean crm_strcase_equal(gconstpointer a, gconstpointer b); guint crm_strcase_hash(gconstpointer v); guint g_str_hash_traditional(gconstpointer v); char *crm_strdup_printf(char const *format, ...) __attribute__ ((__format__ (__printf__, 1, 2))); # define safe_str_eq(a, b) crm_str_eq(a, b, FALSE) # define crm_str_hash g_str_hash_traditional static inline char * crm_itoa(int an_int) { return crm_strdup_printf("%d", an_int); } /*! * \brief Create hash table with dynamically allocated string keys/values * - * \return Newly hash table + * \return Newly allocated hash table * \note It is the caller's responsibility to free the result, using * g_hash_table_destroy(). */ static inline GHashTable * crm_str_table_new() { return g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); } /*! * \brief Create hash table with case-insensitive dynamically allocated string keys/values * - * \return Newly hash table + * \return Newly allocated hash table * \note It is the caller's responsibility to free the result, using * g_hash_table_destroy(). */ static inline GHashTable * crm_strcase_table_new() { return g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, free); } GHashTable *crm_str_table_dup(GHashTable *old_table); # define crm_atoi(text, default_text) crm_parse_int(text, default_text) /* public I/O functions (from io.c) */ void crm_build_path(const char *path_c, mode_t mode); long long crm_get_msec(const char *input); unsigned long long crm_get_interval(const char *input); int char2score(const char *score); char *score2char(int score); char *score2char_stack(int score, char *buf, size_t len); /* public operation functions (from operations.c) */ gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval); gboolean decode_transition_key(const char *key, char **uuid, int *action, int *transition_id, int *target_rc); gboolean decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc); int rsc_op_expected_rc(lrmd_event_data_t *event); gboolean did_rsc_op_fail(lrmd_event_data_t *event, int target_rc); bool crm_op_needs_metadata(const char *rsc_class, const char *op); xmlNode *crm_create_op_xml(xmlNode *parent, const char *prefix, const char *task, const char *interval, const char *timeout); #define CRM_DEFAULT_OP_TIMEOUT_S "20s" int compare_version(const char *version1, const char *version2); /* coverity[+kill] */ void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork); static inline gboolean is_not_set(long long word, long long bit) { return ((word & bit) == 0); } static inline gboolean is_set(long long word, long long bit) { return ((word & bit) == bit); } static inline gboolean is_set_any(long long word, long long bit) { return ((word & bit) != 0); } static inline guint crm_hash_table_size(GHashTable * hashtable) { if (hashtable == NULL) { return 0; } return g_hash_table_size(hashtable); } char *crm_meta_name(const char *field); const char *crm_meta_value(GHashTable * hash, const char *field); char *crm_md5sum(const char *buffer); char *crm_generate_uuid(void); bool crm_is_daemon_name(const char *name); int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid); #ifdef HAVE_GNUTLS_GNUTLS_H void crm_gnutls_global_init(void); #endif bool pcmk_acl_required(const char *user); char *crm_generate_ra_key(const char *standard, const char *provider, const char *type); bool crm_provider_required(const char *standard); int crm_parse_agent_spec(const char *spec, char **standard, char **provider, char **type); #ifdef __cplusplus } #endif #endif diff --git a/include/crm/transition.h b/include/crm/transition.h index dbe78a108c..9c5614b48b 100644 --- a/include/crm/transition.h +++ b/include/crm/transition.h @@ -1,151 +1,151 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_TRANSITION__H # define CRM_TRANSITION__H #ifdef __cplusplus extern "C" { #endif #include #include #include typedef enum { action_type_pseudo, action_type_rsc, action_type_crm } action_type_e; typedef struct te_timer_s crm_action_timer_t; typedef struct crm_graph_s crm_graph_t; typedef struct synapse_s { int id; int priority; gboolean ready; gboolean failed; gboolean executed; gboolean confirmed; GListPtr actions; /* crm_action_t* */ GListPtr inputs; /* crm_action_t* */ } synapse_t; typedef struct crm_action_s { int id; int timeout; int interval; GHashTable *params; action_type_e type; crm_action_timer_t *timer; synapse_t *synapse; gboolean sent_update; /* sent to the CIB */ gboolean executed; /* sent to the CRM */ gboolean confirmed; gboolean failed; gboolean can_fail; xmlNode *xml; } crm_action_t; struct te_timer_s { int source_id; int timeout; crm_action_t *action; }; /* order matters here */ enum transition_action { tg_done, tg_stop, tg_restart, tg_shutdown, }; struct crm_graph_s { int id; char *source; int abort_priority; gboolean complete; const char *abort_reason; enum transition_action completion_action; int num_actions; int num_synapses; int batch_limit; int network_delay; int stonith_timeout; int transition_timeout; int fired; int pending; int skipped; int completed; int incomplete; - GListPtr synapses; /* synpase_t* */ + GListPtr synapses; /* synapse_t* */ int migration_limit; }; typedef struct crm_graph_functions_s { gboolean(*pseudo) (crm_graph_t * graph, crm_action_t * action); gboolean(*rsc) (crm_graph_t * graph, crm_action_t * action); gboolean(*crmd) (crm_graph_t * graph, crm_action_t * action); gboolean(*stonith) (crm_graph_t * graph, crm_action_t * action); gboolean(*allowed) (crm_graph_t * graph, crm_action_t * action); } crm_graph_functions_t; enum transition_status { transition_active, transition_pending, /* active but no actions performed this time */ transition_complete, transition_stopped, transition_terminated, transition_action_failed, transition_failed, }; void set_default_graph_functions(void); void set_graph_functions(crm_graph_functions_t * fns); crm_graph_t *unpack_graph(xmlNode * xml_graph, const char *reference); int run_graph(crm_graph_t * graph); gboolean update_graph(crm_graph_t * graph, crm_action_t * action); void destroy_graph(crm_graph_t * graph); const char *transition_status(enum transition_status state); void print_graph(unsigned int log_level, crm_graph_t * graph); void print_action(int log_level, const char *prefix, crm_action_t * action); bool update_abort_priority(crm_graph_t * graph, int priority, enum transition_action action, const char *abort_reason); const char *actiontype2text(action_type_e type); lrmd_event_data_t *convert_graph_action(xmlNode * resource, crm_action_t * action, int status, int rc); #ifdef __cplusplus } #endif #endif diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c index bdadde8e58..1e33a42610 100644 --- a/lib/cluster/cpg.c +++ b/lib/cluster/cpg.c @@ -1,660 +1,659 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */ static bool cpg_evicted = FALSE; gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL; #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) void cluster_disconnect_cpg(crm_cluster_t *cluster) { pcmk_cpg_handle = 0; if (cluster->cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(cluster->cpg_handle, &cluster->group); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; } else { crm_info("No CPG connection"); } } uint32_t get_local_nodeid(cpg_handle_t handle) { int rc = CS_OK; int retries = 0; static uint32_t local_nodeid = 0; cpg_handle_t local_handle = handle; cpg_callbacks_t cb = { }; if(local_nodeid != 0) { return local_nodeid; } if(handle == 0) { crm_trace("Creating connection"); cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); } if (rc == CS_OK) { retries = 0; crm_trace("Performing lookup"); cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); } if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); } if(handle == 0) { crm_trace("Closing connection"); cpg_finalize(local_handle); } crm_debug("Local nodeid is %u", local_nodeid); return local_nodeid; } GListPtr cs_message_queue = NULL; int cs_message_timer = 0; static ssize_t crm_cs_flush(gpointer data); static gboolean crm_cs_flush_cb(gpointer data) { cs_message_timer = 0; crm_cs_flush(data); return FALSE; } #define CS_SEND_MAX 200 static ssize_t crm_cs_flush(gpointer data) { int sent = 0; ssize_t rc = 0; int queue_len = 0; static unsigned int last_sent = 0; cpg_handle_t *handle = (cpg_handle_t *)data; if (*handle == 0) { crm_trace("Connection is dead"); return pcmk_ok; } queue_len = g_list_length(cs_message_queue); if ((queue_len % 1000) == 0 && queue_len > 1) { crm_err("CPG queue has grown to %d", queue_len); } else if (queue_len == CS_SEND_MAX) { crm_warn("CPG queue has grown to %d", queue_len); } if (cs_message_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active %d", cs_message_timer); return pcmk_ok; } while (cs_message_queue && sent < CS_SEND_MAX) { struct iovec *iov = cs_message_queue->data; errno = 0; rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1); if (rc != CS_OK) { break; } sent++; last_sent++; crm_trace("CPG message sent, size=%llu", (unsigned long long) iov->iov_len); cs_message_queue = g_list_remove(cs_message_queue, iov); free(iov->iov_base); free(iov); } queue_len -= sent; if (sent > 1 || cs_message_queue) { crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)", sent, queue_len, last_sent, ais_error2text(rc), (long long) rc); } else { crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)", sent, queue_len, last_sent, ais_error2text(rc), (long long) rc); } if (cs_message_queue) { uint32_t delay_ms = 100; if(rc != CS_OK) { /* Proportionally more if sending failed but cap at 1s */ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); } cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data); } return rc; } gboolean send_cpg_iov(struct iovec * iov) { static unsigned int queued = 0; queued++; crm_trace("Queueing CPG message %u (%llu bytes)", queued, (unsigned long long) iov->iov_len); cs_message_queue = g_list_append(cs_message_queue, iov); crm_cs_flush(&pcmk_cpg_handle); return TRUE; } static int pcmk_cpg_dispatch(gpointer user_data) { int rc = 0; crm_cluster_t *cluster = (crm_cluster_t*) user_data; rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %s (%d)", ais_error2text(rc), rc); cluster->cpg_handle = 0; return -1; } else if(cpg_evicted) { crm_err("Evicted from CPG membership"); return -1; } return 0; } char * pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content, uint32_t *kind, const char **from) { char *data = NULL; AIS_Message *msg = (AIS_Message *) content; if(handle) { // Do filtering and field massaging uint32_t local_nodeid = get_local_nodeid(handle); const char *local_name = get_local_node_name(); if (msg->sender.id > 0 && msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id); return NULL; } else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) { /* Not for us */ crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid); return NULL; } else if (msg->host.size != 0 && safe_str_neq(msg->host.uname, local_name)) { /* Not for us */ crm_trace("Not for us: %s != %s", msg->host.uname, local_name); return NULL; } msg->sender.id = nodeid; if (msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); msg->sender.size = strlen(peer->uname); memset(msg->sender.uname, 0, MAX_NAME); memcpy(msg->sender.uname, peer->uname, msg->sender.size); } } } crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); if (kind != NULL) { *kind = msg->header.id; } if (from != NULL) { *from = msg->sender.uname; } if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (safe_str_eq("identify", data)) { char *pid_s = crm_getpid_s(); send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); return NULL; } else { data = strdup(msg->data); } // Is this necessary? crm_get_peer(msg->sender.id, msg->sender.uname); crm_trace("Payload: %.200s", data); return data; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); free(data); return NULL; } void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; gboolean found = FALSE; static int counter = 0; uint32_t local_nodeid = get_local_nodeid(handle); for (i = 0; i < left_list_entries; i++) { crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL); crm_info("Node %u left group %s (peer=%s, counter=%d.%d)", left_list[i].nodeid, groupName->value, (peer? peer->uname : ""), counter, i); if (peer) { crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); } } for (i = 0; i < joined_list_entries; i++) { crm_info("Node %u joined group %s (counter=%d.%d)", joined_list[i].nodeid, groupName->value, counter, i); } for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); crm_info("Node %u still member of group %s (peer=%s, counter=%d.%d)", member_list[i].nodeid, groupName->value, (peer? peer->uname : ""), counter, i); /* Anyone that is sending us CPG messages must also be a _CPG_ member. * But it's _not_ safe to assume it's in the quorum membership. * We may have just found out it's dead and are processing the last couple of messages it sent */ peer = crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); if(peer && peer->state && crm_is_peer_active(peer) == FALSE) { time_t now = time(NULL); /* Co-opt the otherwise unused votes field */ if(peer->votes == 0) { peer->votes = now; } else if(now > (60 + peer->votes)) { - /* On the otherhand, if we're still getting messages, at a certain point - * we need to acknowledge our internal cache is probably wrong - * - * Set the threshold to 1 minute + /* On the other hand, if we're still getting messages, at a + * certain point we need to acknowledge our internal cache is + * probably wrong. Use 1 minute. */ crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id); if (crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0)) { peer->votes = 0; } } } if (local_nodeid == member_list[i].nodeid) { found = TRUE; } } if (!found) { crm_err("We're not part of CPG group '%s' anymore!", groupName->value); cpg_evicted = TRUE; } counter++; } gboolean cluster_connect_cpg(crm_cluster_t *cluster) { int rc = -1; int fd = 0; int retries = 0; uint32_t id = 0; crm_node_t *peer = NULL; cpg_handle_t handle = 0; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = cluster->destroy, }; cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = cluster->cpg.cpg_deliver_fn, .cpg_confchg_fn = cluster->cpg.cpg_confchg_fn, /* .cpg_deliver_fn = pcmk_cpg_deliver, */ /* .cpg_confchg_fn = pcmk_cpg_membership, */ }; cpg_evicted = FALSE; cluster->group.length = 0; cluster->group.value[0] = 0; /* group.value is char[128] */ strncpy(cluster->group.value, crm_system_name?crm_system_name:"unknown", 127); cluster->group.value[127] = 0; cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value)); cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the Cluster Process Group API: %d", rc); goto bail; } id = get_local_nodeid(handle); if (id == 0) { crm_err("Could not get local node id from the CPG API"); goto bail; } cluster->nodeid = id; retries = 0; cs_repeat(retries, 30, rc = cpg_join(handle, &cluster->group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", crm_system_name, rc); goto bail; } rc = cpg_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %d", rc); goto bail; } pcmk_cpg_handle = handle; cluster->cpg_handle = handle; mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(handle); return FALSE; } peer = crm_get_peer(id, NULL); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); return TRUE; } gboolean send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; data = dump_xml_unformatted(msg); rc = send_cluster_text(crm_class_cluster, data, local, node, dest); free(data); return rc; } gboolean send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; static int local_name_len = 0; static const char *local_name = NULL; char *target = NULL; struct iovec *iov; AIS_Message *msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); switch (msg_class) { case crm_class_cluster: break; default: crm_err("Invalid message class: %d", msg_class); return FALSE; } CRM_CHECK(dest != crm_msg_ais, return FALSE); if(local_name == NULL) { local_name = get_local_node_name(); } if(local_name_len == 0 && local_name) { local_name_len = strlen(local_name); } if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } msg = calloc(1, sizeof(AIS_Message)); msg_id++; msg->id = msg_id; msg->header.id = msg_class; msg->header.error = CS_OK; msg->host.type = dest; msg->host.local = local; if (node) { if (node->uname) { target = strdup(node->uname); msg->host.size = strlen(node->uname); memset(msg->host.uname, 0, MAX_NAME); memcpy(msg->host.uname, node->uname, msg->host.size); } else { target = crm_strdup_printf("%u", node->id); } msg->host.id = node->id; } else { target = strdup("all"); } msg->sender.id = 0; msg->sender.type = sender; msg->sender.pid = local_pid; msg->sender.size = local_name_len; memset(msg->sender.uname, 0, MAX_NAME); if(local_name && msg->sender.size) { memcpy(msg->sender.uname, local_name, msg->sender.size); } msg->size = 1 + strlen(data); msg->header.size = sizeof(AIS_Message) + msg->size; if (msg->size < CRM_BZ2_THRESHOLD) { msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, data, msg->size); } else { char *compressed = NULL; unsigned int new_size = 0; char *uncompressed = strdup(data); if (crm_compress_string(uncompressed, msg->size, 0, &compressed, &new_size)) { msg->header.size = sizeof(AIS_Message) + new_size; msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, compressed, new_size); msg->is_compressed = TRUE; msg->compressed_size = new_size; } else { msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, data, msg->size); } free(uncompressed); free(compressed); } iov = calloc(1, sizeof(struct iovec)); iov->iov_base = msg; iov->iov_len = msg->header.size; if (msg->compressed_size) { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->compressed_size, data); } else { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->size, data); } free(target); send_cpg_iov(iov); return TRUE; } enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); if (safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if (safe_str_eq(text, CRM_SYSTEM_CRMD)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if (safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if (safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1 || type <= crm_msg_stonith_ng) { /* Ensure it's sane */ type = crm_msg_none; } } return type; } diff --git a/lib/common/logging.c b/lib/common/logging.c index 7f41bac3af..60ba585877 100644 --- a/lib/common/logging.c +++ b/lib/common/logging.c @@ -1,994 +1,994 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include unsigned int crm_log_priority = LOG_NOTICE; unsigned int crm_log_level = LOG_INFO; static gboolean crm_tracing_enabled(void); unsigned int crm_trace_nonlog = 0; bool crm_is_daemon = 0; GLogFunc glib_log_default; static void crm_glib_handler(const gchar * log_domain, GLogLevelFlags flags, const gchar * message, gpointer user_data) { int log_level = LOG_WARNING; GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK); static struct qb_log_callsite *glib_cs = NULL; if (glib_cs == NULL) { glib_cs = qb_log_callsite_get(__FUNCTION__, __FILE__, "glib-handler", LOG_DEBUG, __LINE__, crm_trace_nonlog); } switch (msg_level) { case G_LOG_LEVEL_CRITICAL: log_level = LOG_CRIT; if (crm_is_callsite_active(glib_cs, LOG_DEBUG, 0) == FALSE) { /* log and record how we got here */ crm_abort(__FILE__, __FUNCTION__, __LINE__, message, TRUE, TRUE); } break; case G_LOG_LEVEL_ERROR: log_level = LOG_ERR; break; case G_LOG_LEVEL_MESSAGE: log_level = LOG_NOTICE; break; case G_LOG_LEVEL_INFO: log_level = LOG_INFO; break; case G_LOG_LEVEL_DEBUG: log_level = LOG_DEBUG; break; case G_LOG_LEVEL_WARNING: case G_LOG_FLAG_RECURSION: case G_LOG_FLAG_FATAL: case G_LOG_LEVEL_MASK: log_level = LOG_WARNING; break; } do_crm_log(log_level, "%s: %s", log_domain, message); } #ifndef NAME_MAX # define NAME_MAX 256 #endif static void crm_trigger_blackbox(int nsig) { if(nsig == SIGTRAP) { /* Turn it on if it wasn't already */ crm_enable_blackbox(nsig); } crm_write_blackbox(nsig, NULL); } const char * daemon_option(const char *option) { char env_name[NAME_MAX]; const char *value = NULL; snprintf(env_name, NAME_MAX, "PCMK_%s", option); value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } snprintf(env_name, NAME_MAX, "HA_%s", option); value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } crm_trace("Nothing found for %s", option); return NULL; } void set_daemon_option(const char *option, const char *value) { char env_name[NAME_MAX]; snprintf(env_name, NAME_MAX, "PCMK_%s", option); if (value) { crm_trace("Setting %s to %s", env_name, value); setenv(env_name, value, 1); } else { crm_trace("Unsetting %s", env_name); unsetenv(env_name); } snprintf(env_name, NAME_MAX, "HA_%s", option); if (value) { crm_trace("Setting %s to %s", env_name, value); setenv(env_name, value, 1); } else { crm_trace("Unsetting %s", env_name); unsetenv(env_name); } } gboolean daemon_option_enabled(const char *daemon, const char *option) { const char *value = daemon_option(option); if (value != NULL && crm_is_true(value)) { return TRUE; } else if (value != NULL && strstr(value, daemon)) { return TRUE; } return FALSE; } void crm_log_deinit(void) { g_log_set_default_handler(glib_log_default, NULL); } #define FMT_MAX 256 static void set_format_string(int method, const char *daemon) { int offset = 0; char fmt[FMT_MAX]; if (method > QB_LOG_STDERR) { /* When logging to a file */ struct utsname res; if (uname(&res) == 0) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%lu] %s %10s: ", (unsigned long) getpid(), res.nodename, daemon); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%lu] %10s: ", (unsigned long) getpid(), daemon); } } if (method == QB_LOG_SYSLOG) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%b"); crm_extended_logging(method, QB_FALSE); } else if (crm_tracing_enabled()) { offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%-12f:%%5l %%g) %%-7p: %%n:\t%%b"); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%n:\t%%b"); } CRM_LOG_ASSERT(offset > 0); qb_log_format_set(method, fmt); } gboolean crm_add_logfile(const char *filename) { bool is_default = false; static int default_fd = -1; static gboolean have_logfile = FALSE; const char *default_logfile = CRM_LOG_DIR "/pacemaker.log"; struct stat parent; int fd = 0, rc = 0; FILE *logfile = NULL; char *parent_dir = NULL; char *filename_cp; if (filename == NULL && have_logfile == FALSE) { filename = default_logfile; } if (filename == NULL) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, "none")) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, "/dev/null")) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, default_logfile)) { is_default = TRUE; } if(is_default && default_fd >= 0) { return TRUE; /* Nothing to do */ } /* Check the parent directory */ filename_cp = strdup(filename); parent_dir = dirname(filename_cp); rc = stat(parent_dir, &parent); if (rc != 0) { crm_err("Directory '%s' does not exist: logging to '%s' is disabled", parent_dir, filename); free(filename_cp); return FALSE; } free(filename_cp); errno = 0; logfile = fopen(filename, "a"); if(logfile == NULL) { crm_err("%s (%d): Logging to '%s' as uid=%u, gid=%u is disabled", pcmk_strerror(errno), errno, filename, geteuid(), getegid()); return FALSE; } /* Check/Set permissions if we're root */ if (geteuid() == 0) { struct stat st; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; gboolean fix = FALSE; int logfd = fileno(logfile); rc = fstat(logfd, &st); if (rc < 0) { crm_perror(LOG_WARNING, "Cannot stat %s", filename); fclose(logfile); return FALSE; } if(crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) == 0) { if (st.st_gid != pcmk_gid) { /* Wrong group */ fix = TRUE; } else if ((st.st_mode & S_IRWXG) != (S_IRGRP | S_IWGRP)) { /* Not read/writable by the correct group */ fix = TRUE; } } if (fix) { rc = fchown(logfd, pcmk_uid, pcmk_gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d", filename, CRM_DAEMON_USER, pcmk_gid); } rc = fchmod(logfd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (rc < 0) { crm_warn("Cannot change the mode of %s to rw-rw----", filename); } fprintf(logfile, "Set r/w permissions for uid=%d, gid=%d on %s\n", pcmk_uid, pcmk_gid, filename); if (fflush(logfile) < 0 || fsync(logfd) < 0) { crm_err("Couldn't write out logfile: %s", filename); } } } /* Close and reopen with libqb */ fclose(logfile); fd = qb_log_file_open(filename); if (fd < 0) { crm_perror(LOG_WARNING, "Couldn't send additional logging to %s", filename); return FALSE; } if(is_default) { default_fd = fd; } else if(default_fd >= 0) { crm_notice("Switching to %s", filename); qb_log_ctl(default_fd, QB_LOG_CONF_ENABLED, QB_FALSE); } crm_notice("Additional logging available in %s", filename); qb_log_ctl(fd, QB_LOG_CONF_ENABLED, QB_TRUE); /* qb_log_ctl(fd, QB_LOG_CONF_FILE_SYNC, 1); Turn on synchronous writes */ /* Enable callsites */ crm_update_callsites(); have_logfile = TRUE; return TRUE; } static int blackbox_trigger = 0; static char *blackbox_file_prefix = NULL; static void blackbox_logger(int32_t t, struct qb_log_callsite *cs, time_t timestamp, const char *msg) { if(cs && cs->priority < LOG_ERR) { crm_write_blackbox(SIGTRAP, cs); /* Bypass the over-dumping logic */ } else { crm_write_blackbox(0, cs); } } static void crm_control_blackbox(int nsig, bool enable) { int lpc = 0; if (blackbox_file_prefix == NULL) { pid_t pid = getpid(); blackbox_file_prefix = crm_strdup_printf("%s/%s-%lu", CRM_BLACKBOX_DIR, crm_system_name, (unsigned long) pid); } if (enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, 5 * 1024 * 1024); /* Any size change drops existing entries */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); /* Setting the size seems to disable it */ /* Enable synchronous logging */ for (lpc = QB_LOG_BLACKBOX; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_FILE_SYNC, QB_TRUE); } crm_notice("Initiated blackbox recorder: %s", blackbox_file_prefix); /* Save to disk on abnormal termination */ crm_signal(SIGSEGV, crm_trigger_blackbox); crm_signal(SIGABRT, crm_trigger_blackbox); crm_signal(SIGILL, crm_trigger_blackbox); crm_signal(SIGBUS, crm_trigger_blackbox); crm_signal(SIGFPE, crm_trigger_blackbox); crm_update_callsites(); blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL); qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE); crm_trace("Trigger: %d is %d %d", blackbox_trigger, qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); crm_update_callsites(); } else if (!enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); /* Disable synchronous logging again when the blackbox is disabled */ for (lpc = QB_LOG_BLACKBOX; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_FILE_SYNC, QB_FALSE); } } } void crm_enable_blackbox(int nsig) { crm_control_blackbox(nsig, TRUE); } void crm_disable_blackbox(int nsig) { crm_control_blackbox(nsig, FALSE); } void crm_write_blackbox(int nsig, struct qb_log_callsite *cs) { static int counter = 1; static time_t last = 0; char buffer[NAME_MAX]; time_t now = time(NULL); if (blackbox_file_prefix == NULL) { return; } switch (nsig) { case 0: case SIGTRAP: /* The graceful case - such as assertion failure or user request */ if (nsig == 0 && now == last) { /* Prevent over-dumping */ return; } snprintf(buffer, NAME_MAX, "%s.%d", blackbox_file_prefix, counter++); if (nsig == SIGTRAP) { crm_notice("Blackbox dump requested, please see %s for contents", buffer); } else if (cs) { syslog(LOG_NOTICE, "Problem detected at %s:%d (%s), please see %s for additional details", cs->function, cs->lineno, cs->filename, buffer); } else { crm_notice("Problem detected, please see %s for additional details", buffer); } last = now; qb_log_blackbox_write_to_file(buffer); /* Flush the existing contents * A size change would also work */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); break; default: /* Do as little as possible, just try to get what we have out * We logged the filename when the blackbox was enabled */ crm_signal(nsig, SIG_DFL); qb_log_blackbox_write_to_file(blackbox_file_prefix); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); raise(nsig); break; } } gboolean crm_log_cli_init(const char *entity) { return crm_log_init(entity, LOG_ERR, FALSE, FALSE, 0, NULL, TRUE); } static const char * crm_quark_to_string(uint32_t tag) { const char *text = g_quark_to_string(tag); if (text) { return text; } return ""; } static void crm_log_filter_source(int source, const char *trace_files, const char *trace_fns, const char *trace_fmts, const char *trace_tags, const char *trace_blackbox, struct qb_log_callsite *cs) { if (qb_log_ctl(source, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { return; } else if (cs->tags != crm_trace_nonlog && source == QB_LOG_BLACKBOX) { /* Blackbox gets everything if enabled */ qb_bit_set(cs->targets, source); } else if (source == blackbox_trigger && blackbox_trigger > 0) { /* Should this log message result in the blackbox being dumped */ if (cs->priority <= LOG_ERR) { qb_bit_set(cs->targets, source); } else if (trace_blackbox) { char *key = crm_strdup_printf("%s:%d", cs->function, cs->lineno); if (strstr(trace_blackbox, key) != NULL) { qb_bit_set(cs->targets, source); } free(key); } } else if (source == QB_LOG_SYSLOG) { /* No tracing to syslog */ if (cs->priority <= crm_log_priority && cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } /* Log file tracing options... */ } else if (cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } else if (trace_files && strstr(trace_files, cs->filename) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_fns && strstr(trace_fns, cs->function) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_fmts && strstr(trace_fmts, cs->format) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_tags && cs->tags != 0 && cs->tags != crm_trace_nonlog && g_quark_to_string(cs->tags) != NULL) { qb_bit_set(cs->targets, source); } } static void crm_log_filter(struct qb_log_callsite *cs) { int lpc = 0; static int need_init = 1; static const char *trace_fns = NULL; static const char *trace_tags = NULL; static const char *trace_fmts = NULL; static const char *trace_files = NULL; static const char *trace_blackbox = NULL; if (need_init) { need_init = 0; trace_fns = getenv("PCMK_trace_functions"); trace_fmts = getenv("PCMK_trace_formats"); trace_tags = getenv("PCMK_trace_tags"); trace_files = getenv("PCMK_trace_files"); trace_blackbox = getenv("PCMK_trace_blackbox"); if (trace_tags != NULL) { uint32_t tag; char token[500]; const char *offset = NULL; const char *next = trace_tags; do { offset = next; next = strchrnul(offset, ','); snprintf(token, sizeof(token), "%.*s", (int)(next - offset), offset); tag = g_quark_from_string(token); crm_info("Created GQuark %u from token '%s' in '%s'", tag, token, trace_tags); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } } cs->targets = 0; /* Reset then find targets to enable */ for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { crm_log_filter_source(lpc, trace_files, trace_fns, trace_fmts, trace_tags, trace_blackbox, cs); } } gboolean crm_is_callsite_active(struct qb_log_callsite *cs, uint8_t level, uint32_t tags) { gboolean refilter = FALSE; if (cs == NULL) { return FALSE; } if (cs->priority != level) { cs->priority = level; refilter = TRUE; } if (cs->tags != tags) { cs->tags = tags; refilter = TRUE; } if (refilter) { crm_log_filter(cs); } if (cs->targets == 0) { return FALSE; } return TRUE; } void crm_update_callsites(void) { static gboolean log = TRUE; if (log) { log = FALSE; crm_debug ("Enabling callsites based on priority=%d, files=%s, functions=%s, formats=%s, tags=%s", crm_log_level, getenv("PCMK_trace_files"), getenv("PCMK_trace_functions"), getenv("PCMK_trace_formats"), getenv("PCMK_trace_tags")); } qb_log_filter_fn_set(crm_log_filter); } static gboolean crm_tracing_enabled(void) { if (crm_log_level >= LOG_TRACE) { return TRUE; } else if (getenv("PCMK_trace_files") || getenv("PCMK_trace_functions") || getenv("PCMK_trace_formats") || getenv("PCMK_trace_tags")) { return TRUE; } return FALSE; } static int crm_priority2int(const char *name) { struct syslog_names { const char *name; int priority; }; static struct syslog_names p_names[] = { {"emerg", LOG_EMERG}, {"alert", LOG_ALERT}, {"crit", LOG_CRIT}, {"error", LOG_ERR}, {"warning", LOG_WARNING}, {"notice", LOG_NOTICE}, {"info", LOG_INFO}, {"debug", LOG_DEBUG}, {NULL, -1} }; int lpc; for (lpc = 0; name != NULL && p_names[lpc].name != NULL; lpc++) { if (crm_str_eq(p_names[lpc].name, name, TRUE)) { return p_names[lpc].priority; } } return crm_log_priority; } static void crm_identity(const char *entity, int argc, char **argv) { if(crm_system_name != NULL) { /* Nothing to do */ } else if (entity) { free(crm_system_name); crm_system_name = strdup(entity); } else if (argc > 0 && argv != NULL) { char *mutable = strdup(argv[0]); char *modified = basename(mutable); if (strstr(modified, "lt-") == modified) { modified += 3; } free(crm_system_name); crm_system_name = strdup(modified); free(mutable); } else if (crm_system_name == NULL) { crm_system_name = strdup("Unknown"); } setenv("PCMK_service", crm_system_name, 1); } void crm_log_preinit(const char *entity, int argc, char **argv) { /* Configure libqb logging with nothing turned on */ int lpc = 0; int32_t qb_facility = 0; static bool have_logging = FALSE; if(have_logging == FALSE) { have_logging = TRUE; crm_xml_init(); /* Sets buffer allocation strategy */ if (crm_trace_nonlog == 0) { crm_trace_nonlog = g_quark_from_static_string("Pacemaker non-logging tracepoint"); } umask(S_IWGRP | S_IWOTH | S_IROTH); /* Redirect messages from glib functions to our handler */ glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL); /* and for good measure... - this enum is a bit field (!) */ g_log_set_always_fatal((GLogLevelFlags) 0); /*value out of range */ /* Who do we log as */ crm_identity(entity, argc, argv); qb_facility = qb_log_facility2int("local0"); qb_log_init(crm_system_name, qb_facility, LOG_ERR); crm_log_level = LOG_CRIT; /* Nuke any syslog activity until it's asked for */ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); /* Set format strings and disable threading * Pacemaker and threads do not mix well (due to the amount of forking) */ qb_log_tags_stringify_fn_set(crm_quark_to_string); for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_THREADED, QB_FALSE); set_format_string(lpc, crm_system_name); } } } gboolean crm_log_init(const char *entity, uint8_t level, gboolean daemon, gboolean to_stderr, int argc, char **argv, gboolean quiet) { const char *syslog_priority = NULL; const char *logfile = daemon_option("logfile"); const char *facility = daemon_option("logfacility"); const char *f_copy = facility; crm_is_daemon = daemon; crm_log_preinit(entity, argc, argv); if(level > crm_log_level) { crm_log_level = level; } /* Should we log to syslog */ if (facility == NULL) { if(crm_is_daemon) { facility = "daemon"; } else { facility = "none"; } set_daemon_option("logfacility", facility); } if (safe_str_eq(facility, "none")) { quiet = TRUE; } else { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, qb_log_facility2int(facility)); } if (daemon_option_enabled(crm_system_name, "debug")) { /* Override the default setting */ crm_log_level = LOG_DEBUG; } /* What lower threshold do we have for sending to syslog */ syslog_priority = daemon_option("logpriority"); if(syslog_priority) { int priority = crm_priority2int(syslog_priority); crm_log_priority = priority; qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", priority); } else { qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_NOTICE); } + // Log to syslog unless requested to be quiet if (!quiet) { - /* Nuke any syslog activity */ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); } /* Should we log to stderr */ if (daemon_option_enabled(crm_system_name, "stderr")) { /* Override the default setting */ to_stderr = TRUE; } crm_enable_stderr(to_stderr); /* Should we log to a file */ if (safe_str_eq("none", logfile)) { /* No soup^Hlogs for you! */ } else if(crm_is_daemon) { - /* The daemons always get a log file, unless explicitly set to configured 'none' */ + // Daemons always get a log file, unless explicitly set to "none" crm_add_logfile(logfile); } else if(logfile) { crm_add_logfile(logfile); } if (crm_is_daemon && daemon_option_enabled(crm_system_name, "blackbox")) { crm_enable_blackbox(0); } /* Summary */ crm_trace("Quiet: %d, facility %s", quiet, f_copy); daemon_option("logfile"); daemon_option("logfacility"); crm_update_callsites(); /* Ok, now we can start logging... */ if (quiet == FALSE && crm_is_daemon == FALSE) { crm_log_args(argc, argv); } if (crm_is_daemon) { const char *user = getenv("USER"); if (user != NULL && safe_str_neq(user, "root") && safe_str_neq(user, CRM_DAEMON_USER)) { crm_trace("Not switching to corefile directory for %s", user); crm_is_daemon = FALSE; } } if (crm_is_daemon) { int user = getuid(); const char *base = CRM_CORE_DIR; struct passwd *pwent = getpwuid(user); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get name for uid: %d", user); } else if (safe_str_neq(pwent->pw_name, "root") && safe_str_neq(pwent->pw_name, CRM_DAEMON_USER)) { crm_trace("Don't change active directory for regular user: %s", pwent->pw_name); } else if (chdir(base) < 0) { crm_perror(LOG_INFO, "Cannot change active directory to %s", base); } else { crm_info("Changed active directory to %s", base); #if 0 { char path[512]; snprintf(path, 512, "%s-%lu", crm_system_name, (unsigned long) getpid()); mkdir(path, 0750); chdir(path); crm_info("Changed active directory to %s/%s/%s", base, pwent->pw_name, path); } #endif } /* Original meanings from signal(7) * * Signal Value Action Comment * SIGTRAP 5 Core Trace/breakpoint trap * SIGUSR1 30,10,16 Term User-defined signal 1 * SIGUSR2 31,12,17 Term User-defined signal 2 * * Our usage is as similar as possible */ mainloop_add_signal(SIGUSR1, crm_enable_blackbox); mainloop_add_signal(SIGUSR2, crm_disable_blackbox); mainloop_add_signal(SIGTRAP, crm_trigger_blackbox); } return TRUE; } /* returns the old value */ unsigned int set_crm_log_level(unsigned int level) { unsigned int old = crm_log_level; crm_log_level = level; crm_update_callsites(); crm_trace("New log level: %d", level); return old; } void crm_enable_stderr(int enable) { if (enable && qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); crm_update_callsites(); } else if (enable == FALSE) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); } } void crm_bump_log_level(int argc, char **argv) { static int args = TRUE; int level = crm_log_level; if (args && argc > 1) { crm_log_args(argc, argv); } if (qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) { set_crm_log_level(level + 1); } /* Enable after potentially logging the argstring, not before */ crm_enable_stderr(TRUE); } unsigned int get_crm_log_level(void) { return crm_log_level; } #define ARGS_FMT "Invoked: %s" void crm_log_args(int argc, char **argv) { int lpc = 0; int len = 0; int existing_len = 0; int line = __LINE__; static int logged = 0; char *arg_string = NULL; if (argc == 0 || argv == NULL || logged) { return; } logged = 1; for (; lpc < argc; lpc++) { if (argv[lpc] == NULL) { break; } len = 2 + strlen(argv[lpc]); /* +1 space, +1 EOS */ arg_string = realloc_safe(arg_string, len + existing_len); existing_len += sprintf(arg_string + existing_len, "%s ", argv[lpc]); } qb_log_from_external_source(__func__, __FILE__, ARGS_FMT, LOG_NOTICE, line, 0, arg_string); free(arg_string); } void crm_log_output_fn(const char *file, const char *function, int line, int level, const char *prefix, const char *output) { const char *next = NULL; const char *offset = NULL; if (output == NULL) { level = LOG_DEBUG; output = "-- empty --"; } next = output; do { offset = next; next = strchrnul(offset, '\n'); do_crm_log_alias(level, file, function, line, "%s [ %.*s ]", prefix, (int)(next - offset), offset); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } diff --git a/lib/common/operations.c b/lib/common/operations.c index a77566baea..bbf0812e3d 100644 --- a/lib/common/operations.c +++ b/lib/common/operations.c @@ -1,572 +1,573 @@ /* * Copyright (C) 2004-2017 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include /*! * \brief Generate an operation key * * \param[in] rsc_id ID of resource being operated on * \param[in] op_type Operation name * \param[in] interval Operation interval * * \return Newly allocated memory containing operation key as string * * \note It is the caller's responsibility to free() the result. */ char * generate_op_key(const char *rsc_id, const char *op_type, int interval) { CRM_ASSERT(rsc_id != NULL); CRM_ASSERT(op_type != NULL); CRM_ASSERT(interval >= 0); return crm_strdup_printf("%s_%s_%d", rsc_id, op_type, interval); } gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, int *interval) { char *notify = NULL; char *mutable_key = NULL; char *mutable_key_ptr = NULL; int len = 0, offset = 0, ch = 0; CRM_CHECK(key != NULL, return FALSE); *interval = 0; len = strlen(key); offset = len - 1; crm_trace("Source: %s", key); while (offset > 0 && isdigit(key[offset])) { int digits = len - offset; ch = key[offset] - '0'; CRM_CHECK(ch < 10, return FALSE); CRM_CHECK(ch >= 0, return FALSE); while (digits > 1) { digits--; ch = ch * 10; } *interval += ch; offset--; } crm_trace(" Interval: %d", *interval); CRM_CHECK(key[offset] == '_', return FALSE); mutable_key = strdup(key); mutable_key[offset] = 0; offset--; while (offset > 0 && key[offset] != '_') { offset--; } CRM_CHECK(key[offset] == '_', free(mutable_key); return FALSE); mutable_key_ptr = mutable_key + offset + 1; crm_trace(" Action: %s", mutable_key_ptr); *op_type = strdup(mutable_key_ptr); mutable_key[offset] = 0; offset--; CRM_CHECK(mutable_key != mutable_key_ptr, free(mutable_key); return FALSE); notify = strstr(mutable_key, "_post_notify"); if (notify && safe_str_eq(notify, "_post_notify")) { notify[0] = 0; } notify = strstr(mutable_key, "_pre_notify"); if (notify && safe_str_eq(notify, "_pre_notify")) { notify[0] = 0; } crm_trace(" Resource: %s", mutable_key); *rsc_id = mutable_key; return TRUE; } char * generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type) { CRM_CHECK(rsc_id != NULL, return NULL); CRM_CHECK(op_type != NULL, return NULL); CRM_CHECK(notify_type != NULL, return NULL); return crm_strdup_printf("%s_%s_notify_%s_0", rsc_id, notify_type, op_type); } char * generate_transition_magic(const char *transition_key, int op_status, int op_rc) { CRM_CHECK(transition_key != NULL, return NULL); return crm_strdup_printf("%d:%d;%s", op_status, op_rc, transition_key); } gboolean decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc) { int res = 0; char *key = NULL; gboolean result = TRUE; CRM_CHECK(magic != NULL, return FALSE); CRM_CHECK(op_rc != NULL, return FALSE); CRM_CHECK(op_status != NULL, return FALSE); key = calloc(1, strlen(magic) + 1); res = sscanf(magic, "%d:%d;%s", op_status, op_rc, key); if (res != 3) { crm_warn("Only found %d items in: '%s'", res, magic); free(key); return FALSE; } CRM_CHECK(decode_transition_key(key, uuid, transition_id, action_id, target_rc), result = FALSE); free(key); return result; } char * generate_transition_key(int transition_id, int action_id, int target_rc, const char *node) { CRM_CHECK(node != NULL, return NULL); return crm_strdup_printf("%d:%d:%d:%-*s", action_id, transition_id, target_rc, 36, node); } gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc) { CRM_CHECK(uuid != NULL, return FALSE); CRM_CHECK(target_rc != NULL, return FALSE); CRM_CHECK(action_id != NULL, return FALSE); CRM_CHECK(transition_id != NULL, return FALSE); *uuid = calloc(37, sizeof(char)); if (sscanf(key, "%d:%d:%d:%36s", action_id, transition_id, target_rc, *uuid) != 4) { crm_err("Invalid transition key '%s'", key); free(*uuid); *uuid = NULL; *target_rc = -1; *action_id = -1; *transition_id = -1; return FALSE; } if (strlen(*uuid) != 36) { crm_warn("Invalid UUID '%s' in transition key '%s'", *uuid, key); } return TRUE; } void filter_action_parameters(xmlNode * param_set, const char *version) { char *key = NULL; char *timeout = NULL; char *interval = NULL; const char *attr_filter[] = { XML_ATTR_ID, XML_ATTR_CRM_VERSION, XML_LRM_ATTR_OP_DIGEST, XML_LRM_ATTR_TARGET, XML_LRM_ATTR_TARGET_UUID, "pcmk_external_ip" }; gboolean do_delete = FALSE; int lpc = 0; static int meta_len = 0; if (meta_len == 0) { meta_len = strlen(CRM_META); } if (param_set == NULL) { return; } for (lpc = 0; lpc < DIMOF(attr_filter); lpc++) { xml_remove_prop(param_set, attr_filter[lpc]); } key = crm_meta_name(XML_LRM_ATTR_INTERVAL); interval = crm_element_value_copy(param_set, key); free(key); key = crm_meta_name(XML_ATTR_TIMEOUT); timeout = crm_element_value_copy(param_set, key); if (param_set) { xmlAttrPtr xIter = param_set->properties; while (xIter) { const char *prop_name = (const char *)xIter->name; xIter = xIter->next; do_delete = FALSE; if (strncasecmp(prop_name, CRM_META, meta_len) == 0) { do_delete = TRUE; } if (do_delete) { xml_remove_prop(param_set, prop_name); } } } if (crm_get_msec(interval) > 0) { /* Re-instate the operation's timeout value */ if (timeout != NULL) { crm_xml_add(param_set, key, timeout); } } free(interval); free(timeout); free(key); } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" static void append_digest(lrmd_event_data_t * op, xmlNode * update, const char *version, const char *magic, int level) { /* this will enable us to later determine that the * resource's parameters have changed and we should force * a restart */ char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(op->params, hash2field, args_xml); filter_action_parameters(args_xml, version); digest = calculate_operation_digest(args_xml, version); #if 0 if (level < get_crm_log_level() && op->interval == 0 && crm_str_eq(op->op_type, CRMD_ACTION_START, TRUE)) { char *digest_source = dump_xml_unformatted(args_xml); do_crm_log(level, "Calculated digest %s for %s (%s). Source: %s\n", digest, ID(update), magic, digest_source); free(digest_source); } #endif crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest); free_xml(args_xml); free(digest); } int rsc_op_expected_rc(lrmd_event_data_t * op) { int rc = 0; if (op && op->user_data) { int dummy = 0; char *uuid = NULL; decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &rc); free(uuid); } return rc; } gboolean did_rsc_op_fail(lrmd_event_data_t * op, int target_rc) { switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: case PCMK_LRM_OP_PENDING: return FALSE; break; case PCMK_LRM_OP_NOTSUPPORTED: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_ERROR: return TRUE; break; default: if (target_rc != op->rc) { return TRUE; } } return FALSE; } /*! * \brief Create a CIB XML element for an operation * * \param[in] parent If not NULL, make new XML node a child of this one * \param[in] prefix Generate an ID using this prefix * \param[in] task Operation task to set * \param[in] interval Operation interval to set * \param[in] timeout If not NULL, operation timeout to set * * \return New XML object on success, NULL otherwise */ xmlNode * crm_create_op_xml(xmlNode *parent, const char *prefix, const char *task, const char *interval, const char *timeout) { xmlNode *xml_op; CRM_CHECK(prefix && task && interval, return NULL); xml_op = create_xml_node(parent, XML_ATTR_OP); crm_xml_set_id(xml_op, "%s-%s-%s", prefix, task, interval); crm_xml_add(xml_op, XML_LRM_ATTR_INTERVAL, interval); crm_xml_add(xml_op, "name", task); if (timeout) { crm_xml_add(xml_op, XML_ATTR_TIMEOUT, timeout); } return xml_op; } xmlNode * create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * caller_version, int target_rc, const char * node, const char * origin, int level) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; CRM_CHECK(op != NULL, return NULL); do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%d)", origin, op->rsc_id, op->op_type, services_lrm_status_str(op->op_status), op->interval); crm_trace("DC version: %s", caller_version); task = op->op_type; - /* remap the task name under various scenarios - * this makes life easier for the PE when trying determine the current state + + /* Remap the task name under various scenarios, to make life easier for the + * PE when determining the current state. */ if (crm_str_eq(task, "reload", TRUE)) { if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_START; } else { task = CRMD_ACTION_STATUS; } } else if (crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { /* if the migrate_from fails it will have enough info to do the right thing */ if (op->op_status == PCMK_LRM_OP_DONE) { task = CRMD_ACTION_STOP; } else { task = CRMD_ACTION_STATUS; } } else if ((op->op_status == PCMK_LRM_OP_DONE) && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { task = CRMD_ACTION_START; } key = generate_op_key(op->rsc_id, task, op->interval); if (crm_str_eq(task, CRMD_ACTION_NOTIFY, TRUE)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = generate_notify_key(op->rsc_id, n_type, n_task); /* these are not yet allowed to fail */ op->op_status = PCMK_LRM_OP_DONE; op->rc = 0; } else if (did_rsc_op_fail(op, target_rc)) { op_id = generate_op_key(op->rsc_id, "last_failure", 0); if (op->interval == 0) { /* Ensure 'last' gets updated too in case recording-pending="true" */ op_id_additional = generate_op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval > 0) { op_id = strdup(key); } else { op_id = generate_op_key(op->rsc_id, "last", 0); } again: xml_op = find_entity(parent, XML_LRM_TAG_RSC_OP, op_id); if (xml_op == NULL) { xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for:" " %s_%s_%d %d from %s", op->rsc_id, op->op_type, op->interval, op->call_id, origin); local_user_data = generate_transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if(magic == NULL) { magic = generate_transition_magic(op->user_data, op->op_status, op->rc); } crm_xml_add(xml_op, XML_ATTR_ID, op_id); crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key); crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task); crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin); crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version); crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, exit_reason == NULL ? "" : exit_reason); crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); /* For context during triage */ crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc); crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status); crm_xml_add_int(xml_op, XML_LRM_ATTR_INTERVAL, op->interval); if (compare_version("2.1", caller_version) <= 0) { if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_trace("Timing data (%s_%s_%d): last=%u change=%u exec=%u queue=%u", op->rsc_id, op->op_type, op->interval, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); if (op->interval == 0) { /* The values are the same for non-recurring ops */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_RUN, op->t_run); crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); } else if(op->t_rcchange) { /* last-run is not accurate for recurring ops */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_rcchange); } else { /* ...but is better than nothing otherwise */ crm_xml_add_int(xml_op, XML_RSC_OP_LAST_CHANGE, op->t_run); } crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time); crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time); } } if (crm_str_eq(op->op_type, CRMD_ACTION_MIGRATE, TRUE) || crm_str_eq(op->op_type, CRMD_ACTION_MIGRATED, TRUE)) { /* * Record migrate_source and migrate_target always for migrate ops. */ const char *name = XML_LRM_ATTR_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = XML_LRM_ATTR_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } append_digest(op, xml_op, caller_version, magic, LOG_DEBUG); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } /*! * \brief Check whether an operation requires resource agent meta-data * * \param[in] rsc_class Resource agent class (or NULL to skip class check) * \param[in] op Operation action (or NULL to skip op check) * * \return TRUE if operation needs meta-data, FALSE otherwise * \note At least one of rsc_class and op must be specified. */ bool crm_op_needs_metadata(const char *rsc_class, const char *op) { /* Agent meta-data is used to determine whether a reload is possible, and to * evaluate versioned parameters -- so if this op is not relevant to those * features, we don't need the meta-data. */ CRM_CHECK(rsc_class || op, return FALSE); if (rsc_class && strcmp(rsc_class, PCMK_RESOURCE_CLASS_OCF) && strcmp(rsc_class, PCMK_RESOURCE_CLASS_STONITH)) { /* Meta-data is only needed for resource classes that use parameters */ return FALSE; } /* Meta-data is only needed for these actions */ if (op && strcmp(op, CRMD_ACTION_START) && strcmp(op, CRMD_ACTION_STATUS) && strcmp(op, CRMD_ACTION_PROMOTE) && strcmp(op, CRMD_ACTION_DEMOTE) && strcmp(op, CRMD_ACTION_RELOAD) && strcmp(op, CRMD_ACTION_MIGRATE) && strcmp(op, CRMD_ACTION_MIGRATED) && strcmp(op, CRMD_ACTION_NOTIFY)) { return FALSE; } return TRUE; }