diff --git a/cts/CIB.py b/cts/CIB.py index 49fad7b923..2e606b222d 100644 --- a/cts/CIB.py +++ b/cts/CIB.py @@ -1,522 +1,520 @@ """ CIB generator for Pacemaker's Cluster Test Suite (CTS) """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2008-2018 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import os import warnings import tempfile from cts.CTSvars import * class CibBase(object): def __init__(self, Factory, tag, _id, **kwargs): self.tag = tag self.name = _id self.kwargs = kwargs self.children = [] self.Factory = Factory def __repr__(self): return "%s-%s" % (self.tag, self.name) def add_child(self, child): self.children.append(child) def __setitem__(self, key, value): if value: self.kwargs[key] = value else: self.kwargs.pop(key, None) from cts.cib_xml import * class ConfigBase(object): cts_cib = None version = "unknown" Factory = None def __init__(self, CM, factory, tmpfile=None): self.CM = CM self.Factory = factory if not tmpfile: warnings.filterwarnings("ignore") f=tempfile.NamedTemporaryFile(delete=True) f.close() tmpfile = f.name warnings.resetwarnings() self.Factory.tmpfile = tmpfile def version(self): return self.version def NextIP(self): ip = self.CM.Env["IPBase"] if ":" in ip: (prefix, sep, suffix) = ip.rpartition(":") suffix = str(hex(int(suffix, 16)+1)).lstrip("0x") else: (prefix, sep, suffix) = ip.rpartition(".") suffix = str(int(suffix)+1) ip = prefix + sep + suffix self.CM.Env["IPBase"] = ip return ip.strip() class CIB12(ConfigBase): version = "pacemaker-1.2" counter = 1 def _show(self, command=""): output = "" (rc, result) = self.Factory.rsh(self.Factory.target, "HOME=/root CIB_file="+self.Factory.tmpfile+" cibadmin -Ql "+command, None, ) for line in result: output += line self.Factory.debug("Generated Config: "+line) return output def NewIP(self, name=None, standard="ocf"): if self.CM.Env["IPagent"] == "IPaddr2": ip = self.NextIP() if not name: if ":" in ip: (prefix, sep, suffix) = ip.rpartition(":") name = "r"+suffix else: name = "r"+ip r = Resource(self.Factory, name, self.CM.Env["IPagent"], standard) r["ip"] = ip if ":" in ip: r["cidr_netmask"] = "64" r["nic"] = "eth0" else: r["cidr_netmask"] = "32" else: if not name: name = "r%s%d" % (self.CM.Env["IPagent"], self.counter) self.counter = self.counter + 1 r = Resource(self.Factory, name, self.CM.Env["IPagent"], standard) r.add_op("monitor", "5s") return r def get_node_id(self, node_name): """ Check the cluster configuration for a node ID. """ # We can't account for every possible configuration, # so we only return a node ID if: # * The node is specified in /etc/corosync/corosync.conf # with "ring0_addr:" equal to node_name and "nodeid:" # explicitly specified. # In all other cases, we return 0. node_id = 0 # awkward command: use } as record separator # so each corosync.conf "object" is one record; # match the "node {" record that has "ring0_addr: node_name"; # then print the substring of that record after "nodeid:" (rc, output) = self.Factory.rsh(self.Factory.target, r"""awk -v RS="}" """ r"""'/^(\s*nodelist\s*{)?\s*node\s*{.*(ring0_addr|name):\s*%s(\s+|$)/""" r"""{gsub(/.*nodeid:\s*/,"");gsub(/\s+.*$/,"");print}'""" r""" /etc/corosync/corosync.conf""" % node_name, None) if rc == 0 and len(output) == 1: try: node_id = int(output[0]) except ValueError: node_id = 0 return node_id def install(self, target): old = self.Factory.tmpfile # Force a rebuild self.cts_cib = None self.Factory.tmpfile = CTSvars.CRM_CONFIG_DIR+"/cib.xml" self.contents(target) self.Factory.rsh(self.Factory.target, "chown "+CTSvars.CRM_DAEMON_USER+" "+self.Factory.tmpfile) self.Factory.tmpfile = old def contents(self, target=None): # fencing resource if self.cts_cib: return self.cts_cib if target: self.Factory.target = target self.Factory.rsh(self.Factory.target, "HOME=/root cibadmin --empty %s > %s" % (self.version, self.Factory.tmpfile)) self.num_nodes = len(self.CM.Env["nodes"]) no_quorum = "stop" if self.num_nodes < 3: no_quorum = "ignore" self.Factory.log("Cluster only has %d nodes, configuring: no-quorum-policy=ignore" % self.num_nodes) # We don't need a nodes section unless we add attributes stn = None # Fencing resource # Define first so that the shell doesn't reject every update if self.CM.Env["DoFencing"]: # Define the "real" fencing device st = Resource(self.Factory, "Fencing", ""+self.CM.Env["stonith-type"], "stonith") # Set a threshold for unreliable stonith devices such as the vmware one st.add_meta("migration-threshold", "5") st.add_op("monitor", "120s", timeout="120s") st.add_op("stop", "0", timeout="60s") st.add_op("start", "0", timeout="60s") # For remote node tests, a cluster node is stopped and brought back up # as a remote node with the name "remote-OLDNAME". To allow fencing # devices to fence these nodes, create a list of all possible node names. all_node_names = [ prefix+n for n in self.CM.Env["nodes"] for prefix in ('', 'remote-') ] # Add all parameters specified by user entries = self.CM.Env["stonith-params"].split(',') for entry in entries: try: (name, value) = entry.split('=', 1) except ValueError: print("Warning: skipping invalid fencing parameter: %s" % entry) continue # Allow user to specify "all" as the node list, and expand it here if name in [ "hostlist", "pcmk_host_list" ] and value == "all": value = ' '.join(all_node_names) st[name] = value st.commit() # Test advanced fencing logic if True: stf_nodes = [] stt_nodes = [] attr_nodes = {} # Create the levels stl = FencingTopology(self.Factory) for node in self.CM.Env["nodes"]: # Remote node tests will rename the node remote_node = "remote-" + node # Randomly assign node to a fencing method ftype = self.CM.Env.RandomGen.choice(["levels-and", "levels-or ", "broadcast "]) # For levels-and, randomly choose targeting by node name or attribute by = "" if ftype == "levels-and": node_id = self.get_node_id(node) if node_id == 0 or self.CM.Env.RandomGen.choice([True, False]): by = " (by name)" else: attr_nodes[node] = node_id by = " (by attribute)" self.CM.log(" - Using %s fencing for node: %s%s" % (ftype, node, by)) if ftype == "levels-and": # If targeting by name, add a topology level for this node if node not in attr_nodes: stl.level(1, node, "FencingPass,Fencing") # Always target remote nodes by name, otherwise we would need to add # an attribute to the remote node only during remote tests (we don't # want nonexistent remote nodes showing up in the non-remote tests). # That complexity is not worth the effort. stl.level(1, remote_node, "FencingPass,Fencing") # Add the node (and its remote equivalent) to the list of levels-and nodes. stt_nodes.extend([node, remote_node]) elif ftype == "levels-or ": for n in [ node, remote_node ]: stl.level(1, n, "FencingFail") stl.level(2, n, "Fencing") stf_nodes.extend([node, remote_node]) # If any levels-and nodes were targeted by attribute, # create the attributes and a level for the attribute. if attr_nodes: stn = Nodes(self.Factory) for (node_name, node_id) in list(attr_nodes.items()): stn.add_node(node_name, node_id, { "cts-fencing" : "levels-and" }) stl.level(1, None, "FencingPass,Fencing", "cts-fencing", "levels-and") # Create a Dummy agent that always passes for levels-and if len(stt_nodes): - self.CM.install_helper("fence_dummy", destdir="/usr/sbin", sourcedir=CTSvars.Fencing_home) stt = Resource(self.Factory, "FencingPass", "fence_dummy", "stonith") stt["pcmk_host_list"] = " ".join(stt_nodes) # Wait this many seconds before doing anything, handy for letting disks get flushed too stt["random_sleep_range"] = "30" stt["mode"] = "pass" stt.commit() # Create a Dummy agent that always fails for levels-or if len(stf_nodes): - self.CM.install_helper("fence_dummy", destdir="/usr/sbin", sourcedir=CTSvars.Fencing_home) stf = Resource(self.Factory, "FencingFail", "fence_dummy", "stonith") stf["pcmk_host_list"] = " ".join(stf_nodes) # Wait this many seconds before doing anything, handy for letting disks get flushed too stf["random_sleep_range"] = "30" stf["mode"] = "fail" stf.commit() # Now commit the levels themselves stl.commit() o = Option(self.Factory) o["stonith-enabled"] = self.CM.Env["DoFencing"] o["start-failure-is-fatal"] = "false" o["pe-input-series-max"] = "5000" o["shutdown-escalation"] = "5min" o["batch-limit"] = "10" o["dc-deadtime"] = "5s" o["no-quorum-policy"] = no_quorum if self.CM.Env["DoBSC"] == 1: o["ident-string"] = "Linux-HA TEST configuration file - REMOVEME!!" o.commit() o = OpDefaults(self.Factory) o["timeout"] = "90s" o.commit() # Commit the nodes section if we defined one if stn is not None: stn.commit() # Add an alerts section if possible if self.Factory.rsh.exists_on_all(self.CM.Env["notification-agent"], self.CM.Env["nodes"]): alerts = Alerts(self.Factory) alerts.add_alert(self.CM.Env["notification-agent"], self.CM.Env["notification-recipient"]) alerts.commit() # Add resources? if self.CM.Env["CIBResource"] == 1: self.add_resources() if self.CM.cluster_monitor == 1: mon = Resource(self.Factory, "cluster_mon", "ocf", "ClusterMon", "pacemaker") mon.add_op("start", "0", requires="nothing") mon.add_op("monitor", "5s", requires="nothing") mon["update"] = "10" mon["extra_options"] = "-r -n" mon["user"] = "abeekhof" mon["htmlfile"] = "/suse/abeekhof/Export/cluster.html" mon.commit() #self._create('''location prefer-dc cluster_mon rule -INFINITY: \#is_dc eq false''') # generate cib self.cts_cib = self._show() if self.Factory.tmpfile != CTSvars.CRM_CONFIG_DIR+"/cib.xml": self.Factory.rsh(self.Factory.target, "rm -f "+self.Factory.tmpfile) return self.cts_cib def add_resources(self): # Per-node resources for node in self.CM.Env["nodes"]: name = "rsc_"+node r = self.NewIP(name) r.prefer(node, "100") r.commit() # Migrator # Make this slightly sticky (since we have no other location constraints) to avoid relocation during Reattach m = Resource(self.Factory, "migrator","Dummy", "ocf", "pacemaker") m["passwd"] = "whatever" m.add_meta("resource-stickiness","1") m.add_meta("allow-migrate", "1") m.add_op("monitor", "P10S") m.commit() # Ping the test master p = Resource(self.Factory, "ping-1","ping", "ocf", "pacemaker") p.add_op("monitor", "60s") p["host_list"] = self.CM.Env["cts-master"] p["name"] = "connected" p["debug"] = "true" c = Clone(self.Factory, "Connectivity", p) c["globally-unique"] = "false" c.commit() # promotable clone resource s = Resource(self.Factory, "stateful-1", "Stateful", "ocf", "pacemaker") s.add_op("monitor", "15s", timeout="60s") s.add_op("monitor", "16s", timeout="60s", role="Master") ms = Clone(self.Factory, "promotable-1", s) ms["promotable"] = "true" ms["clone-max"] = self.num_nodes ms["clone-node-max"] = 1 ms["promoted-max"] = 1 ms["promoted-node-max"] = 1 # Require connectivity to run the promotable clone r = Rule(self.Factory, "connected", "-INFINITY", op="or") r.add_child(Expression(self.Factory, "m1-connected-1", "connected", "lt", "1")) r.add_child(Expression(self.Factory, "m1-connected-2", "connected", "not_defined", None)) ms.prefer("connected", rule=r) ms.commit() # Group Resource g = Group(self.Factory, "group-1") g.add_child(self.NewIP()) if self.CM.Env["have_systemd"]: sysd = Resource(self.Factory, "petulant", "pacemaker-cts-dummyd@10", "service") sysd.add_op("monitor", "P10S") g.add_child(sysd) else: g.add_child(self.NewIP()) g.add_child(self.NewIP()) # Make group depend on the promotable clone g.after("promotable-1", first="promote", then="start") g.colocate("promotable-1", "INFINITY", withrole="Master") g.commit() # LSB resource lsb = Resource(self.Factory, "lsb-dummy", "LSBDummy", "lsb") lsb.add_op("monitor", "5s") # LSB with group lsb.after("group-1") lsb.colocate("group-1") lsb.commit() class CIB20(CIB12): version = "pacemaker-2.5" class CIB30(CIB12): version = "pacemaker-3.0" #class HASI(CIB10): # def add_resources(self): # # DLM resource # self._create('''primitive dlm ocf:pacemaker:controld op monitor interval=120s''') # self._create('''clone dlm-clone dlm meta globally-unique=false interleave=true''') # O2CB resource # self._create('''primitive o2cb ocf:ocfs2:o2cb op monitor interval=120s''') # self._create('''clone o2cb-clone o2cb meta globally-unique=false interleave=true''') # self._create('''colocation o2cb-with-dlm INFINITY: o2cb-clone dlm-clone''') # self._create('''order start-o2cb-after-dlm mandatory: dlm-clone o2cb-clone''') class ConfigFactory(object): def __init__(self, CM): self.CM = CM self.rsh = self.CM.rsh self.register("pacemaker12", CIB12, CM, self) self.register("pacemaker20", CIB20, CM, self) self.register("pacemaker30", CIB30, CM, self) # self.register("hae", HASI, CM, self) self.target = self.CM.Env["nodes"][0] self.tmpfile = None def log(self, args): self.CM.log("cib: %s" % args) def debug(self, args): self.CM.debug("cib: %s" % args) def register(self, methodName, constructor, *args, **kargs): """register a constructor""" _args = [constructor] _args.extend(args) setattr(self, methodName, ConfigFactoryItem(*_args, **kargs)) def unregister(self, methodName): """unregister a constructor""" delattr(self, methodName) def createConfig(self, name="pacemaker-1.0"): if name == "pacemaker-1.0": name = "pacemaker10"; elif name == "pacemaker-1.2": name = "pacemaker12"; elif name == "pacemaker-2.0": name = "pacemaker20"; elif name == "pacemaker-3.0": name = "pacemaker30"; elif name == "hasi": name = "hae"; if hasattr(self, name): return getattr(self, name)() else: self.CM.log("Configuration variant '%s' is unknown. Defaulting to latest config" % name) return self.pacemaker30() class ConfigFactoryItem(object): def __init__(self, function, *args, **kargs): self._function = function self._args = args self._kargs = kargs def __call__(self, *args, **kargs): """call function""" _args = list(self._args) _args.extend(args) _kargs = self._kargs.copy() _kargs.update(kargs) return self._function(*_args,**_kargs) if __name__ == '__main__': """ Unit test (pass cluster node names as command line arguments) """ import cts.CTS import cts.CM_corosync import sys if len(sys.argv) < 2: print("Usage: %s ..." % sys.argv[0]) sys.exit(1) args = [ "--nodes", " ".join(sys.argv[1:]), "--clobber-cib", "--populate-resources", "--stack", "corosync", "--test-ip-base", "fe80::1234:56:7890:1000", "--stonith", "rhcs", ] env = CTS.CtsLab(args) cm = CM_corosync.crm_corosync(env) CibFactory = ConfigFactory(cm) cib = CibFactory.createConfig("pacemaker-2.0") print(cib.contents()) diff --git a/cts/CTS.py.in b/cts/CTS.py.in index 9c3e8aa2dd..de47e82e88 100644 --- a/cts/CTS.py.in +++ b/cts/CTS.py.in @@ -1,955 +1,939 @@ """ Main classes for Pacemaker's Cluster Test Suite (CTS) """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2000-2018 Alan Robertson " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import os import re import sys import time import traceback if sys.version_info > (3,): from collections import UserDict else: from UserDict import UserDict from cts.CTSvars import * from cts.logging import LogFactory from cts.watcher import LogWatcher from cts.remote import RemoteFactory, input_wrapper from cts.environment import EnvFactory from cts.patterns import PatternSelector has_log_stats = {} log_stats_bin = CTSvars.CRM_DAEMON_DIR + "/cts_log_stats.sh" log_stats = """ #!@BASH_PATH@ # Tool for generating system load reports while CTS runs trap "" 1 f=$1; shift action=$1; shift base=`basename $0` if [ ! -e $f ]; then echo "Time, Load 1, Load 5, Load 15, Test Marker" > $f fi function killpid() { if [ -e $f.pid ]; then kill -9 `cat $f.pid` rm -f $f.pid fi } function status() { if [ -e $f.pid ]; then kill -0 `cat $f.pid` return $? else return 1 fi } function start() { # Is it already running? if status then return fi echo Active as $$ echo $$ > $f.pid while [ 1 = 1 ]; do uptime | sed s/up.*:/,/ | tr '\\n' ',' >> $f #top -b -c -n1 | grep -e usr/libexec/pacemaker | grep -v -e grep -e python | head -n 1 | sed s@/usr/libexec/pacemaker/@@ | awk '{print " 0, "$9", "$10", "$12}' | tr '\\n' ',' >> $f echo 0 >> $f sleep 5 done } case $action in start) start ;; start-bg|bg) # Use c --ssh -- ./stats.sh file start-bg nohup $0 $f start >/dev/null 2>&1 > $f echo " $*" >> $f start ;; *) echo "Unknown action: $action." ;; esac """ class CtsLab(object): '''This class defines the Lab Environment for the Cluster Test System. It defines those things which are expected to change from test environment to test environment for the same cluster manager. It is where you define the set of nodes that are in your test lab what kind of reset mechanism you use, etc. This class is derived from a UserDict because we hold many different parameters of different kinds, and this provides provide a uniform and extensible interface useful for any kind of communication between the user/administrator/tester and CTS. At this point in time, it is the intent of this class to model static configuration and/or environmental data about the environment which doesn't change as the tests proceed. Well-known names (keys) are an important concept in this class. The HasMinimalKeys member function knows the minimal set of well-known names for the class. The following names are standard (well-known) at this time: nodes An array of the nodes in the cluster reset A ResetMechanism object logger An array of objects that log strings... CMclass The type of ClusterManager we are running (This is a class object, not a class instance) RandSeed Random seed. It is a triple of bytes. (optional) The CTS code ignores names it doesn't know about/need. The individual tests have access to this information, and it is perfectly acceptable to provide hints, tweaks, fine-tuning directions or other information to the tests through this mechanism. ''' def __init__(self, args=None): self.Env = EnvFactory().getInstance(args) self.Scenario = None self.logger = LogFactory() self.rsh = RemoteFactory().getInstance() def dump(self): self.Env.dump() def has_key(self, key): return key in list(self.Env.keys()) def __getitem__(self, key): return self.Env[key] def __setitem__(self, key, value): self.Env[key] = value def run(self, Scenario, Iterations): if not Scenario: self.logger.log("No scenario was defined") return 1 self.logger.log("Cluster nodes: ") for node in self.Env["nodes"]: self.logger.log(" * %s" % (node)) if not Scenario.SetUp(): return 1 try : Scenario.run(Iterations) except : self.logger.log("Exception by %s" % sys.exc_info()[0]) self.logger.traceback(traceback) Scenario.summarize() Scenario.TearDown() return 1 #ClusterManager.oprofileSave(Iterations) Scenario.TearDown() Scenario.summarize() if Scenario.Stats["failure"] > 0: return Scenario.Stats["failure"] elif Scenario.Stats["success"] != Iterations: self.logger.log("No failure count but success != requested iterations") return 1 return 0 def __CheckNode(self, node): "Raise a ValueError if the given node isn't valid" if not self.IsValidNode(node): raise ValueError("Invalid node [%s] in CheckNode" % node) class NodeStatus(object): def __init__(self, env): self.Env = env def IsNodeBooted(self, node): '''Return TRUE if the given node is booted (responds to pings)''' if self.Env["docker"]: return RemoteFactory().getInstance()("localhost", "docker inspect --format {{.State.Running}} %s | grep -q true" % node, silent=True) == 0 return RemoteFactory().getInstance()("localhost", "ping -nq -c1 -w1 %s" % node, silent=True) == 0 def IsSshdUp(self, node): rc = RemoteFactory().getInstance()(node, "true", silent=True) return rc == 0 def WaitForNodeToComeUp(self, node, Timeout=300): '''Return TRUE when given node comes up, or None/FALSE if timeout''' timeout = Timeout anytimeouts = 0 while timeout > 0: if self.IsNodeBooted(node) and self.IsSshdUp(node): if anytimeouts: # Fudge to wait for the system to finish coming up time.sleep(30) LogFactory().debug("Node %s now up" % node) return 1 time.sleep(30) if (not anytimeouts): LogFactory().debug("Waiting for node %s to come up" % node) anytimeouts = 1 timeout = timeout - 1 LogFactory().log("%s did not come up within %d tries" % (node, Timeout)) if self.Env["continue"] == 1: answer = "Y" else: try: answer = input_wrapper('Continue? [nY]') except EOFError as e: answer = "n" if answer and answer == "n": raise ValueError("%s did not come up within %d tries" % (node, Timeout)) def WaitForAllNodesToComeUp(self, nodes, timeout=300): '''Return TRUE when all nodes come up, or FALSE if timeout''' for node in nodes: if not self.WaitForNodeToComeUp(node, timeout): return None return 1 class ClusterManager(UserDict): '''The Cluster Manager class. This is an subclass of the Python dictionary class. (this is because it contains lots of {name,value} pairs, not because it's behavior is that terribly similar to a dictionary in other ways.) This is an abstract class which class implements high-level operations on the cluster and/or its cluster managers. Actual cluster managers classes are subclassed from this type. One of the things we do is track the state we think every node should be in. ''' def __InitialConditions(self): #if os.geteuid() != 0: # raise ValueError("Must Be Root!") None def _finalConditions(self): for key in list(self.keys()): if self[key] == None: raise ValueError("Improper derivation: self[" + key + "] must be overridden by subclass.") def __init__(self, Environment, randseed=None): self.Env = EnvFactory().getInstance() self.templates = PatternSelector(self.Env["Name"]) self.__InitialConditions() self.logger = LogFactory() self.TestLoggingLevel=0 self.data = {} self.name = self.Env["Name"] self.rsh = RemoteFactory().getInstance() self.ShouldBeStatus={} self.ns = NodeStatus(self.Env) self.OurNode = os.uname()[1].lower() self.__instance_errorstoignore = [] def __getitem__(self, key): if key == "Name": return self.name print("FIXME: Getting %s from %s" % (key, repr(self))) if key in self.data: return self.data[key] return self.templates.get_patterns(self.Env["Name"], key) def __setitem__(self, key, value): print("FIXME: Setting %s=%s on %s" % (key, value, repr(self))) self.data[key] = value def key_for_node(self, node): return node def instance_errorstoignore_clear(self): '''Allows the test scenario to reset instance errors to ignore on each iteration.''' self.__instance_errorstoignore = [] def instance_errorstoignore(self): '''Return list of errors which are 'normal' for a specific test instance''' return self.__instance_errorstoignore def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] def log(self, args): self.logger.log(args) def debug(self, args): self.logger.debug(args) def prepare(self): '''Finish the Initialization process. Prepare to test...''' print(repr(self)+"prepare") for node in self.Env["nodes"]: if self.StataCM(node): self.ShouldBeStatus[node] = "up" else: self.ShouldBeStatus[node] = "down" if self.Env["experimental-tests"]: self.unisolate_node(node) def upcount(self): '''How many nodes are up?''' count = 0 for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": count = count + 1 return count - def install_helper(self, filename, destdir=None, nodes=None, sourcedir=None): - if sourcedir == None: - sourcedir = CTSvars.CTS_home - file_with_path = "%s/%s" % (sourcedir, filename) - if not nodes: - nodes = self.Env["nodes"] - - if not destdir: - destdir = CTSvars.CTS_home - - self.debug("Installing %s to %s on %s" % (filename, destdir, repr(self.Env["nodes"]))) - for node in nodes: - self.rsh(node, "mkdir -p %s" % destdir) - self.rsh.cp(file_with_path, "root@%s:%s/%s" % (node, destdir, filename)) - return file_with_path - def install_support(self, command="install"): for node in self.Env["nodes"]: self.rsh(node, CTSvars.CRM_DAEMON_DIR + "/cts-support " + command) def install_config(self, node): return None def prepare_fencing_watcher(self, name): # If we don't have quorum now but get it as a result of starting this node, # then a bunch of nodes might get fenced upnode = None if self.HasQuorum(None): self.debug("Have quorum") return None if not self.templates["Pat:Fencing_start"]: print("No start pattern") return None if not self.templates["Pat:Fencing_ok"]: print("No ok pattern") return None stonith = None stonithPats = [] for peer in self.Env["nodes"]: if self.ShouldBeStatus[peer] != "up": stonithPats.append(self.templates["Pat:Fencing_ok"] % peer) stonithPats.append(self.templates["Pat:Fencing_start"] % peer) stonith = LogWatcher(self.Env["LogFileName"], stonithPats, "StartupFencing", 0, hosts=self.Env["nodes"], kind=self.Env["LogWatcher"]) stonith.setwatch() return stonith def fencing_cleanup(self, node, stonith): peer_list = [] peer_state = {} self.debug("Looking for nodes that were fenced as a result of %s starting" % node) # If we just started a node, we may now have quorum (and permission to fence) if not stonith: self.debug("Nothing to do") return peer_list q = self.HasQuorum(None) if not q and len(self.Env["nodes"]) > 2: # We didn't gain quorum - we shouldn't have shot anyone self.debug("Quorum: %d Len: %d" % (q, len(self.Env["nodes"]))) return peer_list for n in self.Env["nodes"]: peer_state[n] = "unknown" # Now see if any states need to be updated self.debug("looking for: " + repr(stonith.regexes)) shot = stonith.look(0) while shot: line = repr(shot) self.debug("Found: " + line) del stonith.regexes[stonith.whichmatch] # Extract node name for n in self.Env["nodes"]: if re.search(self.templates["Pat:Fencing_ok"] % n, shot): peer = n peer_state[peer] = "complete" self.__instance_errorstoignore.append(self.templates["Pat:Fencing_ok"] % peer) elif peer_state[n] != "complete" and re.search(self.templates["Pat:Fencing_start"] % n, shot): # TODO: Correctly detect multiple fencing operations for the same host peer = n peer_state[peer] = "in-progress" self.__instance_errorstoignore.append(self.templates["Pat:Fencing_start"] % peer) if not peer: self.logger.log("ERROR: Unknown stonith match: %s" % line) elif not peer in peer_list: self.debug("Found peer: " + peer) peer_list.append(peer) # Get the next one shot = stonith.look(60) for peer in peer_list: self.debug(" Peer %s was fenced as a result of %s starting: %s" % (peer, node, peer_state[peer])) if self.Env["at-boot"]: self.ShouldBeStatus[peer] = "up" else: self.ShouldBeStatus[peer] = "down" if peer_state[peer] == "in-progress": # Wait for any in-progress operations to complete shot = stonith.look(60) while len(stonith.regexes) and shot: line = repr(shot) self.debug("Found: " + line) del stonith.regexes[stonith.whichmatch] shot = stonith.look(60) # Now make sure the node is alive too self.ns.WaitForNodeToComeUp(peer, self.Env["DeadTime"]) # Poll until it comes up if self.Env["at-boot"]: if not self.StataCM(peer): time.sleep(self.Env["StartTime"]) if not self.StataCM(peer): self.logger.log("ERROR: Peer %s failed to restart after being fenced" % peer) return None return peer_list def StartaCM(self, node, verbose=False): '''Start up the cluster manager on a given node''' if verbose: self.logger.log("Starting %s on node %s" % (self.templates["Name"], node)) else: self.debug("Starting %s on node %s" % (self.templates["Name"], node)) ret = 1 if not node in self.ShouldBeStatus: self.ShouldBeStatus[node] = "down" if self.ShouldBeStatus[node] != "down": return 1 patterns = [] # Technically we should always be able to notice ourselves starting patterns.append(self.templates["Pat:Local_started"] % node) if self.upcount() == 0: patterns.append(self.templates["Pat:DC_started"] % node) else: patterns.append(self.templates["Pat:NonDC_started"] % node) watch = LogWatcher( self.Env["LogFileName"], patterns, "StartaCM", self.Env["StartTime"]+10, hosts=self.Env["nodes"], kind=self.Env["LogWatcher"]) self.install_config(node) self.ShouldBeStatus[node] = "any" if self.StataCM(node) and self.cluster_stable(self.Env["DeadTime"]): self.logger.log ("%s was already started" % (node)) return 1 stonith = self.prepare_fencing_watcher(node) watch.setwatch() if self.rsh(node, self.templates["StartCmd"]) != 0: self.logger.log ("Warn: Start command failed on node %s" % (node)) self.fencing_cleanup(node, stonith) return None self.ShouldBeStatus[node] = "up" watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.logger.log ("Warn: Startup pattern not found: %s" % (regex)) if watch_result and self.cluster_stable(self.Env["DeadTime"]): #self.debug("Found match: "+ repr(watch_result)) self.fencing_cleanup(node, stonith) return 1 elif self.StataCM(node) and self.cluster_stable(self.Env["DeadTime"]): self.fencing_cleanup(node, stonith) return 1 self.logger.log ("Warn: Start failed for node %s" % (node)) return None def StartaCMnoBlock(self, node, verbose=False): '''Start up the cluster manager on a given node with none-block mode''' if verbose: self.logger.log("Starting %s on node %s" % (self["Name"], node)) else: self.debug("Starting %s on node %s" % (self["Name"], node)) self.install_config(node) self.rsh(node, self.templates["StartCmd"], synchronous=0) self.ShouldBeStatus[node] = "up" return 1 def StopaCM(self, node, verbose=False, force=False): '''Stop the cluster manager on a given node''' if verbose: self.logger.log("Stopping %s on node %s" % (self["Name"], node)) else: self.debug("Stopping %s on node %s" % (self["Name"], node)) if self.ShouldBeStatus[node] != "up" and force == False: return 1 if self.rsh(node, self.templates["StopCmd"]) == 0: # Make sure we can continue even if corosync leaks # fdata-* is the old name #self.rsh(node, "rm -f /dev/shm/qb-* /dev/shm/fdata-*") self.ShouldBeStatus[node] = "down" self.cluster_stable(self.Env["DeadTime"]) return 1 else: self.logger.log ("ERROR: Could not stop %s on node %s" % (self["Name"], node)) return None def StopaCMnoBlock(self, node): '''Stop the cluster manager on a given node with none-block mode''' self.debug("Stopping %s on node %s" % (self["Name"], node)) self.rsh(node, self.templates["StopCmd"], synchronous=0) self.ShouldBeStatus[node] = "down" return 1 def cluster_stable(self, timeout = None): time.sleep(self.Env["StableTime"]) return 1 def node_stable(self, node): return 1 def RereadCM(self, node): '''Force the cluster manager on a given node to reread its config This may be a no-op on certain cluster managers. ''' rc=self.rsh(node, self.templates["RereadCmd"]) if rc == 0: return 1 else: self.logger.log ("Could not force %s on node %s to reread its config" % (self["Name"], node)) return None def StataCM(self, node): '''Report the status of the cluster manager on a given node''' out=self.rsh(node, self.templates["StatusCmd"] % node, 1) ret= (str.find(out, 'stopped') == -1) try: if ret: if self.ShouldBeStatus[node] == "down": self.logger.log( "Node status for %s is %s but we think it should be %s" % (node, "up", self.ShouldBeStatus[node])) else: if self.ShouldBeStatus[node] == "up": self.logger.log( "Node status for %s is %s but we think it should be %s" % (node, "down", self.ShouldBeStatus[node])) except KeyError: pass if ret: self.ShouldBeStatus[node] = "up" else: self.ShouldBeStatus[node] = "down" return ret def startall(self, nodelist=None, verbose=False, quick=False): '''Start the cluster manager on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' map = {} if not nodelist: nodelist = self.Env["nodes"] for node in nodelist: if self.ShouldBeStatus[node] == "down": self.ns.WaitForAllNodesToComeUp(nodelist, 300) if not quick: # This is used for "basic sanity checks", so only start one node ... if not self.StartaCM(node, verbose=verbose): return 0 return 1 # Approximation of SimulStartList for --boot watchpats = [ ] watchpats.append(self.templates["Pat:DC_IDLE"]) for node in nodelist: watchpats.append(self.templates["Pat:Local_started"] % node) watchpats.append(self.templates["Pat:InfraUp"] % node) watchpats.append(self.templates["Pat:PacemakerUp"] % node) # Start all the nodes - at about the same time... watch = LogWatcher(self.Env["LogFileName"], watchpats, "fast-start", self.Env["DeadTime"]+10, hosts=self.Env["nodes"], kind=self.Env["LogWatcher"]) watch.setwatch() if not self.StartaCM(nodelist[0], verbose=verbose): return 0 for node in nodelist: self.StartaCMnoBlock(node, verbose=verbose) watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.logger.log ("Warn: Startup pattern not found: %s" % (regex)) if not self.cluster_stable(): self.logger.log("Cluster did not stabilize") return 0 return 1 def stopall(self, nodelist=None, verbose=False, force=False): '''Stop the cluster managers on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' ret = 1 map = {} if not nodelist: nodelist = self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up" or force == True: if not self.StopaCM(node, verbose=verbose, force=force): ret = 0 return ret def rereadall(self, nodelist=None): '''Force the cluster managers on every node in the cluster to reread their config files. We can do it on a subset of the cluster if nodelist is not None. ''' map = {} if not nodelist: nodelist = self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": self.RereadCM(node) def statall(self, nodelist=None): '''Return the status of the cluster managers in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' result = {} if not nodelist: nodelist = self.Env["nodes"] for node in nodelist: if self.StataCM(node): result[node] = "up" else: result[node] = "down" return result def isolate_node(self, target, nodes=None): '''isolate the communication between the nodes''' if not nodes: nodes = self.Env["nodes"] for node in nodes: if node != target: rc = self.rsh(target, self.templates["BreakCommCmd"] % self.key_for_node(node)) if rc != 0: self.logger.log("Could not break the communication between %s and %s: %d" % (target, node, rc)) return None else: self.debug("Communication cut between %s and %s" % (target, node)) return 1 def unisolate_node(self, target, nodes=None): '''fix the communication between the nodes''' if not nodes: nodes = self.Env["nodes"] for node in nodes: if node != target: restored = 0 # Limit the amount of time we have asynchronous connectivity for # Restore both sides as simultaneously as possible self.rsh(target, self.templates["FixCommCmd"] % self.key_for_node(node), synchronous=0) self.rsh(node, self.templates["FixCommCmd"] % self.key_for_node(target), synchronous=0) self.debug("Communication restored between %s and %s" % (target, node)) def reducecomm_node(self,node): '''reduce the communication between the nodes''' rc = self.rsh(node, self.templates["ReduceCommCmd"]%(self.Env["XmitLoss"],self.Env["RecvLoss"])) if rc == 0: return 1 else: self.logger.log("Could not reduce the communication between the nodes from node: %s" % node) return None def restorecomm_node(self,node): '''restore the saved communication between the nodes''' rc = 0 if float(self.Env["XmitLoss"]) != 0 or float(self.Env["RecvLoss"]) != 0 : rc = self.rsh(node, self.templates["RestoreCommCmd"]); if rc == 0: return 1 else: self.logger.log("Could not restore the communication between the nodes from node: %s" % node) return None def HasQuorum(self, node_list): "Return TRUE if the cluster currently has quorum" # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes raise ValueError("Abstract Class member (HasQuorum)") def Components(self): raise ValueError("Abstract Class member (Components)") def oprofileStart(self, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileStart(n) elif node in self.Env["oprofile"]: self.debug("Enabling oprofile on %s" % node) self.rsh(node, "opcontrol --init") self.rsh(node, "opcontrol --setup --no-vmlinux --separate=lib --callgraph=20 --image=all") self.rsh(node, "opcontrol --start") self.rsh(node, "opcontrol --reset") def oprofileSave(self, test, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileSave(test, n) elif node in self.Env["oprofile"]: self.rsh(node, "opcontrol --dump") self.rsh(node, "opcontrol --save=cts.%d" % test) # Read back with: opreport -l session:cts.0 image:/c* if None: self.rsh(node, "opcontrol --reset") else: self.oprofileStop(node) self.oprofileStart(node) def oprofileStop(self, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileStop(n) elif node in self.Env["oprofile"]: self.debug("Stopping oprofile on %s" % node) self.rsh(node, "opcontrol --reset") self.rsh(node, "opcontrol --shutdown 2>&1 > /dev/null") def StatsExtract(self): if not self.Env["stats"]: return for host in self.Env["nodes"]: log_stats_file = "%s/cts-stats.csv" % CTSvars.CRM_DAEMON_DIR if host in has_log_stats: self.rsh(host, '''bash %s %s stop''' % (log_stats_bin, log_stats_file)) (rc, lines) = self.rsh(host, '''cat %s''' % log_stats_file, stdout=2) self.rsh(host, '''bash %s %s delete''' % (log_stats_bin, log_stats_file)) fname = "cts-stats-%d-nodes-%s.csv" % (len(self.Env["nodes"]), host) print("Extracted stats: %s" % fname) fd = open(fname, "a") fd.writelines(lines) fd.close() def StatsMark(self, testnum): '''Mark the test number in the stats log''' global has_log_stats if not self.Env["stats"]: return for host in self.Env["nodes"]: log_stats_file = "%s/cts-stats.csv" % CTSvars.CRM_DAEMON_DIR if not host in has_log_stats: global log_stats global log_stats_bin script=log_stats #script = re.sub("\\\\", "\\\\", script) script = re.sub('\"', '\\\"', script) script = re.sub("'", "\'", script) script = re.sub("`", "\`", script) script = re.sub("\$", "\\\$", script) self.debug("Installing %s on %s" % (log_stats_bin, host)) self.rsh(host, '''echo "%s" > %s''' % (script, log_stats_bin), silent=True) self.rsh(host, '''bash %s %s delete''' % (log_stats_bin, log_stats_file)) has_log_stats[host] = 1 # Now mark it self.rsh(host, '''bash %s %s mark %s''' % (log_stats_bin, log_stats_file, testnum), synchronous=0) class Resource(object): ''' This is an HA resource (not a resource group). A resource group is just an ordered list of Resource objects. ''' def __init__(self, cm, rsctype=None, instance=None): self.CM = cm self.ResourceType = rsctype self.Instance = instance self.needs_quorum = 1 def Type(self): return self.ResourceType def Instance(self, nodename): return self.Instance def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. It is analagous to the "status" operation on SystemV init scripts and heartbeat scripts. FailSafe calls it the "exclusive" operation. ''' raise ValueError("Abstract Class member (IsRunningOn)") return None def IsWorkingCorrectly(self, nodename): ''' This member function returns true if our resource is operating correctly on the given node in the cluster. OCF does not require this operation, but it might be called the Monitor operation, which is what FailSafe calls it. For remotely monitorable resources (like IP addresses), they *should* be monitored remotely for testing. ''' raise ValueError("Abstract Class member (IsWorkingCorrectly)") return None def Start(self, nodename): ''' This member function starts or activates the resource. ''' raise ValueError("Abstract Class member (Start)") return None def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' raise ValueError("Abstract Class member (Stop)") return None def __repr__(self): if (self.Instance and len(self.Instance) > 1): return "{" + self.ResourceType + "::" + self.Instance + "}" else: return "{" + self.ResourceType + "}" class Component(object): def kill(self, node): None class Process(Component): def __init__(self, cm, name, process=None, dc_only=0, pats=[], dc_pats=[], badnews_ignore=[], common_ignore=[], triggersreboot=0): self.name = str(name) self.dc_only = dc_only self.pats = pats self.dc_pats = dc_pats self.CM = cm self.badnews_ignore = badnews_ignore self.badnews_ignore.extend(common_ignore) self.triggersreboot = triggersreboot if process: self.proc = str(process) else: self.proc = str(name) self.KillCmd = "killall -9 " + self.proc def kill(self, node): if self.CM.rsh(node, self.KillCmd) != 0: self.CM.log ("ERROR: Kill %s failed on node %s" % (self.name,node)) return None return 1 diff --git a/cts/cts-exec.in b/cts/cts-exec.in index f70b2e9689..b944e84956 100644 --- a/cts/cts-exec.in +++ b/cts/cts-exec.in @@ -1,1278 +1,1230 @@ #!@PYTHON@ """ Regression tests for Pacemaker's pacemaker-execd """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2012-2018 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import stat import sys import subprocess import shlex import shutil import time # Where to find test binaries # Prefer the source tree if available BUILD_DIR = "@abs_top_builddir@" TEST_DIR = sys.path[0] SBIN_DIR = "@sbindir@" # File permissions for executable scripts we create EXECMODE = stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH # These values must be kept in sync with include/crm/crm.h class CrmExit: OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-exec.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) # For pacemaker-execd, cts-exec-helper, and pacemaker-remoted new_path = "%s/daemons/execd:%s" % (BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For crm_resource # For pacemaker-fenced new_path = "%s/daemons/fenced:%s" % (BUILD_DIR, new_path) # For cts-support new_path = "%s/cts:%s" % (BUILD_DIR, new_path) else: print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) # For cts-exec-helper, cts-support, pacemaker-execd, pacemaker-fenced, # and pacemaker-remoted new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Run a command, and return its standard output. """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") def write_file(filename, contents, executable=False): """ Create a file. """ print("Installing %s ..." % (filename)) f = io.open(filename, "w+") f.write(contents) f.close() if executable: os.chmod(filename, EXECMODE) class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single pacemaker-execd regression test """ def __init__(self, name, description, verbose=0, tls=0): self.name = name self.description = description self.cmds = [] if tls: self.daemon_location = "pacemaker-remoted" else: self.daemon_location = "pacemaker-execd" self.test_tool_location = "cts-exec-helper" self.verbose = verbose self.tls = tls self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK self.execd_process = None self.stonith_process = None self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): """ Prepare the host for running a test """ ### make sure we are in full control here ### cmd = shlex.split("killall -q -9 pacemaker-fenced lt-pacemaker-fenced pacemaker-execd lt-pacemaker-execd cts-exec-helper lt-cts-exec-helper pacemaker-remoted") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" if self.tls == 0: self.stonith_process = subprocess.Popen(shlex.split("pacemaker-fenced -s")) if self.verbose: additional_args = additional_args + " -V" self.execd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/pacemaker-execd-regression.log" % (self.daemon_location, additional_args))) time.sleep(1) def clean_environment(self): """ Clean up the host after running a test """ if self.execd_process: self.execd_process.terminate() self.execd_process.wait() if self.verbose: print("Daemon output") logfile = io.open('/tmp/pacemaker-execd-regression.log', 'rt', errors='replace') for line in logfile: print(line.strip().encode('utf-8', 'replace')) os.remove('/tmp/pacemaker-execd-regression.log') if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.execd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "") def add_cmd_check_stdout(self, args, match, no_match=""): """ Add a command with expected output to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, match, 0, no_match) def add_cmd(self, args): """ Add a cts-exec-helper command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "") def add_cmd_and_kill(self, kill_proc, args): """ Add a cts-exec-helper command and system command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "", kill=kill_proc) def add_expected_fail_cmd(self, args, exitcode=CrmExit.ERROR): """ Add a cts-exec-helper command to be executed as part of this test and expected to fail """ self.__new_cmd(self.test_tool_location, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) ### Typically, the kill argument is used to detect some sort of ### failure. Without yielding for a few seconds here, the process ### launched earlier that is listening for the failure may not have ### time to connect to pacemaker-execd. time.sleep(2) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = pipe_output(test) args['cmd_output'] = output if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: raise OutputNotFoundError(output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: raise OutputFoundError(output) def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. """ res = 0 i = 1 if self.tls and self.name.count("stonith") != 0: self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print(self.result_txt) return res self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print(cmd['cmd_output']) print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print(cmd['cmd_output'].strip()) print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all pacemaker-execd regression tests """ def __init__(self, verbose=0, tls=0): self.tests = [] self.verbose = verbose self.tls = tls self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.installed_files = [] self.action_timeout = " -t 9000 " if self.tls: self.rsc_classes.remove("stonith") if "systemd" in self.rsc_classes: try: # This code doesn't need this import, but pacemaker-cts-dummyd # does, so ensure the dependency is available rather than cause # all systemd tests to fail. import systemd.daemon except ImportError: print("Fatal error: python systemd bindings not found. Is package installed?", file=sys.stderr) sys.exit(CrmExit.ERROR) print("Testing resource classes", repr(self.rsc_classes)) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : '-c exec -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : '-c cancel -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc " + self.action_timeout + " -C systemd -T pacemaker-cts-dummyd@3", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : '-c exec -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" -t 15000 ", "systemd_cancel_line" : '-c cancel -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T pacemaker-cts-dummyd", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : '-c exec -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_monitor_event" : '-l "NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete" -t 15000', "upstart_cancel_line" : '-c cancel -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : '-c exec -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : '-c cancel -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : '-c exec -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : '-c cancel -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", - "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc "+self.action_timeout+" -C stonith -P pacemaker -T fence_dummy_monitor", + "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc " + self.action_timeout + + " -C stonith -P pacemaker -T fence_dummy", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : '-c exec -r stonith_test_rsc -a start ' + self.action_timeout, "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : '-c exec -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : '-c cancel -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): """ Create a named test """ test = Test(name, description, self.verbose, self.tls) self.tests.append(test) return test def setup_test_environment(self): """ Prepare the host before executing any tests """ os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): print("Installing /etc/pacemaker/authkey ...") os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") self.installed_files.append("/etc/pacemaker/authkey") - dummy_fence_sleep_agent = ("""#!@PYTHON@ -import sys -import time -def main(): - for line in sys.stdin: - if line.count("monitor") > 0: - time.sleep(30000) - sys.exit(0) - sys.exit(1) -if __name__ == "__main__": - main() -""") - dummy_fence_agent = ("""#!/bin/sh -while read line; do - case ${line} in - *monitor*) exit 0;; - *metadata*) - echo '' - echo ' dummy description.' - echo ' http://www.example.com' - echo ' ' - echo ' ' - echo ' ' - echo ' ' - echo ' Fencing Action' - echo ' ' - echo ' ' - echo ' ' - echo ' ' - echo ' Physical plug number or name of virtual machine' - echo ' ' - echo ' ' - echo ' ' - echo ' ' - echo ' ' - echo ' ' - echo ' ' - echo ' ' - echo '' - exit 0;; - esac - exit 1 -done -""") - - write_file(SBIN_DIR + "/fence_dummy_sleep", dummy_fence_sleep_agent, executable=True); - write_file(SBIN_DIR + "/fence_dummy_monitor", dummy_fence_agent, executable=True); - # If we're in build directory, install agents if not already installed if os.path.exists("%s/cts/cts-exec.in" % BUILD_DIR): if not os.path.exists("@OCF_RA_DIR@/pacemaker"): # @TODO remember which components were created and remove them os.makedirs("@OCF_RA_DIR@/pacemaker", 0o755) for agent in ["Dummy", "Stateful", "ping"]: agent_source = "%s/extra/resources/%s" % (BUILD_DIR, agent) agent_dest = "@OCF_RA_DIR@/pacemaker/%s" % (agent) if not os.path.exists(agent_dest): print("Installing %s ..." % (agent_dest)) shutil.copyfile(agent_source, agent_dest) os.chmod(agent_dest, EXECMODE) self.installed_files.append(agent_dest) subprocess.call(["cts-support", "install"]) def cleanup_test_environment(self): """ Clean up the host after executing desired tests """ - os.system("rm -f " + SBIN_DIR + "/fence_dummy_monitor") - os.system("rm -f " + SBIN_DIR + "/fence_dummy_sleep") - for installed_file in self.installed_files: print("Removing %s ..." % (installed_file)) os.remove(installed_file) subprocess.call(["cts-support", "uninstall"]) def build_generic_tests(self): """ Register tests that apply to all resource classes """ common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # Add the duplicate monitors test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_multi_rsc_tests(self): """ Register complex tests that involve managing multiple resouces of different types """ common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: ### If this fails, that means the monitor is not being rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_negative_tests(self): """ Register tests related to how pacemaker-execd handles failures """ ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") # -t must be less than self.action_timeout test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") - test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" " - + self.action_timeout + - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") - test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 1000 -w") # -t must be less than self.action_timeout + test.add_cmd('-c register_rsc -r test_rsc ' + + '-C stonith -P pacemaker -T fence_dummy ' + + self.action_timeout + + '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete"') + test.add_cmd('-c exec -r test_rsc -a start -k monitor_delay -v 30 ' + + '-t 1000 -w') # -t must be less than self.action_timeout test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after pacemaker-execd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd('-c exec -r stonith_test_rsc -a monitor -i 600s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" ' + self.action_timeout) test.add_cmd_and_kill("killall -9 -q pacemaker-fenced lt-pacemaker-fenced", '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error" -t 15000') test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" ' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ') test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd_and_kill('rm -f @localstatedir@/run/Dummy-test_rsc.state', '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout + '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"') ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T pacemaker-cts-dummyd@3 " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q pacemaker-cts-dummyd", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T pacemaker-cts-dummyd "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill('killall -9 -q dd', '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) ### interval is wrong, should fail test.add_expected_fail_cmd('-c cancel -r test_rsc -a monitor -i 2s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") ### action name is wrong, should fail test.add_expected_fail_cmd('-c cancel -r test_rsc -a stop -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd('-c exec -r test_rsc -a monitor -i 6s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") def build_stress_tests(self): """ Register stress tests """ timeout = "-t 20000" iterations = 25 test = self.new_test("ocf_stress", "Verify OCF agent handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T pacemaker-cts-dummyd@3 -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) iterations = 9 timeout = "-t 30000" ### Verify recurring op in-flight collision is handled in series properly test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) for i in range(iterations): test.add_cmd('-c exec -r test_rsc -a monitor %s -i 100%dms ' '-k op_sleep -v 2 ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' % (timeout, i)) test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) def build_custom_tests(self): """ Register tests that target specific cases """ ### verify resource temporary folder is created and used by OCF agents. ### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000", CrmExit.TIMEOUT) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000", CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T pacemaker-cts-dummyd@3 "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T pacemaker-cts-dummyd "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r \"test_rsc\" -a \"monitor\" -i 1s ' + self.action_timeout + ' -n ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ') test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"", "resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"", "resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") - test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_monitor\"", - "resource-agent name=\"fence_dummy_monitor\"") + test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy\"", + "resource-agent name=\"fence_dummy\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"pacemaker-cts-dummyd@\"", "resource-agent name=\"pacemaker-cts-dummyd@\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"pacemaker-cts-dummyd\"", "resource-agent name=\"pacemaker-cts-dummyd\"") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy_monitor") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy_monitor") ### should not exist - test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy_monitor") ### should not exist + test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy") ### should not exist + test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy") ### should not exist + test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd@") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "pacemaker-cts-dummyd@") - test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist + test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "pacemaker-cts-dummyd") - test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist + test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy") ### should not exist if "stonith" in self.rsc_classes: - test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy_monitor") ### stonith ### + test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist - test.add_cmd_check_stdout("-c list_agents ", "fence_dummy_monitor") + test.add_cmd_check_stdout("-c list_agents ", "fence_dummy") def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['pacemaker-remote'] = 0 def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-R" or args[i] == "--pacemaker-remote": self.options['pacemaker-remote'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--pacemaker-remote | -R Test pacemaker-remoted binary instead of pacemaker-execd") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run pacemaker-execd regression tests as specified by arguments """ update_path() opts = TestOptions() opts.build_options(argv) tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote']) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.build_stress_tests() tests.setup_test_environment() print("Starting ...") if opts.options['list-tests']: tests.print_list() elif opts.options['show-usage']: opts.show_usage() elif opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in index 67fb339c5c..f7319a4a5e 100644 --- a/cts/cts-fencing.in +++ b/cts/cts-fencing.in @@ -1,1427 +1,1442 @@ #!@PYTHON@ """ Regression tests for Pacemaker's fencer """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2012-2018 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import sys import subprocess import shlex import time -FENCE_DUMMY = "@datadir@/@PACKAGE@/tests/cts/fence_dummy" +# Where to find test binaries +# Prefer the source tree if available +BUILD_DIR = "@abs_top_builddir@" +TEST_DIR = sys.path[0] # These values must be kept in sync with include/crm/crm.h class CrmExit: OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 +def update_path(): + """ Set the PATH environment variable appropriately for the tests """ + + new_path = os.environ['PATH'] + if os.path.exists("%s/cts-fencing.in" % TEST_DIR): + print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) + # For pacemaker-fenced and cts-fence-helper + new_path = "%s/daemons/fenced:%s" % (BUILD_DIR, new_path) + new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For stonith_admin + new_path = "%s/cts:%s" % (BUILD_DIR, new_path) # For cts-support + + else: + print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) + # For pacemaker-fenced, cts-fence-helper, and cts-support + new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) + + print('Using PATH="{}"'.format(new_path)) + os.environ['PATH'] = new_path + + def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Execute command and return its standard output """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") def localname(): """ Return the uname of the local host """ our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] else: our_uname = "localhost" return our_uname def killall(process): """ Kill all instances of a process """ cmd = shlex.split("killall -9 -q %s" % process) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, exit_code): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, exit_code): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single test """ def __init__(self, name, description, verbose=0, with_cpg=0): self.name = name self.description = description self.cmds = [] self.verbose = verbose self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK if with_cpg: self.stonith_options = "-c" self.enable_corosync = 1 else: self.stonith_options = "-s" self.enable_corosync = 0 self.stonith_process = None self.stonith_output = "" self.stonith_patterns = [] self.negative_stonith_patterns = [] self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, } ) def start_environment(self): """ Prepare the host for executing a test """ # Make sure we are in full control killall("pacemakerd") killall("pacemaker-fenced") if self.verbose: self.stonith_options = self.stonith_options + " -V" print("Starting pacemaker-fenced with %s" % self.stonith_options) if os.path.exists("/tmp/stonith-regression.log"): os.remove('/tmp/stonith-regression.log') - cmd = "@CRM_DAEMON_DIR@/pacemaker-fenced %s -l /tmp/stonith-regression.log" % self.stonith_options + cmd = "pacemaker-fenced %s -l /tmp/stonith-regression.log" % self.stonith_options self.stonith_process = subprocess.Popen(shlex.split(cmd)) time.sleep(1) def clean_environment(self): """ Clean up the host after executing a test """ if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.stonith_output = "" self.stonith_process = None logfile = io.open('/tmp/stonith-regression.log', 'rt') for line in logfile.readlines(): self.stonith_output = self.stonith_output + line if self.verbose: print("Daemon Output Start") print(self.stonith_output) print("Daemon Output End") os.remove('/tmp/stonith-regression.log') def add_stonith_log_pattern(self, pattern): """ Add a log pattern to expect from this test """ self.stonith_patterns.append(pattern) def add_stonith_neg_log_pattern(self, pattern): """ Add a log pattern that should not occur with this test """ self.negative_stonith_patterns.append(pattern) def add_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "") def add_cmd_no_wait(self, cmd, args): """ Add a simple command to be executed (without waiting) as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "", 1) def add_cmd_check_stdout(self, cmd, args, match, no_match=""): """ Add a simple command with expected output to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, match, 0, no_match) def add_expected_fail_cmd(self, cmd, args, exitcode=CrmExit.ERROR): """ Add a command to be executed as part of this test and expected to fail """ self.__new_cmd(cmd, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = pipe_output(test, stderr=True) if self.verbose: print(output) if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: raise OutputNotFoundError(output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: raise OutputFoundError(output) def count_negative_matches(self, outline): """ Return 1 if a line matches patterns that shouldn't have occurred """ count = 0 for line in self.negative_stonith_patterns: if outline.count(line): count = 1 if self.verbose: print("This pattern should not have matched = '%s" % (line)) return count def match_stonith_patterns(self): """ Check test output for expected patterns """ negative_matches = 0 cur = 0 pats = self.stonith_patterns total_patterns = len(self.stonith_patterns) if len(self.stonith_patterns) == 0 and len(self.negative_stonith_patterns) == 0: return for line in self.stonith_output.split("\n"): negative_matches = negative_matches + self.count_negative_matches(line) if len(pats) == 0: continue cur = -1 for pat in pats: cur = cur + 1 if line.count(pats[cur]): del pats[cur] break if len(pats) > 0 or negative_matches: if self.verbose: for pat in pats: print("Pattern Not Matched = '%s'" % pat) msg = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." self.result_txt = msg % (self.name, len(pats), total_patterns, negative_matches) self.result_exitcode = CrmExit.ERROR def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. """ res = 0 i = 1 self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() if self.result_exitcode == CrmExit.OK: self.match_stonith_patterns() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all fencing regression tests """ def __init__(self, verbose=0): self.tests = [] self.verbose = verbose self.autogen_corosync_cfg = 0 if not os.path.exists("/etc/corosync/corosync.conf"): self.autogen_corosync_cfg = 1 def new_test(self, name, description, with_cpg=0): """ Create a named test """ test = Test(name, description, self.verbose, with_cpg) self.tests.append(test) return test def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def start_corosync(self): """ Start the corosync process """ if self.verbose: print("Starting corosync") test = subprocess.Popen("corosync", stdout=subprocess.PIPE) test.wait() time.sleep(10) def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_cpg_only(self): """ Run all corosync-enabled tests """ for test in self.tests: if test.enable_corosync: test.run() def run_no_cpg(self): """ Run all standalone tests """ for test in self.tests: if not test.enable_corosync: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) def build_api_sanity_tests(self): """ Register tests to verify basic API usage """ verbose_arg = "" if self.verbose: verbose_arg = "-V" test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") - test.add_cmd("@CRM_DAEMON_DIR@/cts-fence-helper", "-t %s" % (verbose_arg)) + test.add_cmd("cts-fence-helper", "-t %s" % (verbose_arg)) test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) - test.add_cmd("@CRM_DAEMON_DIR@/cts-fence-helper", "-m %s" % (verbose_arg)) + test.add_cmd("cts-fence-helper", "-m %s" % (verbose_arg)) def build_custom_timeout_tests(self): """ Register tests to verify custom timeout usage """ # custom timeout without topology test = self.new_test("cpg_custom_timeout_1", "Verify per device timeouts work as expected without using topology.", 1) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4"') test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4 = 7 test.add_stonith_log_pattern("Total timeout set to 7") # custom timeout _WITH_ topology test = self.new_test("cpg_custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology.", 1) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4000"') test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2") test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4000 = 4003 test.add_stonith_log_pattern("Total timeout set to 4003") def build_fence_merge_tests(self): """ Register tests to verify when fence operations should be merged """ ### Simple test that overlapping fencing operations get merged test = self.new_test("cpg_custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### one merger will happen test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur test = self.new_test("cpg_custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur with topologies used test = self.new_test("cpg_custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") def build_fence_no_merge_tests(self): """ Register tests to verify when fence operations should not be merged """ test = self.new_test("cpg_custom_no_merge", "Verify differing fencing operations are not merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") test.add_stonith_neg_log_pattern("Merging stonith action off for node node3 originating from client") def build_standalone_tests(self): """ Register a grab bag of tests that can be executed in standalone or corosync mode """ test_types = [ { "prefix" : "standalone", "use_cpg" : 0, }, { "prefix" : "cpg", "use_cpg" : 1, }, ] # test what happens when all devices timeout for test_type in test_types: test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") if test_type["use_cpg"] == 1: test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", CrmExit.TIMEOUT) test.add_stonith_log_pattern("Total timeout set to 6") else: test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", CrmExit.ERROR) test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ") # test what happens when multiple devices can fence a node, but the first device fails. for test_type in test_types: test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2") if test_type["use_cpg"] == 1: test.add_stonith_log_pattern("Total timeout set to 6") # simple topology test for one device for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_simple" % test_type["prefix"], "Verify all fencing devices at a level are used.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("Total timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # add topology, delete topology, verify fencing still works for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_add_remove" % test_type["prefix"], "Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-d node3 -i 1") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("Total timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level has multiple devices. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_device_fails" % test_type["prefix"], "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 40") test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level fails. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 3") test.add_stonith_log_pattern("Total timeout set to 18") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # test what happens when the first fencing level had devices that no one has registered for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], "Verify topology can continue with missing devices.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 2") # Test what happens if multiple fencing levels are defined, and then the first one is removed. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_removal" % test_type["prefix"], "Verify level removal works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit. test.add_cmd("stonith_admin", "-d node3 -i 2") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 8") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_neg_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # Test targeting a topology level by node name pattern. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_pattern" % test_type["prefix"], "Verify targeting topology by node name pattern works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", """-R true -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1 node2 node3" """) test.add_cmd("stonith_admin", """-r '@node.*' -i 1 -v true""") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test allowing commas and semicolons as delimiters in pcmk_host_list for test_type in test_types: test = self.new_test("%s_host_list_delimiters" % test_type["prefix"], "Verify commas and semicolons can be used as pcmk_host_list delimiters", test_type["use_cpg"]) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1,node2,node3" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=pcmk1;pcmk2;pcmk3" """) test.add_cmd("stonith_admin", "stonith_admin -F node2 -t 2") test.add_cmd("stonith_admin", "stonith_admin -F pcmk3 -t 2") test.add_stonith_log_pattern("for host 'node2' with device 'true1' returned: 0") test.add_stonith_log_pattern("for host 'pcmk3' with device 'true2' returned: 0") # test the stonith builds the correct list of devices that can fence a node. for test_type in test_types: test = self.new_test("%s_list_devices" % test_type["prefix"], "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1") # simple test of device monitor for test_type in test_types: test = self.new_test("%s_monitor" % test_type["prefix"], "Verify device is reachable", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-Q false1") test.add_expected_fail_cmd("stonith_admin", "-Q true2", CrmExit.ERROR) # Verify monitor occurs for duration of timeout period on failure for test_type in test_types: test = self.new_test("%s_monitor_timeout" % test_type["prefix"], "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '-R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", CrmExit.ERROR) test.add_stonith_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries for test_type in test_types: test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '-R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15", CrmExit.ERROR) test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test for test_type in test_types: test = self.new_test("%s_register" % test_type["prefix"], "Verify devices can be registered and un-registered", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", CrmExit.ERROR) # simple reboot test for test_type in test_types: test = self.new_test("%s_reboot" % test_type["prefix"], "Verify devices can be rebooted", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-B node3 -t 2") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", CrmExit.ERROR) # test fencing history. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_fence_history" % test_type["prefix"], "Verify last fencing operation is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2 -V") test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "") # simple test of dynamic list query for test_type in test_types: test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # fence using dynamic list query for test_type in test_types: test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V") # simple test of query using status action for test_type in test_types: test = self.new_test("%s_status_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # test what happens when no reboot action is advertised for test_type in test_types: test = self.new_test("%s_no_reboot_support" % test_type["prefix"], "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-B node1 -t 5 -V") test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") # make sure reboot is used when reboot action is advertised for test_type in test_types: test = self.new_test("%s_with_reboot_support" % test_type["prefix"], "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-B node1 -t 5 -V") test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") def build_nodeid_tests(self): """ Register tests that use a corosync node id """ our_uname = localname() ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("cpg_supply_nodeid", "Verify nodeid is given when fence agent has nodeid as parameter", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("cpg_do_not_supply_nodeid", "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) # use a host name that won't be in corosync.conf test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=regr-test\"") test.add_cmd("stonith_admin", "-F regr-test -t 3") test.add_stonith_neg_log_pattern("For stonith action (off) for victim regr-test, adding nodeid") ### verify nodeid use doesn't explode standalone mode test = self.new_test("standalone_do_not_supply_nodeid", "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_neg_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) def build_unfence_tests(self): """ Register tests that verify unfencing """ our_uname = localname() ### verify unfencing using automatic unfencing test = self.new_test("cpg_unfence_required_1", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) # both devices should be executed test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") ### verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("cpg_unfence_required_2", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=fail" -o "pcmk_host_list=%s"' % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), CrmExit.ERROR) ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_3", "Verify require unfencing on all devices even when at different topology levels", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_4", "Verify all required devices are executed even with topology levels fail.", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true3 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true4 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false3 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false4 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)") def build_unfence_on_target_tests(self): """ Register tests that verify unfencing that runs on the target """ our_uname = localname() ### verify unfencing using on_target device test = self.new_test("cpg_unfence_on_target_1", "Verify unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify failure of unfencing using on_target device test = self.new_test("cpg_unfence_on_target_2", "Verify failure unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", CrmExit.ERROR) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology test = self.new_test("cpg_unfence_on_target_3", "Verify unfencing with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology fails when victim node doesn't exist test = self.new_test("cpg_unfence_on_target_4", "Verify unfencing failure with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2") test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", CrmExit.ERROR) test.add_stonith_log_pattern("(on) to be executed on the target node") def build_remap_tests(self): """ Register tests that verify remapping of reboots to off-on """ test = self.new_test("cpg_remap_simple", "Verify sequential topology reboot is remapped to all-off-then-all-on", 1) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=1" -o "pcmk_reboot_timeout=10" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=2" -o "pcmk_reboot_timeout=20" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_stonith_log_pattern("Total timeout set to 3 for peer's fencing of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") # fence_dummy sets "on" as an on_target action test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test = self.new_test("cpg_remap_automatic", "Verify remapped topology reboot skips automatic 'on'", 1) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test.add_stonith_neg_log_pattern("perform op 'node_fake on' with") test.add_stonith_neg_log_pattern("'on' failure") test = self.new_test("cpg_remap_complex_1", "Verify remapped topology reboot in second level works if non-remapped first level fails", 1) test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'false1'") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test = self.new_test("cpg_remap_complex_2", "Verify remapped topology reboot failure in second level proceeds to third level", 1) test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true3 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", "-r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'false1'") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'false2'") test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'true2'") test.add_stonith_neg_log_pattern("node_fake with true3") def setup_environment(self, use_corosync): """ Prepare the host before executing any tests """ if self.autogen_corosync_cfg and use_corosync: corosync_conf = (""" totem { version: 2 crypto_cipher: none crypto_hash: none nodeid: 101 secauth: off interface { ttl: 1 ringnumber: 0 mcastport: 6666 mcastaddr: 226.94.1.1 bindnetaddr: 127.0.0.1 } } logging { debug: off fileline: off to_syslog: no to_stderr: no syslog_facility: daemon timestamp: on to_logfile: yes logfile: @CRM_LOG_DIR@/corosync.test.log logfile_priority: info } """) os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf)) if use_corosync: ### make sure we are in control ### killall("corosync") self.start_corosync() - os.system("cp %s @sbindir@/fence_dummy" % FENCE_DUMMY) - - # modifies dummy agent to do require unfencing - os.system("sed 's/on_target=/automatic=/g' %s > @sbindir@/fence_dummy_auto_unfence" % FENCE_DUMMY) - os.system("chmod 711 @sbindir@/fence_dummy_auto_unfence") - - # modifies dummy agent to not advertise reboot - os.system("sed 's/^.*.*//g' %s > @sbindir@/fence_dummy_no_reboot" % FENCE_DUMMY) - os.system("chmod 711 @sbindir@/fence_dummy_no_reboot") + subprocess.call(["cts-support", "install"]) def cleanup_environment(self, use_corosync): """ Clean up the host after executing desired tests """ if use_corosync: killall("corosync") if self.verbose and os.path.exists('@CRM_LOG_DIR@/corosync.test.log'): print("Corosync output") logfile = io.open('@CRM_LOG_DIR@/corosync.test.log', 'rt') for line in logfile.readlines(): print(line.strip()) os.remove('@CRM_LOG_DIR@/corosync.test.log') if self.autogen_corosync_cfg: os.system("rm -f /etc/corosync/corosync.conf") - os.system("rm -f @sbindir@/fence_dummy") - os.system("rm -f @sbindir@/fence_dummy_auto_unfence") - os.system("rm -f @sbindir@/fence_dummy_no_reboot") + subprocess.call(["cts-support", "uninstall"]) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['cpg-only'] = 0 self.options['no-cpg'] = 0 self.options['show-usage'] = 0 def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-n" or args[i] == "--no-cpg": self.options['no-cpg'] = 1 elif args[i] == "-c" or args[i] == "--cpg-only": self.options['cpg-only'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--cpg-only | -c] Only run tests that require corosync.") print("\t [--no-cpg | -n] Only run tests that do not require corosync") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run fencing regression tests as specified by arguments """ + update_path() + opts = TestOptions() opts.build_options(argv) use_corosync = 1 tests = Tests(opts.options['verbose']) tests.build_standalone_tests() tests.build_custom_timeout_tests() tests.build_api_sanity_tests() tests.build_fence_merge_tests() tests.build_fence_no_merge_tests() tests.build_unfence_tests() tests.build_unfence_on_target_tests() tests.build_nodeid_tests() tests.build_remap_tests() if opts.options['list-tests']: tests.print_list() sys.exit(CrmExit.OK) elif opts.options['show-usage']: opts.show_usage() sys.exit(CrmExit.OK) print("Starting ...") if opts.options['no-cpg']: use_corosync = 0 tests.setup_environment(use_corosync) if opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() elif opts.options['no-cpg']: tests.run_no_cpg() tests.print_results() elif opts.options['cpg-only']: tests.run_cpg_only() tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment(use_corosync) tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/cts/cts-support.in b/cts/cts-support.in index ca87ff7a41..24081702da 100644 --- a/cts/cts-support.in +++ b/cts/cts-support.in @@ -1,128 +1,147 @@ #!/bin/sh # # Installer for support files needed by Pacemaker's Cluster Test Suite # # Copyright 2018 Red Hat, Inc. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # USAGE_TEXT="Usage: $0 " HELP_TEXT="$USAGE_TEXT Commands (must be run as root): install Install support files needed by Pacemaker CTS uninstall Remove support files needed by Pacemaker CTS" # These constants must track crm_exit_t values CRM_EX_OK=0 CRM_EX_ERROR=1 CRM_EX_USAGE=64 UNIT_DIR="@systemdunitdir@" LIBEXEC_DIR="@libexecdir@/pacemaker" INIT_DIR="@INITDIR@" +SBIN_DIR="@sbindir@" DATA_DIR="@datadir@/pacemaker/tests/cts" UPSTART_DIR="/etc/init" DUMMY_DAEMON="pacemaker-cts-dummyd" DUMMY_DAEMON_UNIT="pacemaker-cts-dummyd@.service" LSB_DUMMY="LSBDummy" UPSTART_DUMMY="pacemaker-cts-dummyd.conf" +FENCE_DUMMY="fence_dummy" +FENCE_DUMMY_ALIASES="fence_dummy_auto_unfence fence_dummy_no_reboot" # If the install directory doesn't exist, assume we're in a build directory. if [ ! -d "$DATA_DIR" ]; then # If readlink supports -e (i.e. GNU), use it. readlink -e / >/dev/null 2>/dev/null if [ $? -eq 0 ]; then DATA_DIR="$(dirname "$(readlink -e "$0")")" else DATA_DIR="$(dirname "$0")" fi fi usage() { echo "Error:" "$@" echo "$USAGE_TEXT" exit $CRM_EX_USAGE } must_be_root() { if ! [ "$(id -u)" = "0" ]; then usage "this command must be run as root" return $CRM_EX_ERROR fi return $CRM_EX_OK } support_uninstall() { must_be_root || return $CRM_EX_ERROR if [ -e "$UNIT_DIR/$DUMMY_DAEMON_UNIT" ]; then echo "Removing $UNIT_DIR/$DUMMY_DAEMON_UNIT ..." rm -f "$UNIT_DIR/$DUMMY_DAEMON_UNIT" systemctl daemon-reload # Ignore failure fi for FILE in \ "$LIBEXEC_DIR/$DUMMY_DAEMON" \ "$UPSTART_DIR/$UPSTART_DUMMY" \ + "$SBIN_DIR/$FENCE_DUMMY" \ "$INIT_DIR/$LSB_DUMMY" do if [ -e "$FILE" ]; then echo "Removing $FILE ..." rm -f "$FILE" fi done + for ALIAS in $FENCE_DUMMY_ALIASES; do \ + FILE="$SBIN_DIR/$ALIAS" + if [ -L "$FILE" ] || [ -e "$FILE" ]; then + echo "Removing $FILE ..." + rm -f "$FILE" + fi + done return $CRM_EX_OK } support_install() { support_uninstall || return $CRM_EX_ERROR cd "$DATA_DIR" || return $CRM_EX_ERROR if [ -d "$UNIT_DIR" ]; then echo "Installing $DUMMY_DAEMON ..." mkdir -p "$LIBEXEC_DIR" install -m 0755 "$DUMMY_DAEMON" "$LIBEXEC_DIR" || return $CRM_EX_ERROR echo "Installing $DUMMY_DAEMON_UNIT ..." install -m 0644 "$DUMMY_DAEMON_UNIT" "$UNIT_DIR" || return $CRM_EX_ERROR systemctl daemon-reload # Ignore failure fi + echo "Installing $FENCE_DUMMY to $SBIN_DIR ..." + mkdir -p "$SBIN_DIR" + install -m 0755 "$FENCE_DUMMY" "$SBIN_DIR" || return $CRM_EX_ERROR + for alias in $FENCE_DUMMY_ALIASES; do \ + echo "Installing $alias to $SBIN_DIR ..." + ln -s "$FENCE_DUMMY" "$SBIN_DIR/$alias" || return $CRM_EX_ERROR + done + echo "Installing $LSB_DUMMY to $INIT_DIR ..." mkdir -p "$INIT_DIR" install -m 0755 "$LSB_DUMMY" "$INIT_DIR" || return $CRM_EX_ERROR if [ -d "$UPSTART_DIR" -a -f "$UPSTART_DUMMY" ]; then echo "Installing $UPSTART_DUMMY to $UPSTART_DIR ..." install -m 0644 "$UPSTART_DUMMY" "$UPSTART_DIR" || return $CRM_EX_ERROR fi return $CRM_EX_OK } COMMAND="" while [ $# -gt 0 ] ; do case "$1" in --help) echo "$HELP_TEXT" exit $CRM_EX_OK ;; install|uninstall) COMMAND="$1" shift ;; *) usage "unknown option '$1'" ;; esac done case "$COMMAND" in install) support_install ;; uninstall) support_uninstall ;; *) usage "must specify command" ;; esac diff --git a/cts/fence_dummy.in b/cts/fence_dummy.in index 95dec43899..218f838384 100644 --- a/cts/fence_dummy.in +++ b/cts/fence_dummy.in @@ -1,463 +1,491 @@ #!@PYTHON@ """Dummy fence agent for testing """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright (C) 2012-2018 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import re import sys import time import random import atexit import getopt -AGENT_VERSION = "4.0.0" +AGENT_VERSION = "4.0.1" OCF_VERSION = "1.0" SHORT_DESC = "Dummy fence agent" LONG_DESC = """fence_dummy is a fake fencing agent which reports success based on its mode (pass|fail|random) without doing anything.""" -# Short options used: ifhmnoqsvBDHMRUV +# Short options used: difhmnoqsvBDHMRUV ALL_OPT = { "quiet" : { "getopt" : "q", "help" : "", "order" : 50 }, "verbose" : { "getopt" : "v", "longopt" : "verbose", "help" : "-v, --verbose Verbose mode", "required" : "0", "shortdesc" : "Verbose mode", "order" : 51 }, "debug" : { "getopt" : "D:", "longopt" : "debug-file", "help" : "-D, --debug-file=[debugfile] Debugging to output file", "required" : "0", "shortdesc" : "Write debug information to given file", "order" : 52 }, "version" : { "getopt" : "V", "longopt" : "version", "help" : "-V, --version Display version information and exit", "required" : "0", "shortdesc" : "Display version information and exit", "order" : 53 }, "help" : { "getopt" : "h", "longopt" : "help", "help" : "-h, --help Display this help and exit", "required" : "0", "shortdesc" : "Display help and exit", "order" : 54 }, "action" : { "getopt" : "o:", "longopt" : "action", "help" : "-o, --action=[action] Action: status, list, reboot (default), off or on", "required" : "1", "shortdesc" : "Fencing Action", "default" : "reboot", "order" : 1 }, "nodename" : { "getopt" : "N:", "longopt" : "nodename", "help" : "-N, --nodename Node name of fence victim (ignored)", "required" : "0", "shortdesc" : "The node name of fence victim (ignored)", "order" : 2 }, "mode": { "getopt" : "M:", "longopt" : "mode", "required" : "0", "help" : "-M, --mode=(pass|fail|random) Exit status to return for non-monitor operations", "shortdesc" : "Whether fence operations should always pass, always fail, or fail at random", "order" : 3 }, "monitor_mode" : { "getopt" : "m:", "longopt" : "monitor_mode", "help" : "-m, --monitor_mode=(pass|fail|random) Exit status to return for monitor operations", "required" : "0", "shortdesc" : "Whether monitor operations should always pass, always fail, or fail at random", "order" : 3 }, "random_sleep_range": { "getopt" : "R:", "required" : "0", "longopt" : "random_sleep_range", "help" : "-R, --random_sleep_range=[seconds] Sleep between 1 and [seconds] before returning", "shortdesc" : "Wait randomly between 1 and [seconds]", "order" : 3 }, "mock_dynamic_hosts" : { "getopt" : "H:", "longopt" : "mock_dynamic_hosts", "help" : "-H, --mock_dynamic_hosts=[list] What to return when dynamically queried for possible targets", "required" : "0", "shortdesc" : "A list of hosts we can fence", "order" : 3 }, "delay" : { "getopt" : "f:", "longopt" : "delay", "help" : "-f, --delay [seconds] Wait X seconds before fencing is started", "required" : "0", "shortdesc" : "Wait X seconds before fencing is started", "default" : "0", "order" : 3 }, + "monitor_delay" : { + "getopt" : "d:", + "longopt" : "monitor_delay", + "help" : "-d, --monitor_delay [seconds] Wait X seconds before monitor completes", + "required" : "0", + "shortdesc" : "Wait X seconds before monitor completes", + "default" : "0", + "order" : 3 + }, "port" : { "getopt" : "n:", "longopt" : "plug", "help" : "-n, --plug=[id] Physical plug number on device (ignored)", "required" : "1", "shortdesc" : "Ignored", "order" : 4 }, "switch" : { "getopt" : "s:", "longopt" : "switch", "help" : "-s, --switch=[id] Physical switch number on device (ignored)", "required" : "0", "shortdesc" : "Ignored", "order" : 4 }, "nodeid" : { "getopt" : "i:", "longopt" : "nodeid", "help" : "-i, --nodeid Corosync id of fence victim (ignored)", "required" : "0", "shortdesc" : "Ignored", "order" : 4 }, "uuid" : { "getopt" : "U:", "longopt" : "uuid", "help" : "-U, --uuid UUID of the VM to fence (ignored)", "required" : "0", "shortdesc" : "Ignored", "order" : 4 } } +auto_unfence = False +no_reboot = False def agent(): """ Return name this file was run as. """ return os.path.basename(sys.argv[0]) def fail_usage(message): """ Print a usage message and exit. """ sys.exit("%s\nPlease use '-h' for usage" % message) def show_docs(options): """ Handle informational options (display info and exit). """ device_opt = options["device_opt"] if "-h" in options: usage(device_opt) sys.exit(0) if "-o" in options and options["-o"].lower() == "metadata": metadata(device_opt, options) sys.exit(0) if "-V" in options: print(AGENT_VERSION) sys.exit(0) def sorted_options(avail_opt): """ Return a list of all options, in their internally specified order. """ sorted_list = [(key, ALL_OPT[key]) for key in avail_opt] sorted_list.sort(key=lambda x: x[1]["order"]) return sorted_list def usage(avail_opt): """ Print a usage message. """ print("Usage:") print("\t" + agent() + " [options]") print("Options:") for dummy, value in sorted_options(avail_opt): if len(value["help"]) != 0: print(" " + value["help"]) def metadata(avail_opt, options): """ Print agent metadata. """ # This log is just for testing handling of stderr output print("asked for fence_dummy metadata", file=sys.stderr) print(""" -%s -%s -""" % (agent(), SHORT_DESC, AGENT_VERSION, OCF_VERSION, LONG_DESC)) + %s + %s + """ % (agent(), SHORT_DESC, AGENT_VERSION, OCF_VERSION, LONG_DESC)) for option, dummy in sorted_options(avail_opt): if "shortdesc" in ALL_OPT[option]: - print("\t") + print(' ') default = "" default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1] default_name_no_arg = "-" + ALL_OPT[option]["getopt"] if "default" in ALL_OPT[option]: default = 'default="%s"' % str(ALL_OPT[option]["default"]) elif default_name_arg in options: if options[default_name_arg]: try: default = 'default="%s"' % options[default_name_arg] except TypeError: ## @todo/@note: Currently there is no clean way how to handle lists ## we can create a string from it but we can't set it on command line default = 'default="%s"' % str(options[default_name_arg]) elif default_name_no_arg in options: default = 'default="true"' mixed = ALL_OPT[option]["help"] ## split it between option and help text res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) if None != res: mixed = res.group(1) mixed = mixed.replace("<", "<").replace(">", ">") - print("\t\t") + print(' ') if ALL_OPT[option]["getopt"].count(":") > 0: - print("\t\t") + print(' ') else: - print("\t\t") + print(' ') - print("\t\t" + ALL_OPT[option]["shortdesc"] + "") - print("\t") + print(' ' + ALL_OPT[option]["shortdesc"] + '') + print(' ') - print(""" - -\t -\t -\t -\t -\t -\t -\t - -""") + print(' \n ') + if auto_unfence: + attr_name = 'automatic' + else: + attr_name = 'on_target' + print(' ') + print(' ') + if not no_reboot: + print(' ') + print(' ') + print(' ') + print(' ') + print(' ') + print(' ') + print('') def option_longopt(option): """ Return the getopt-compatible long-option name of the given option. """ if ALL_OPT[option]["getopt"].endswith(":"): return ALL_OPT[option]["longopt"] + "=" else: return ALL_OPT[option]["longopt"] def opts_from_command_line(argv, avail_opt): """ Read options from command-line arguments. """ # Prepare list of options for getopt getopt_string = "" longopt_list = [] for k in avail_opt: if k in ALL_OPT: getopt_string += ALL_OPT[k]["getopt"] else: fail_usage("Parse error: unknown option '"+k+"'") if k in ALL_OPT and "longopt" in ALL_OPT[k]: longopt_list.append(option_longopt(k)) try: opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list) except getopt.GetoptError as error: fail_usage("Parse error: " + error.msg) # Transform longopt to short one which are used in fencing agents old_opt = opt opt = {} for old_option in dict(old_opt).keys(): if old_option.startswith("--"): for option in ALL_OPT.keys(): if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option: opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option] else: opt[old_option] = dict(old_opt)[old_option] # Compatibility Layer (with what? probably not needed for fence_dummy) new_opt = dict(opt) if "-T" in new_opt: new_opt["-o"] = "status" if "-n" in new_opt: new_opt["-m"] = new_opt["-n"] opt = new_opt return opt def opts_from_stdin(avail_opt): """ Read options from standard input. """ opt = {} name = "" for line in sys.stdin.readlines(): line = line.strip() if line.startswith("#") or (len(line) == 0): continue (name, value) = (line + "=").split("=", 1) value = value[:-1] # Compatibility Layer (with what? probably not needed for fence_dummy) if name == "option": name = "action" if name not in avail_opt: print("Parse error: Ignoring unknown option '%s'" % line, file=sys.stderr) continue if ALL_OPT[name]["getopt"].endswith(":"): opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value elif value.lower() in ["1", "yes", "on", "true"]: opt["-"+ALL_OPT[name]["getopt"]] = "1" return opt def process_input(avail_opt): """ Set standard environment variables, and parse all options. """ # Set standard environment os.putenv("LANG", "C") os.putenv("LC_ALL", "C") # Read options from command line or standard input if len(sys.argv) > 1: return opts_from_command_line(sys.argv[1:], avail_opt) else: return opts_from_stdin(avail_opt) def atexit_handler(): """ Close stdout on exit. """ try: sys.stdout.close() os.close(1) except IOError: sys.exit("%s failed to close standard output" % agent()) def success_mode(options, option, default_value): """ Return exit code specified by option. """ if option in options: test_value = options[option] else: test_value = default_value if test_value == "pass": exitcode = 0 elif test_value == "fail": exitcode = 1 else: exitcode = random.randint(0, 1) return exitcode def write_options(options): """ Write out all options to debug file. """ try: debugfile = io.open(options["-D"], 'at') debugfile.write("### %s ###\n" % (time.strftime("%Y-%m-%d %H:%M:%S"))) for option in sorted(options): debugfile.write("%s=%s\n" % (option, options[option])) debugfile.write("###\n") debugfile.close() except IOError: pass def main(): """ Make it so! """ + global auto_unfence + global no_reboot + + # Meta-data can't take parameters, so we simulate different meta-data + # behavior based on the executable name (which can be a symbolic link). + if (sys.argv[0].endswith("_auto_unfence")): + auto_unfence = True + elif (sys.argv[0].endswith("_no_reboot")): + no_reboot = True + device_opt = ALL_OPT.keys() ## Defaults for fence agent atexit.register(atexit_handler) options = process_input(device_opt) options["device_opt"] = device_opt show_docs(options) # dump input to file if "-D" in options: write_options(options) if "-f" in options: val = int(options["-f"]) print("delay sleep for %d seconds" % val, file=sys.stderr) time.sleep(val) # random sleep for testing if "-R" in options: val = int(options["-R"]) ran = random.randint(1, val) print("random sleep for %d seconds" % ran, file=sys.stderr) time.sleep(ran) if "-o" in options: action = options["-o"] else: action = "action" if action == "monitor": + if "-d" in options: + time.sleep(int(options["-d"])) exitcode = success_mode(options, "-m", "pass") elif action == "list": print("fence_dummy action (list) called", file=sys.stderr) if "-H" in options: print(options["-H"]) exitcode = 0 else: print("dynamic hostlist requires mock_dynamic_hosts to be set", file=sys.stderr) exitcode = 1 else: exitcode = success_mode(options, "-M", "random") # Ensure we generate some error output on failure exit. if exitcode == 1: print("simulated %s failure" % action, file=sys.stderr) sys.exit(exitcode) if __name__ == "__main__": main() diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c index e36e36882e..c5ce1ab01a 100644 --- a/daemons/fenced/cts-fence-helper.c +++ b/daemons/fenced/cts-fence-helper.c @@ -1,668 +1,658 @@ -/* - * Copyright (C) 2009 Andrew Beekhof - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +/* + * Copyright 2009-2018 Andrew Beekhof + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static GMainLoop *mainloop = NULL; static crm_trigger_t *trig = NULL; static int mainloop_iter = 0; static int callback_rc = 0; typedef void (*mainloop_test_iteration_cb) (int check_event); #define MAINLOOP_DEFAULT_TIMEOUT 2 #define mainloop_test_done(pass) \ if (pass) { \ crm_info("SUCCESS - %s", __FUNCTION__); \ mainloop_iter++; \ mainloop_set_trigger(trig); \ } else { \ crm_err("FAILURE = %s async_callback %d", __FUNCTION__, callback_rc); \ crm_exit(CRM_EX_ERROR); \ } \ callback_rc = 0; \ /* *INDENT-OFF* */ enum test_modes { test_standard = 0, // test using a specific developer environment test_passive, // watch notifications only test_api_sanity, // sanity-test stonith client API using fence_dummy test_api_mainloop, // sanity-test mainloop code with async responses }; static struct crm_option long_options[] = { {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {"passive", 0, 0, 'p'}, {"api_test", 0, 0, 't'}, {"mainloop_api_test", 0, 0, 'm'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static stonith_t *st = NULL; static struct pollfd pollfd; static int st_opts = st_opt_sync_call; static int expected_notifications = 0; static int verbose = 0; static void dispatch_helper(int timeout) { int rc; crm_debug("Looking for notification"); pollfd.events = POLLIN; while (true) { rc = poll(&pollfd, 1, timeout); /* wait 10 minutes, -1 forever */ if (rc > 0) { if (!stonith_dispatch(st)) { break; } } else { break; } } } static void st_callback(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { crm_exit(CRM_EX_DISCONNECT); } crm_notice("Operation %s requested by %s %s for peer %s. %s reported: %s (ref=%s)", e->operation, e->origin, e->result == pcmk_ok ? "completed" : "failed", e->target, e->executioner ? e->executioner : "", pcmk_strerror(e->result), e->id); if (expected_notifications) { expected_notifications--; } } static void st_global_callback(stonith_t * stonith, stonith_callback_data_t * data) { crm_notice("Call id %d completed with rc %d", data->call_id, data->rc); } static void passive_test(void) { int rc = 0; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); crm_debug("Connect: %d", rc); st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback", st_global_callback); dispatch_helper(600 * 1000); } #define single_test(cmd, str, num_notifications, expected_rc) \ { \ int rc = 0; \ rc = cmd; \ expected_notifications = 0; \ if (num_notifications) { \ expected_notifications = num_notifications; \ dispatch_helper(500); \ } \ if (rc != expected_rc) { \ crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s", expected_rc, rc, pcmk_strerror(rc), str); \ crm_exit(CRM_EX_ERROR); \ } else if (expected_notifications) { \ crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s", \ num_notifications, num_notifications - expected_notifications, str); \ crm_exit(CRM_EX_ERROR); \ } else { \ if (verbose) { \ crm_info("SUCCESS - %s: %d", str, rc); \ } else { \ crm_debug("SUCCESS - %s: %d", str, rc); \ } \ } \ }\ static void run_fence_failure_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "fail"); single_test(st-> cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), "Register device1 for failure test", 1, 0); single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0), "Fence failure results off", 1, -pcmk_err_generic); single_test(st->cmds->fence(st, st_opts, "false_1_node2", "reboot", 3, 0), "Fence failure results reboot", 1, -pcmk_err_generic); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), "Remove device1 for failure test", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void run_fence_failure_rollover_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "fail"); single_test(st-> cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), "Register device1 for rollover test", 1, 0); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "pass"); single_test(st-> cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params), "Register device2 for rollover test", 1, 0); single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0), "Fence rollover results off", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ single_test(st->cmds->fence(st, st_opts, "false_1_node2", "on", 3, 0), "Fence rollover results on", 1, -ENODEV); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), "Remove device1 for rollover tests", 1, 0); single_test(st->cmds->remove_device(st, st_opts, "test-id2"), "Remove device2 for rollover tests", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void run_standard_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "pass"); params = stonith_key_value_add(params, "mock_dynamic_hosts", "false_1_node1 false_1_node2"); single_test(st-> cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_dummy", params), "Register", 1, 0); stonith_key_value_freeall(params, 1, 1); params = NULL; single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), "list", 1, 0); single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0); single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node2", 1), "Status false_1_node2", 1, 0); single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1), "Status false_1_node1", 1, 0); single_test(st->cmds->fence(st, st_opts, "unknown-host", "off", 1, 0), "Fence unknown-host (expected failure)", 0, -ENODEV); single_test(st->cmds->fence(st, st_opts, "false_1_node1", "off", 1, 0), "Fence false_1_node1", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ single_test(st->cmds->fence(st, st_opts, "false_1_node1", "on", 1, 0), "Unfence false_1_node1", 1, -ENODEV); /* Confirm that an invalid level index is rejected */ single_test(st->cmds->register_level(st, st_opts, "node1", 999, params), "Attempt to register an invalid level index", 0, -EINVAL); single_test(st->cmds->remove_device(st, st_opts, "test-id"), "Remove test-id", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void sanity_tests(void) { int rc = 0; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); crm_debug("Connect: %d", rc); st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback", st_global_callback); crm_info("Starting API Sanity Tests"); run_standard_test(); run_fence_failure_test(); run_fence_failure_rollover_test(); crm_info("Sanity Tests Passed"); } static void standard_dev_test(void) { int rc = 0; char *tmp = NULL; stonith_key_value_t *params = NULL; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); crm_debug("Connect: %d", rc); params = stonith_key_value_add(params, "pcmk_host_map", "some-host=pcmk-7 true_1_node1=3,4"); rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_xvm", params); crm_debug("Register: %d", rc); rc = st->cmds->list(st, st_opts, "test-id", &tmp, 10); crm_debug("List: %d output: %s", rc, tmp ? tmp : ""); rc = st->cmds->monitor(st, st_opts, "test-id", 10); crm_debug("Monitor: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node2", 10); crm_debug("Status false_1_node2: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "unknown-host", "off", 60, 0); crm_debug("Fence unknown-host: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", "off", 60, 0); crm_debug("Fence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "some-host", "off", 10, 0); crm_debug("Fence alias: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10); crm_debug("Status alias: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", "on", 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->remove_device(st, st_opts, "test-id"); crm_debug("Remove test-id: %d", rc); stonith_key_value_freeall(params, 1, 1); } static void iterate_mainloop_tests(gboolean event_ready); static void mainloop_callback(stonith_t * stonith, stonith_callback_data_t * data) { callback_rc = data->rc; iterate_mainloop_tests(TRUE); } static int register_callback_helper(int callid) { return st->cmds->register_callback(st, callid, MAINLOOP_DEFAULT_TIMEOUT, st_opt_timeout_updates, NULL, "callback", mainloop_callback); } static void test_async_fence_pass(int check_event) { int rc = 0; if (check_event) { if (callback_rc != 0) { mainloop_test_done(FALSE); } else { mainloop_test_done(TRUE); } return; } rc = st->cmds->fence(st, 0, "true_1_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(FALSE); } register_callback_helper(rc); /* wait for event */ } #define CUSTOM_TIMEOUT_ADDITION 10 static void test_async_fence_custom_timeout(int check_event) { int rc = 0; static time_t begin = 0; if (check_event) { uint32_t diff = (time(NULL) - begin); if (callback_rc != -ETIME) { mainloop_test_done(FALSE); } else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) { crm_err ("Custom timeout test failed, callback expiration should be updated to %d, actual timeout was %d", CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT, diff); mainloop_test_done(FALSE); } else { mainloop_test_done(TRUE); } return; } begin = time(NULL); rc = st->cmds->fence(st, 0, "custom_timeout_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(FALSE); } register_callback_helper(rc); /* wait for event */ } static void test_async_fence_timeout(int check_event) { int rc = 0; if (check_event) { if (callback_rc != -ENODEV) { mainloop_test_done(FALSE); } else { mainloop_test_done(TRUE); } return; } rc = st->cmds->fence(st, 0, "false_1_node2", "off", MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(FALSE); } register_callback_helper(rc); /* wait for event */ } static void test_async_monitor(int check_event) { int rc = 0; if (check_event) { if (callback_rc) { mainloop_test_done(FALSE); } else { mainloop_test_done(TRUE); } return; } rc = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT); if (rc < 0) { crm_err("monitor failed with rc %d", rc); mainloop_test_done(FALSE); } register_callback_helper(rc); /* wait for event */ } static void test_register_async_devices(int check_event) { char buf[16] = { 0, }; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, "pcmk_host_map", "false_1_node1=1,2"); params = stonith_key_value_add(params, "mode", "fail"); st->cmds->register_device(st, st_opts, "false_1", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, "pcmk_host_map", "true_1_node1=1,2"); params = stonith_key_value_add(params, "mode", "pass"); st->cmds->register_device(st, st_opts, "true_1", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, "pcmk_host_map", "custom_timeout_node1=1,2"); params = stonith_key_value_add(params, "mode", "fail"); params = stonith_key_value_add(params, "delay", "1000"); snprintf(buf, sizeof(buf) - 1, "%d", MAINLOOP_DEFAULT_TIMEOUT + CUSTOM_TIMEOUT_ADDITION); params = stonith_key_value_add(params, "pcmk_off_timeout", buf); st->cmds->register_device(st, st_opts, "false_custom_timeout", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); mainloop_test_done(TRUE); } static void try_mainloop_connect(int check_event) { int tries = 10; int i = 0; int rc = 0; for (i = 0; i < tries; i++) { rc = st->cmds->connect(st, crm_system_name, NULL); if (!rc) { crm_info("stonith client connection established"); mainloop_test_done(TRUE); return; } else { crm_info("stonith client connection failed"); } sleep(1); } crm_err("API CONNECTION FAILURE"); mainloop_test_done(FALSE); } static void iterate_mainloop_tests(gboolean event_ready) { static mainloop_test_iteration_cb callbacks[] = { try_mainloop_connect, test_register_async_devices, test_async_monitor, test_async_fence_pass, test_async_fence_timeout, test_async_fence_custom_timeout, }; if (mainloop_iter == (sizeof(callbacks) / sizeof(mainloop_test_iteration_cb))) { /* all tests ran, everything passed */ crm_info("ALL MAINLOOP TESTS PASSED!"); crm_exit(CRM_EX_OK); } callbacks[mainloop_iter] (event_ready); } static gboolean trigger_iterate_mainloop_tests(gpointer user_data) { iterate_mainloop_tests(FALSE); return TRUE; } static void test_shutdown(int nsig) { int rc = 0; if (st) { rc = st->cmds->disconnect(st); crm_info("Disconnect: %d", rc); crm_debug("Destroy"); stonith_api_delete(st); } if (rc) { crm_exit(CRM_EX_ERROR); } } static void mainloop_tests(void) { trig = mainloop_add_trigger(G_PRIORITY_HIGH, trigger_iterate_mainloop_tests, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); } int main(int argc, char **argv) { int argerr = 0; int flag; int option_index = 0; enum test_modes mode = test_standard; + crm_log_cli_init("cts-fence-helper"); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) { break; } switch (flag) { case 'V': verbose = 1; break; case '$': case '?': crm_help(flag, CRM_EX_OK); break; case 'p': mode = test_passive; break; case 't': mode = test_api_sanity; break; case 'm': mode = test_api_mainloop; break; default: ++argerr; break; } } crm_log_init(NULL, LOG_INFO, TRUE, (verbose? TRUE : FALSE), argc, argv, FALSE); if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } crm_debug("Create"); st = stonith_api_new(); switch (mode) { case test_standard: standard_dev_test(); break; case test_passive: passive_test(); break; case test_api_sanity: sanity_tests(); break; case test_api_mainloop: mainloop_tests(); break; } test_shutdown(0); return CRM_EX_OK; }