diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in index 8ddd4fc68a..990146651e 100644 --- a/cts/cts-fencing.in +++ b/cts/cts-fencing.in @@ -1,954 +1,953 @@ #!@PYTHON@ """ Regression tests for Pacemaker's fencer """ __copyright__ = "Copyright 2012-2024 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import argparse import os import sys import subprocess import tempfile # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus from pacemaker._cts.corosync import Corosync, localname from pacemaker._cts.errors import ExitCodeError, OutputFoundError, OutputNotFoundError, XmlValidationError from pacemaker._cts.process import killall, exit_if_proc_running from pacemaker._cts.test import Test, Tests TEST_DIR = sys.path[0] def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-fencing.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BuildOptions._BUILD_DIR, TEST_DIR)) # For pacemaker-fenced and cts-fence-helper new_path = "%s/daemons/fenced:%s" % (BuildOptions._BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BuildOptions._BUILD_DIR, new_path) # For stonith_admin new_path = "%s/cts/support:%s" % (BuildOptions._BUILD_DIR, new_path) # For cts-support else: print("Running tests from the install tree: %s (not %s)" % (BuildOptions.DAEMON_DIR, TEST_DIR)) # For pacemaker-fenced, cts-fence-helper, and cts-support new_path = "%s:%s" % (BuildOptions.DAEMON_DIR, new_path) print('Using PATH="%s"' % new_path) os.environ['PATH'] = new_path class FenceTest(Test): """ Executor for a single test """ def __init__(self, name, description, **kwargs): Test.__init__(self, name, description, **kwargs) self._daemon_location = "pacemaker-fenced" def _kill_daemons(self): killall(["pacemakerd", "pacemaker-fenced"]) def _start_daemons(self): cmd = ["pacemaker-fenced", "-c", "-l", self.logpath] if self.verbose: cmd += ["-V"] print("Starting %s" % " ".join(cmd)) self._daemon_process = subprocess.Popen(cmd) class FenceTests(Tests): """ Collection of all fencing regression tests """ def __init__(self, **kwargs): Tests.__init__(self, **kwargs) self._corosync = Corosync(self.verbose, self.logdir, "cts-fencing") def new_test(self, name, description): """ Create a named test """ test = FenceTest(name, description, verbose=self.verbose, timeout=self.timeout, force_wait=self.force_wait, logdir=self.logdir) self._tests.append(test) return test def build_api_sanity_tests(self): """ Register tests to verify basic API usage """ verbose_arg = "" if self.verbose: verbose_arg = "-V" test = self.new_test("low_level_api_test", "Sanity-test client API") test.add_cmd("cts-fence-helper", args="-t %s" % verbose_arg, validate=False) test = self.new_test("low_level_api_mainloop_test", "Sanity-test client API using mainloop") test.add_cmd("cts-fence-helper", args="-m %s" % verbose_arg, validate=False) def build_custom_timeout_tests(self): """ Register tests to verify custom timeout usage """ # custom timeout without topology test = self.new_test("custom_timeout_1", "Verify per device timeouts work as expected without using topology") test.add_cmd('stonith_admin', args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 -o pcmk_off_timeout=1') test.add_cmd('stonith_admin', args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 -o pcmk_off_timeout=4') test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") # timeout is 5+1+4 = 10 test.add_log_pattern("Total timeout set to 12s") # custom timeout _WITH_ topology test = self.new_test("custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology") test.add_cmd('stonith_admin', args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 -o pcmk_off_timeout=1000ms') test.add_cmd('stonith_admin', args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 -o pcmk_off_timeout=4000s') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") # timeout is 5+1+4000 = 4006 test.add_log_pattern("Total timeout set to 4807s") def build_fence_merge_tests(self): """ Register tests to verify when fence operations should be merged """ ### Simple test that overlapping fencing operations get merged test = self.new_test("custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") ### one merger will happen test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") ### Test that multiple mergers occur test = self.new_test("custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o delay=2 -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") ### 4 mergers should occur test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") ### Test that multiple mergers occur with topologies used test = self.new_test("custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") ### 4 mergers should occur test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") def build_fence_no_merge_tests(self): """ Register tests to verify when fence operations should not be merged """ test = self.new_test("custom_no_merge", "Verify differing fencing operations are not merged") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -F node2 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client", negative=True) def build_standalone_tests(self): """ Register a grab bag of tests """ # test what happens when all devices timeout test = self.new_test("fence_multi_device_failure", "Verify that all devices timeout, a fencing failure is returned") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false3 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 2", expected_exitcode=ExitStatus.TIMEOUT) test.add_log_pattern("Total timeout set to 7s") test.add_log_pattern("targeting node3 using false1 returned ") test.add_log_pattern("targeting node3 using false2 returned ") test.add_log_pattern("targeting node3 using false3 returned ") # test what happens when multiple devices can fence a node, but the first device fails test = self.new_test("fence_device_failure_rollover", "Verify that when one fence device fails for a node, the others are tried") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") test.add_log_pattern("Total timeout set to 18s") # test what happens when we try to use a missing fence-agent test = self.new_test("fence_missing_agent", "Verify proper error-handling when using a non-existent fence-agent") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_missing -o mode=pass -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node2") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5", expected_exitcode=ExitStatus.NOSUCH) test.add_cmd("stonith_admin", args="--output-as=xml -F node2 -t 5") # simple topology test for one device test = self.new_test("topology_simple", "Verify all fencing devices at a level are used") test.add_cmd("stonith_admin", args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") test.add_log_pattern("Total timeout set to 6s") test.add_log_pattern("targeting node3 using true returned 0") # add topology, delete topology, verify fencing still works test = self.new_test("topology_add_remove", "Verify fencing occurrs after all topology levels are removed") test.add_cmd("stonith_admin", args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true") test.add_cmd("stonith_admin", args="--output-as=xml -d node3 -i 1") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") test.add_log_pattern("Total timeout set to 6s") test.add_log_pattern("targeting node3 using true returned 0") # test what happens when the first fencing level has multiple devices test = self.new_test("topology_device_fails", "Verify if one device in a level fails, the other is tried") test.add_cmd("stonith_admin", args='--output-as=xml -R false -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 20") test.add_log_pattern("Total timeout set to 48s") test.add_log_pattern("targeting node3 using false returned 1") test.add_log_pattern("targeting node3 using true returned 0") # test what happens when the first fencing level fails test = self.new_test("topology_multi_level_fails", "Verify if one level fails, the next leve is tried") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 3") test.add_log_pattern("Total timeout set to 21s") test.add_log_pattern("targeting node3 using false1 returned 1") test.add_log_pattern("targeting node3 using false2 returned 1") test.add_log_pattern("targeting node3 using true3 returned 0") test.add_log_pattern("targeting node3 using true4 returned 0") # test what happens when the first fencing level had devices that no one has registered test = self.new_test("topology_missing_devices", "Verify topology can continue with missing devices") test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") # Test what happens if multiple fencing levels are defined, and then the first one is removed test = self.new_test("topology_level_removal", "Verify level removal works") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit test.add_cmd("stonith_admin", args="--output-as=xml -d node3 -i 2") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 20") test.add_log_pattern("Total timeout set to 96s") test.add_log_pattern("targeting node3 using false1 returned 1") test.add_log_pattern("targeting node3 using false2 returned ", negative=True) test.add_log_pattern("targeting node3 using true3 returned 0") test.add_log_pattern("targeting node3 using true4 returned 0") # Test targeting a topology level by node name pattern test = self.new_test("topology_level_pattern", "Verify targeting topology by node name pattern works") test.add_cmd("stonith_admin", args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r '@node.*' -i 1 -v true") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") test.add_log_pattern("targeting node3 using true returned 0") # test allowing commas and semicolons as delimiters in pcmk_host_list test = self.new_test("host_list_delimiters", "Verify commas and semicolons can be used as pcmk_host_list delimiters") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1,node2,node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=pcmk1;pcmk2;pcmk3"') test.add_cmd("stonith_admin", args="stonith_admin --output-as=xml -F node2 -t 5") test.add_cmd("stonith_admin", args="stonith_admin --output-as=xml -F pcmk3 -t 5") test.add_log_pattern("targeting node2 using true1 returned 0") test.add_log_pattern("targeting pcmk3 using true2 returned 0") # test the stonith builds the correct list of devices that can fence a node test = self.new_test("list_devices", "Verify list of devices that can fence a node is correct") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -l node1 -V", stdout_match="true2", stdout_no_match="true1") test.add_cmd("stonith_admin", args="--output-as=xml -l node1 -V", stdout_match="true3", stdout_no_match="true1") # simple test of device monitor test = self.new_test("monitor", "Verify device is reachable") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node3"') test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node3"') test.add_cmd("stonith_admin", args="--output-as=xml -Q true1") test.add_cmd("stonith_admin", args="--output-as=xml -Q false1") test.add_cmd("stonith_admin", args="--output-as=xml -Q true2", expected_exitcode=ExitStatus.NOSUCH) # Verify monitor occurs for duration of timeout period on failure test = self.new_test("monitor_timeout", "Verify monitor uses duration of timeout period given") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=fail -o monitor_mode=fail -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -Q true1 -t 5", expected_exitcode=ExitStatus.ERROR) test.add_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries test = self.new_test("monitor_timeout_max_retries", "Verify monitor retries until max retry value or timeout is hit") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=fail -o monitor_mode=fail -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -Q true1 -t 15", expected_exitcode=ExitStatus.ERROR) test.add_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test test = self.new_test("register", "Verify devices can be registered and un-registered") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -Q true1") test.add_cmd("stonith_admin", args="--output-as=xml -D true1") test.add_cmd("stonith_admin", args="--output-as=xml -Q true1", expected_exitcode=ExitStatus.NOSUCH) # simple reboot test test = self.new_test("reboot", "Verify devices can be rebooted") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -B node3 -t 5") test.add_cmd("stonith_admin", args="--output-as=xml -D true1") test.add_cmd("stonith_admin", args="--output-as=xml -Q true1", expected_exitcode=ExitStatus.NOSUCH) # test fencing history test = self.new_test("fence_history", "Verify last fencing operation is returned") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5 -V") test.add_cmd("stonith_admin", args="--output-as=xml -H node3", stdout_match='action="off" target="node3" .* status="success"') # simple test of dynamic list query test = self.new_test("dynamic_list_query", "Verify dynamic list of fencing devices can be retrieved") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -l fake_port_1", stdout_match='count="3"') # fence using dynamic list query test = self.new_test("fence_dynamic_list_query", "Verify dynamic list of fencing devices can be retrieved") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", args="--output-as=xml -F fake_port_1 -t 5 -V") # simple test of query using status action test = self.new_test("status_query", "Verify dynamic list of fencing devices can be retrieved") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_check=status') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_check=status') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o pcmk_host_check=status') test.add_cmd("stonith_admin", args="--output-as=xml -l fake_port_1", stdout_match='count="3"') # test what happens when no reboot action is advertised test = self.new_test("no_reboot_support", "Verify reboot action defaults to off when no reboot action is advertised by agent") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy_no_reboot -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -B node1 -t 5 -V") test.add_log_pattern("does not support reboot") test.add_log_pattern("using true1 returned 0") # make sure reboot is used when reboot action is advertised test = self.new_test("with_reboot_support", "Verify reboot action can be used when metadata advertises it") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -B node1 -t 5 -V") test.add_log_pattern("does not advertise support for 'reboot', performing 'off'", negative=True) test.add_log_pattern("using true1 returned 0") # make sure all fencing delays are applied correctly and taken into account by fencing timeouts with topology test = self.new_test("topology_delays", "Verify all fencing delays are applied correctly and taken into account by fencing timeouts with topology") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1') test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1') # Resulting "random" delay will always be 1 since (rand() % (delay_max - delay_base)) is always 0 here test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1 -o pcmk_delay_max=2') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 --delay 1") # Total fencing timeout takes all fencing delays into account test.add_log_pattern("Total timeout set to 582s") # Fencing timeout for the first device takes the requested fencing delay # and pcmk_delay_base into account test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true1 .*146s.*", regex=True) # Requested fencing delay is applied only for the first device in the # first level, with the static delay from pcmk_delay_base added test.add_log_pattern("Delaying 'off' action targeting node3 using true1 for 2s | timeout=120s requested_delay=1s base=1s max=1s") # Fencing timeout no longer takes the requested fencing delay into account for further devices test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using false1 .*145s.*", regex=True) # Requested fencing delay is no longer applied for further devices test.add_log_pattern("Delaying 'off' action targeting node3 using false1 for 1s | timeout=120s requested_delay=0s base=1s max=1s") # Fencing timeout takes pcmk_delay_max into account test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true2 .*146s.*", regex=True) test.add_log_pattern("Delaying 'off' action targeting node3 using true2 for 1s | timeout=120s requested_delay=0s base=1s max=2s") test.add_log_pattern("Delaying 'off' action targeting node3 using true3", negative=True) def build_nodeid_tests(self): """ Register tests that use a corosync node id """ our_uname = localname() ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("supply_nodeid", "Verify nodeid is given when fence agent has nodeid as parameter") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -F %s -t 3" % our_uname) test.add_log_pattern("as nodeid with fence action 'off' targeting %s" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("do_not_supply_nodeid", "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter") # use a host name that won't be in corosync.conf test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=regr-test') + args='--output-as=xml -R true1 -a fence_dummy_no_nodeid ' + '-o mode=pass -o pcmk_host_list="regr-test %s"' + % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -F regr-test -t 3") test.add_log_pattern("as nodeid with fence action 'off' targeting regr-test", negative=True) - - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -F %s -t 3" % our_uname) test.add_log_pattern("as nodeid with fence action 'off' targeting %s" % our_uname, negative=True) def build_unfence_tests(self): """ Register tests that verify unfencing """ our_uname = localname() ### verify unfencing using automatic unfencing test = self.new_test("unfence_required_1", "Verify require unfencing on all devices when automatic=true in agent's metadata") test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) # both devices should be executed test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") ### verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("unfence_required_2", "Verify require unfencing on all devices when automatic=true in agent's metadata") test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=fail -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 6" % our_uname, expected_exitcode=ExitStatus.ERROR) ### verify unfencing using automatic devices with topology test = self.new_test("unfence_required_3", "Verify require unfencing on all devices even when at different topology levels") test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 1 -v true1" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v true2" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") ### verify unfencing using automatic devices with topology test = self.new_test("unfence_required_4", "Verify all required devices are executed even with topology levels fail") test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true3 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true4 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R false3 -a fence_dummy -o mode=fail -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R false4 -a fence_dummy -o mode=fail -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 1 -v true1" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 1 -v false1" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v false2" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v true2" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v false3" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v true3" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 3 -v false4" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 4 -v true4" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") test.add_log_pattern("using true3 returned 0") test.add_log_pattern("using true4 returned 0") def build_unfence_on_target_tests(self): """ Register tests that verify unfencing that runs on the target """ our_uname = localname() ### verify unfencing using on_target device test = self.new_test("unfence_on_target_1", "Verify unfencing with on_target = true") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) test.add_log_pattern("(on) to be executed on target") ### verify failure of unfencing using on_target device test = self.new_test("unfence_on_target_2", "Verify failure unfencing with on_target = true") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node_fake_1234"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U node_fake_1234 -t 3", expected_exitcode=ExitStatus.NOSUCH) test.add_log_pattern("(on) to be executed on target") ### verify unfencing using on_target device with topology test = self.new_test("unfence_on_target_3", "Verify unfencing with on_target = true using topology") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 1 -v true1" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v true2" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) test.add_log_pattern("(on) to be executed on target") ### verify unfencing using on_target device with topology fails when target node doesn't exist test = self.new_test("unfence_on_target_4", "Verify unfencing failure with on_target = true using topology") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node_fake"' % our_uname) test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node_fake"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -U node_fake -t 3", expected_exitcode=ExitStatus.NOSUCH) test.add_log_pattern("(on) to be executed on target") def build_remap_tests(self): """ Register tests that verify remapping of reboots to off-on """ test = self.new_test("remap_simple", "Verify sequential topology reboot is remapped to all-off-then-all-on") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake ' '-o pcmk_off_timeout=1 -o pcmk_reboot_timeout=10') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake ' '-o pcmk_off_timeout=2 -o pcmk_reboot_timeout=20') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_log_pattern("Total timeout set to 3s for peer's fencing targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # fence_dummy sets "on" as an on_target action test.add_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("remap_simple_off", "Verify sequential topology reboot skips 'on' if " "pcmk_reboot_action=off or agent doesn't support " "'on'") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass " "-o pcmk_host_list=node_fake -o pcmk_off_timeout=1 " "-o pcmk_reboot_timeout=10 -o pcmk_reboot_action=off") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy_no_on " "-o mode=pass -o pcmk_host_list=node_fake " "-o pcmk_off_timeout=2 -o pcmk_reboot_timeout=20") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_log_pattern("Total timeout set to 3s for peer's fencing targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # "on" should be skipped test.add_log_pattern("Not turning node_fake back on using " "true1 because the device is configured " "to stay off") test.add_log_pattern("Not turning node_fake back on using true2" " because the agent doesn't support 'on'") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("remap_automatic", "Verify remapped topology reboot skips automatic 'on'") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy_auto_unfence ' '-o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy_auto_unfence ' '-o "mode=pass" -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test.add_log_pattern("perform 'on' action targeting node_fake using", negative=True) test.add_log_pattern("'on' failure", negative=True) test = self.new_test("remap_complex_1", "Verify remapped topology reboot in second level works if non-remapped first level fails") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("remap_complex_2", "Verify remapped topology reboot failure in second level proceeds to third level") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using false2") test.add_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test.add_log_pattern("perform 'reboot' action targeting node_fake using true2") test.add_log_pattern("node_fake with true3", negative=True) def build_query_tests(self): """ run stonith_admin --metadata for the fence_dummy agent and check command output """ test = self.new_test("get_metadata", "Run stonith_admin --metadata for the fence_dummy agent") test.add_cmd("stonith_admin", args="--output-as=xml -a fence_dummy --metadata", stdout_match='= offset: logfile.seek(offset) else: print("%sFile truncated from %d to %d" % (prefix, offset, newsize)) if (newsize * 1.05) < offset: logfile.seek(0) # Don't block when we reach EOF fcntl.fcntl(logfile.fileno(), fcntl.F_SETFL, os.O_NONBLOCK) count = 0 while True: if logfile.tell() >= newsize: break if limit and count >= limit: break line = logfile.readline() if not line: break print(line.strip()) count += 1 print("%sLast read: %d, limit=%d, count=%d" % (prefix, logfile.tell(), limit, count)) def build_options(): """Handle command line arguments.""" # Create the top-level parser parser = argparse.ArgumentParser(description="Support tool for CTS") subparsers = parser.add_subparsers(dest="subparser_name") # Create the parser for the "install" command subparsers.add_parser("install", help="Install support files") # Create the parser for the "uninstall" command subparsers.add_parser("uninstall", help="Remove support files") # Create the parser for the "watch" command watch_parser = subparsers.add_parser("watch", help="Remote log watcher") watch_parser.add_argument("-f", "--filename", default="/var/log/messages", help="File to watch") watch_parser.add_argument("-l", "--limit", type=int, default=0, help="Maximum number of lines to read") watch_parser.add_argument("-o", "--offset", default=0, help="Which line number to start reading from") watch_parser.add_argument("-p", "--prefix", default="", help="String to add to the beginning of each line") args = parser.parse_args() return args if __name__ == "__main__": opts = build_options() if os.geteuid() != 0: print("This command must be run as root") sys.exit(ExitStatus.ERROR) # If the install directory doesn't exist, assume we're in a build directory. data_dir = "%s/pacemaker/tests/cts" % BuildOptions.DATA_DIR if not os.path.exists(data_dir): data_dir = "%s/pacemaker/tests/cts" % BuildOptions._BUILD_DIR if opts.subparser_name == "install": cmd_install(data_dir) if opts.subparser_name == "uninstall": cmd_uninstall() if opts.subparser_name == "watch": cmd_watch(opts.filename, opts.limit, opts.offset, opts.prefix) diff --git a/cts/support/fence_dummy.in b/cts/support/fence_dummy.in index 6404000dd5..42c9a54eb9 100644 --- a/cts/support/fence_dummy.in +++ b/cts/support/fence_dummy.in @@ -1,517 +1,519 @@ #!@PYTHON@ """Dummy fence agent for testing.""" __copyright__ = "Copyright 2012-2024 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import re import sys import time import random import atexit import getopt import contextlib AGENT_VERSION = "4.1.0" OCF_VERSION = "1.0" SHORT_DESC = "Dummy fence agent" LONG_DESC = """fence_dummy is a fake fencing agent which reports success based on its mode (pass|fail|random) without doing anything.""" # Short options used: difhmnoqsvBDHMRUV ALL_OPT = { "quiet": { "getopt": "q", "help": "", "order": 50 }, "verbose": { "getopt": "v", "longopt": "verbose", "help": "-v, --verbose Verbose mode", "required": "0", "shortdesc": "Verbose mode", "order": 51 }, "debug": { "getopt": "D:", "longopt": "debug-file", "help": "-D, --debug-file=[debugfile] Debugging to output file", "required": "0", "shortdesc": "Write debug information to given file", "order": 52 }, "version": { "getopt": "V", "longopt": "version", "help": "-V, --version Display version information and exit", "required": "0", "shortdesc": "Display version information and exit", "order": 53 }, "help": { "getopt": "h", "longopt": "help", "help": "-h, --help Display this help and exit", "required": "0", "shortdesc": "Display help and exit", "order": 54 }, "action": { "getopt": "o:", "longopt": "action", "help": "-o, --action=[action] Action: validate-all, status, list, reboot (default), off or on", "required": "1", "shortdesc": "Fencing Action", "default": "reboot", "order": 1 }, "nodename": { "getopt": "N:", "longopt": "nodename", "help": "-N, --nodename Node name of fence target (ignored)", "required": "0", "shortdesc": "The node name of fence target (ignored)", "order": 2 }, "mode": { "getopt": "M:", "longopt": "mode", "required": "0", "help": "-M, --mode=(pass|fail|random) Exit status to return for non-monitor operations", "shortdesc": "Whether fence operations should always pass, always fail, or fail at random", "order": 3 }, "monitor_mode": { "getopt": "m:", "longopt": "monitor_mode", "help": "-m, --monitor_mode=(pass|fail|random) Exit status to return for monitor operations", "required": "0", "shortdesc": "Whether monitor operations should always pass, always fail, or fail at random", "order": 3 }, "random_sleep_range": { "getopt": "R:", "required": "0", "longopt": "random_sleep_range", "help": "-R, --random_sleep_range=[seconds] Sleep between 1 and [seconds] before returning", "shortdesc": "Wait randomly between 1 and [seconds]", "order": 3 }, "mock_dynamic_hosts": { "getopt": "H:", "longopt": "mock_dynamic_hosts", "help": "-H, --mock_dynamic_hosts=[list] What to return when dynamically queried for possible targets", "required": "0", "shortdesc": "A list of hosts we can fence", "order": 3 }, "delay": { "getopt": "f:", "longopt": "delay", "help": "-f, --delay [seconds] Wait X seconds before fencing is started", "required": "0", "shortdesc": "Wait X seconds before fencing is started", "default": "0", "order": 3 }, "monitor_delay": { "getopt": "d:", "longopt": "monitor_delay", "help": "-d, --monitor_delay [seconds] Wait X seconds before monitor completes", "required": "0", "shortdesc": "Wait X seconds before monitor completes", "default": "0", "order": 3 }, "off_delay": { "getopt": "F:", "longopt": "off_delay", "help": "-F, --off_delay [seconds] Wait additional X seconds before off action", "required": "0", "shortdesc": "Wait additional X seconds before off action", "default": "0", "order": 3 }, "plug": { "getopt": "n:", "longopt": "plug", "help": "-n, --plug=[id] Physical plug number on device (ignored)", "required": "1", "shortdesc": "Ignored", "order": 4 }, "port": { "getopt": "n:", "longopt": "plug", "help": "-n, --plug=[id] Physical plug number on device (ignored)", "required": "1", "shortdesc": "Ignored", "order": 4 }, "switch": { "getopt": "s:", "longopt": "switch", "help": "-s, --switch=[id] Physical switch number on device (ignored)", "required": "0", "shortdesc": "Ignored", "order": 4 }, "nodeid": { "getopt": "i:", "longopt": "nodeid", "help": "-i, --nodeid Corosync id of fence target (ignored)", "required": "0", "shortdesc": "Ignored", "order": 4 }, "uuid": { "getopt": "U:", "longopt": "uuid", "help": "-U, --uuid UUID of the VM to fence (ignored)", "required": "0", "shortdesc": "Ignored", "order": 4 } } def agent(): """Return name this file was run as.""" return os.path.basename(sys.argv[0]) def fail_usage(message): """Print a usage message and exit.""" sys.exit("%s\nPlease use '-h' for usage" % message) def show_docs(options, auto_unfence, no_reboot, no_on): """Handle informational options (display info and exit).""" device_opt = options["device_opt"] if "-h" in options: usage(device_opt) sys.exit(0) if "-o" in options and options["-o"].lower() == "metadata": f = "%s.fail" % __file__ if not os.path.exists(f): metadata(device_opt, options, auto_unfence, no_reboot, no_on) else: os.remove(f) sys.exit(0) if "-V" in options: print(AGENT_VERSION) sys.exit(0) def sorted_options(avail_opt): """Return a list of all options, in their internally specified order.""" sorted_list = [(key, ALL_OPT[key]) for key in avail_opt] sorted_list.sort(key=lambda x: x[1]["order"]) return sorted_list def usage(avail_opt): """Print a usage message.""" print("Usage:") print("\t%s [options]" % agent()) print("Options:") for (_, value) in sorted_options(avail_opt): if len(value["help"]) != 0: print(" %s" % value["help"]) def metadata(avail_opt, options, auto_unfence, no_reboot, no_on): """Print agent metadata.""" # This log is just for testing handling of stderr output print("asked for fence_dummy metadata", file=sys.stderr) print(""" %s %s """ % (agent(), SHORT_DESC, AGENT_VERSION, OCF_VERSION, LONG_DESC)) for (option, _) in sorted_options(avail_opt): if "shortdesc" not in ALL_OPT[option]: continue print(' ' % (option, ALL_OPT[option]["required"])) default = "" default_name_arg = "-%s" % ALL_OPT[option]["getopt"][:-1] default_name_no_arg = "-%s" % ALL_OPT[option]["getopt"] if "default" in ALL_OPT[option]: default = 'default="%s"' % ALL_OPT[option]["default"] elif options.get(default_name_arg) is not None: try: default = 'default="%s"' % options[default_name_arg] except TypeError: # @todo/@note: Currently there is no clean way how to handle lists # we can create a string from it but we can't set it on command line default = 'default="%s"' % str(options[default_name_arg]) elif default_name_no_arg in options: default = 'default="true"' mixed = ALL_OPT[option]["help"] # split it between option and help text res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) if res is not None: mixed = res.group(1) mixed = mixed.replace("<", "<").replace(">", ">") print(' ' % mixed) if ALL_OPT[option]["getopt"].count(":") > 0: print(' ' % default) else: print(' ' % default) print(' %s' % ALL_OPT[option]["shortdesc"]) print(' ') print(' \n ') if not no_on: if auto_unfence: attr_name = 'automatic' else: attr_name = 'on_target' print(' ' % attr_name) print(' ') if not no_reboot: print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print('') def option_longopt(option): """Return the getopt-compatible long-option name of the given option.""" if ALL_OPT[option]["getopt"].endswith(":"): return ALL_OPT[option]["longopt"] + "=" return ALL_OPT[option]["longopt"] def opts_from_command_line(argv, avail_opt): """Read options from command-line arguments.""" # Prepare list of options for getopt getopt_string = "" longopt_list = [] for k in avail_opt: if k in ALL_OPT: getopt_string += ALL_OPT[k]["getopt"] else: fail_usage("Parse error: unknown option '%s'" % k) if k in ALL_OPT and "longopt" in ALL_OPT[k]: longopt_list.append(option_longopt(k)) try: (opt, _) = getopt.gnu_getopt(argv, getopt_string, longopt_list) except getopt.GetoptError as error: fail_usage("Parse error: %s" % error.msg) # Transform longopt to short one which are used in fencing agents old_opt = opt opt = {} for old_option in dict(old_opt): if old_option.startswith("--"): for rec in ALL_OPT.values(): if rec.get("longopt") is None: continue long = "--%s" % rec["longopt"] if long == old_option: short = "-%s" % rec["getopt"][0] opt[short] = dict(old_opt)[old_option] else: opt[old_option] = dict(old_opt)[old_option] # Compatibility Layer (with what? probably not needed for fence_dummy) new_opt = dict(opt) if "-T" in new_opt: new_opt["-o"] = "status" if "-n" in new_opt: new_opt["-m"] = new_opt["-n"] opt = new_opt return opt def opts_from_stdin(avail_opt): """Read options from standard input.""" opt = {} name = "" for line in sys.stdin.readlines(): line = line.strip() if line.startswith("#") or (len(line) == 0): continue (name, value) = (line + "=").split("=", 1) value = value[:-1] # Compatibility Layer (with what? probably not needed for fence_dummy) if name == "option": name = "action" if name not in avail_opt: print("Parse error: Ignoring unknown option '%s'" % line, file=sys.stderr) continue if ALL_OPT[name]["getopt"].endswith(":"): short = "-%s" % ALL_OPT[name]["getopt"][0] opt[short] = value elif value.lower() in ["1", "yes", "on", "true"]: short = "-%s" % ALL_OPT[name]["getopt"] opt[short] = "1" return opt def process_input(avail_opt): """Set standard environment variables, and parse all options.""" # Set standard environment os.putenv("LANG", "C") os.putenv("LC_ALL", "C") # Read options from command line or standard input if len(sys.argv) > 1: return opts_from_command_line(sys.argv[1:], avail_opt) return opts_from_stdin(avail_opt) def atexit_handler(): """Close stdout on exit.""" try: sys.stdout.close() os.close(1) except IOError: sys.exit("%s failed to close standard output" % agent()) def success_mode(options, option, default_value): """Return exit code specified by option.""" if option in options: test_value = options[option] else: test_value = default_value if test_value == "pass": exitcode = 0 elif test_value == "fail": exitcode = 1 else: exitcode = random.randint(0, 1) return exitcode def write_options(options): """Write out all options to debug file.""" with contextlib.suppress(IOError): with io.open(options["-D"], "at", encoding="utf-8") as debugfile: debugfile.write("### %s ###\n" % time.strftime("%Y-%m-%d %H:%M:%S")) for option in sorted(options): debugfile.write("%s=%s\n" % (option, options[option])) debugfile.write("###\n") def main(): """Run the dummy fencing agent.""" auto_unfence = False no_reboot = False no_on = False # Meta-data can't take parameters, so we simulate different meta-data # behavior based on the executable name (which can be a symbolic link). if sys.argv[0].endswith("_auto_unfence"): auto_unfence = True elif sys.argv[0].endswith("_no_reboot"): no_reboot = True elif sys.argv[0].endswith("_no_on"): no_on = True + elif sys.argv[0].endswith("_no_nodeid"): + del ALL_OPT["nodeid"] device_opt = ALL_OPT.keys() # Defaults for fence agent atexit.register(atexit_handler) options = process_input(device_opt) options["device_opt"] = device_opt show_docs(options, auto_unfence, no_reboot, no_on) action = options.get("-o", "reboot") # dump input to file if "-D" in options and action != "validate-all": write_options(options) if "-f" in options and action != "validate-all": val = int(options["-f"]) print("delay sleep for %d seconds" % val, file=sys.stderr) time.sleep(val) # random sleep for testing if "-R" in options and action != "validate-all": val = int(options["-R"]) ran = random.randint(1, val) print("random sleep for %d seconds" % ran, file=sys.stderr) time.sleep(ran) if action == "monitor": if "-d" in options: time.sleep(int(options["-d"])) exitcode = success_mode(options, "-m", "pass") elif action == "list": print("fence_dummy action (list) called", file=sys.stderr) if "-H" in options: print(options["-H"]) exitcode = 0 else: print("dynamic hostlist requires mock_dynamic_hosts to be set", file=sys.stderr) exitcode = 1 elif action == "validate-all": if "-f" in options: val = int(options["-f"]) if val > 10: exitcode = 1 else: exitcode = 0 else: exitcode = 1 elif action == "off": if "-F" in options: time.sleep(int(options["-F"])) exitcode = success_mode(options, "-M", "random") else: exitcode = success_mode(options, "-M", "random") # Ensure we generate some error output on failure exit. if exitcode == 1: print("simulated %s failure" % action, file=sys.stderr) sys.exit(exitcode) if __name__ == "__main__": main()