diff --git a/cts/cts-exec.in b/cts/cts-exec.in index 8c1b92a1b9..d53e92f568 100644 --- a/cts/cts-exec.in +++ b/cts/cts-exec.in @@ -1,1153 +1,1141 @@ #!@PYTHON@ """ Regression tests for Pacemaker's pacemaker-execd """ __copyright__ = "Copyright 2012-2023 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import psutil import stat import sys import subprocess import shlex import shutil import tempfile import time # Where to find test binaries # Prefer the source tree if available TEST_DIR = sys.path[0] # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus from pacemaker._cts.errors import ExitCodeError, OutputFoundError, OutputNotFoundError from pacemaker._cts.process import killall, exit_if_proc_running, pipe_communicate, stdout_from_command from pacemaker._cts.test import Test # File permissions for executable scripts we create EXECMODE = stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-exec.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BuildOptions._BUILD_DIR, TEST_DIR)) # For pacemaker-execd, cts-exec-helper, and pacemaker-remoted new_path = "%s/daemons/execd:%s" % (BuildOptions._BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BuildOptions._BUILD_DIR, new_path) # For crm_resource # For pacemaker-fenced new_path = "%s/daemons/fenced:%s" % (BuildOptions._BUILD_DIR, new_path) # For cts-support new_path = "%s/cts/support:%s" % (BuildOptions._BUILD_DIR, new_path) else: print("Running tests from the install tree: %s (not %s)" % (BuildOptions.DAEMON_DIR, TEST_DIR)) # For cts-exec-helper, cts-support, pacemaker-execd, pacemaker-fenced, # and pacemaker-remoted new_path = "%s:%s" % (BuildOptions.DAEMON_DIR, new_path) print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path class ExecTest(Test): """ Executor for a single pacemaker-execd regression test """ def __init__(self, name, description, **kwargs): Test.__init__(self, name, description, **kwargs) self.tls = kwargs.get("tls", False) if self.tls: self._daemon_location = "pacemaker-remoted" else: self._daemon_location = "pacemaker-execd" self._execd_process = None self._test_tool_location = "cts-exec-helper" def _new_cmd(self, cmd, args, exitcode, **kwargs): """ Add a command to be executed as part of this test """ if self.verbose and cmd == self._test_tool_location: args += " -V " if (cmd == self._test_tool_location) and self.tls: args += " -S " kwargs["validate"] = False kwargs["check_rng"] = False kwargs["check_stderr"] = False Test._new_cmd(self, cmd, args, exitcode, **kwargs) def start_environment(self): """ Prepare the host for running a test """ ### make sure we are in full control here ### killall([ "pacemaker-fenced", "lt-pacemaker-fenced", "pacemaker-execd", "lt-pacemaker-execd", "cts-exec-helper", "lt-cts-exec-helper", "pacemaker-remoted", ]) additional_args = "" if not self.tls: self._stonith_process = subprocess.Popen(shlex.split("pacemaker-fenced -s")) if self.verbose: additional_args = additional_args + " -V" self._execd_process = subprocess.Popen(shlex.split("%s %s -l %s" % (self._daemon_location, additional_args, self.logpath))) logfile = None init_time = time.time() update_time = init_time while True: time.sleep(0.1) if not self.force_wait and logfile == None \ and os.path.exists(self.logpath): logfile = io.open(self.logpath, 'rt', encoding = "ISO-8859-1") if not self.force_wait and logfile != None: for line in logfile.readlines(): if "successfully started" in line: return now = time.time() if self.timeout > 0 and (now - init_time) >= self.timeout: if not self.force_wait: print("\tDaemon %s doesn't seem to have been initialized within %fs." "\n\tConsider specifying a longer '--timeout' value." %(self._daemon_location, self.timeout)) return if self.verbose and (now - update_time) >= 5: print("Waiting for %s to be initialized: %fs ..." %(self._daemon_location, now - init_time)) update_time = now def clean_environment(self): """ Clean up the host after running a test """ if self._execd_process: self._execd_process.terminate() self._execd_process.wait() if self.verbose: print("Daemon output") logfile = io.open(self.logpath, 'rt', errors='replace') for line in logfile: print(line.strip().encode('utf-8', 'replace')) if self._stonith_process: self._stonith_process.terminate() self._stonith_process.wait() self._execd_process = None self._stonith_process = None def add_cmd(self, args): """ Add a cts-exec-helper command to be executed as part of this test """ self._new_cmd(self._test_tool_location, args, ExitStatus.OK) def add_cmd_and_kill(self, args, kill_proc): """ Add a cts-exec-helper command and system command to be executed as part of this test """ self._new_cmd(self._test_tool_location, args, ExitStatus.OK, kill=kill_proc) def add_cmd_check_stdout(self, args, match, no_match=None): """ Add a command with expected output to be executed as part of this test """ self._new_cmd(self._test_tool_location, args, ExitStatus.OK, stdout_match=match, stdout_negative_match=no_match) def add_cmd_expected_fail(self, args, exitcode=ExitStatus.ERROR): """ Add a cts-exec-helper command to be executed as part of this test and expected to fail """ self._new_cmd(self._test_tool_location, args, exitcode) def add_sys_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self._new_cmd(cmd, args, ExitStatus.OK) - def print_result(self, filler): - """ Print the result of the last test execution """ - - print("%s%s" % (filler, self._result_txt)) - - def set_error(self, step, cmd): - """ Record failure of this test """ - - msg = "FAILURE - '%s' failed at step %d. Command: %s %s" - self._result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) - self.exitcode = ExitStatus.ERROR - def run(self): """ Execute this test. """ i = 1 if self.tls and self.name.count("stonith") != 0: self._result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print(self._result_txt) return self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self._result_txt = "SUCCESS - '%s'" % (self.name) self.exitcode = ExitStatus.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() print(self._result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = True class Tests(object): """ Collection of all pacemaker-execd regression tests """ def __init__(self, verbose=False, tls=False, timeout=2, force_wait=False, logdir="/tmp"): self.tests = [] self.verbose = verbose self.tls = tls self.timeout = timeout self.force_wait = force_wait self.logdir = logdir self.rsc_classes = stdout_from_command(["crm_resource, "--list-standards"]) self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.installed_files = [] self.action_timeout = " -t 9000 " if self.tls: self.rsc_classes.remove("stonith") try: self.rsc_classes.remove("nagios") except ValueError: # Not found pass if "systemd" in self.rsc_classes: try: # This code doesn't need this import, but pacemaker-cts-dummyd # does, so ensure the dependency is available rather than cause # all systemd tests to fail. import systemd.daemon except ImportError: print("Python systemd bindings not found.") print("The tests for systemd class are not going to be run.") self.rsc_classes.remove("systemd") print("Testing resource classes", repr(self.rsc_classes)) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : '-c exec -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : '-c cancel -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc " + self.action_timeout + " -C systemd -T pacemaker-cts-dummyd@3", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : '-c exec -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" -t 15000 ", "systemd_cancel_line" : '-c cancel -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T pacemaker-cts-dummyd", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : '-c exec -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_monitor_event" : '-l "NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete" -t 15000', "upstart_cancel_line" : '-c cancel -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : '-c exec -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : '-c cancel -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : '-c exec -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : '-c cancel -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc " + self.action_timeout + " -C stonith -P pacemaker -T fence_dummy", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : '-c exec -r stonith_test_rsc -a start ' + self.action_timeout, "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : '-c exec -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : '-c cancel -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): """ Create a named test """ test = ExecTest(name, description, verbose=self.verbose, tls=self.tls, timeout=self.timeout, force_wait=self.force_wait, logdir=self.logdir) self.tests.append(test) return test def setup_test_environment(self): """ Prepare the host before executing any tests """ if BuildOptions.REMOTE_ENABLED: os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): print("Installing /etc/pacemaker/authkey ...") os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") self.installed_files.append("/etc/pacemaker/authkey") # If we're in build directory, install agents if not already installed if os.path.exists("%s/cts/cts-exec.in" % BuildOptions._BUILD_DIR): if not os.path.exists("%s/pacemaker" % BuildOptions.OCF_RA_INSTALL_DIR): # @TODO remember which components were created and remove them os.makedirs("%s/pacemaker" % BuildOptions.OCF_RA_INSTALL_DIR, 0o755) for agent in ["Dummy", "Stateful", "ping"]: agent_source = "%s/extra/resources/%s" % (BuildOptions._BUILD_DIR, agent) agent_dest = "%s/pacemaker/%s" % (BuildOptions.OCF_RA_INSTALL_DIR, agent) if not os.path.exists(agent_dest): print("Installing %s ..." % (agent_dest)) shutil.copyfile(agent_source, agent_dest) os.chmod(agent_dest, EXECMODE) self.installed_files.append(agent_dest) subprocess.call(["cts-support", "install"]) def cleanup_test_environment(self): """ Clean up the host after executing desired tests """ for installed_file in self.installed_files: print("Removing %s ..." % (installed_file)) os.remove(installed_file) subprocess.call(["cts-support", "uninstall"]) def build_generic_tests(self): """ Register tests that apply to all resource classes """ common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd_expected_fail(common_cmds["%s_monitor_event" % (rsc)], ExitStatus.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd_expected_fail(common_cmds["%s_monitor_event" % (rsc)], ExitStatus.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # Add the duplicate monitors test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd_expected_fail(common_cmds["%s_monitor_event" % (rsc)], ExitStatus.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd_expected_fail(common_cmds["%s_monitor_event" % (rsc)], ExitStatus.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd_expected_fail(common_cmds["%s_monitor_event" % (rsc)], ExitStatus.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_cmd_expected_fail(common_cmds["%s_monitor_event" % (rsc)], ExitStatus.TIMEOUT) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_multi_rsc_tests(self): """ Register complex tests that involve managing multiple resouces of different types """ common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: ### If this fails, that means the monitor is not being rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_negative_tests(self): """ Register tests related to how pacemaker-execd handles failures """ ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") # -t must be less than self.action_timeout test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd('-c register_rsc -r test_rsc ' + '-C stonith -P pacemaker -T fence_dummy ' + self.action_timeout + '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete"') test.add_cmd('-c exec -r test_rsc -a start -k monitor_delay -v 30 ' + '-t 1000 -w') # -t must be less than self.action_timeout test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after pacemaker-execd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd('-c exec -r stonith_test_rsc -a monitor -i 600s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" ' + self.action_timeout) test.add_cmd_and_kill('-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:error op_status:error" -t 15000', "killall -9 -q pacemaker-fenced lt-pacemaker-fenced") test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd_and_kill('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" ' + self.action_timeout, "rm -f %s/run/Dummy-test_rsc.state" % BuildOptions.LOCAL_STATE_DIR) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" " + self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" " + self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ') test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd_and_kill('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout, 'rm -f %s/run/Dummy-test_rsc.state' % BuildOptions.LOCAL_STATE_DIR) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout + '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"') ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T pacemaker-cts-dummyd@3 " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout, "pkill -9 -f pacemaker-cts-dummyd") test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T pacemaker-cts-dummyd "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout, 'killall -9 -q dd') test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) ### interval is wrong, should fail test.add_cmd_expected_fail('-c cancel -r test_rsc -a monitor -i 2s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") ### action name is wrong, should fail test.add_cmd_expected_fail('-c cancel -r test_rsc -a stop -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_cmd_expected_fail("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:complete\" ") test.add_cmd_expected_fail("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd_expected_fail('-c exec -r test_rsc -a monitor -i 6s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd_expected_fail("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_cmd_expected_fail("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd_expected_fail("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") def build_stress_tests(self): """ Register stress tests """ timeout = "-t 20000" iterations = 25 test = self.new_test("ocf_stress", "Verify OCF agent handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T pacemaker-cts-dummyd@3 -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) iterations = 9 timeout = "-t 30000" ### Verify recurring op in-flight collision is handled in series properly test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) for i in range(iterations): test.add_cmd('-c exec -r test_rsc -a monitor %s -i 100%dms ' '-k op_sleep -v 2 ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' % (timeout, i)) test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) def build_custom_tests(self): """ Register tests that target specific cases """ ### verify resource temporary folder is created and used by OCF agents. ### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al %s" % BuildOptions.RSC_TMP_DIR) test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al %s" % BuildOptions.RSC_TMP_DIR) test.add_sys_cmd("ls", "%s/Dummy-test_rsc.state" % BuildOptions.RSC_TMP_DIR) test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000", ExitStatus.TIMEOUT) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000", ExitStatus.TIMEOUT) test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T pacemaker-cts-dummyd@3 "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_cmd_expected_fail("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T pacemaker-cts-dummyd "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_cmd_expected_fail("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_cmd_expected_fail("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_cmd_expected_fail("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r \"test_rsc\" -a \"monitor\" -i 1s ' + self.action_timeout + ' -n ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_cmd_expected_fail("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, ExitStatus.TIMEOUT) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ') test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"", "resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_cmd_expected_fail("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_cmd_expected_fail("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"", "resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy\"", "resource-agent name=\"fence_dummy\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"pacemaker-cts-dummyd@\"", "resource-agent name=\"pacemaker-cts-dummyd@\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"pacemaker-cts-dummyd\"", "resource-agent name=\"pacemaker-cts-dummyd\"") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd@") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "pacemaker-cts-dummyd@") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "pacemaker-cts-dummyd") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy") def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if not test.executed: continue if test.exitcode != ExitStatus.OK: sys.exit(ExitStatus.ERROR) sys.exit(ExitStatus.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if not test.executed: continue if test.exitcode != ExitStatus.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = False self.options['run-all'] = True self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = False self.options['timeout'] = 2 self.options['force-wait'] = False self.options['invalid-arg'] = "" self.options['show-usage'] = False self.options['pacemaker-remote'] = False def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = False for i in range(0, len(args)): if skip: skip = False continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = True elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = True elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = True elif args[i] == "-t" or args[i] == "--timeout": self.options['timeout'] = float(args[i+1]) elif args[i] == "-w" or args[i] == "--force-wait": self.options['force-wait'] = True elif args[i] == "-R" or args[i] == "--pacemaker-remote": if BuildOptions.REMOTE_ENABLED: self.options['pacemaker-remote'] = True else: print("ERROR: This build does not support Pacemaker Remote") sys.exit(ExitStatus.USAGE) elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = True elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = True def show_usage(): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--timeout | -t 'floating point number']" "\n\t\tUp to how many seconds each test case waits for the daemon to be initialized." "\n\t\tDefaults to 2. The value 0 means no limit.") print("\t [--force-wait | -w]" "\n\t\tEach test case waits the default/specified --timeout for the daemon without tracking the log.") if BuildOptions.REMOTE_ENABLED: print("\t [--pacemaker-remote | -R Test pacemaker-remoted binary instead of pacemaker-execd") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run pacemaker-execd regression tests as specified by arguments """ update_path() # Ensure all command output is in portable locale for comparison os.environ['LC_ALL'] = "C" opts = TestOptions() opts.build_options(argv) if opts.options['show-usage']: show_usage() sys.exit(ExitStatus.OK) if opts.options['pacemaker-remote']: daemon_name = "pacemaker-remoted" else: daemon_name = "pacemaker-execd" exit_if_proc_running(daemon_name) # Create a temporary directory for log files (the directory will # automatically be erased when done) with tempfile.TemporaryDirectory(prefix="cts-exec-") as logdir: tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote'], opts.options['timeout'], opts.options['force-wait'], logdir) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.build_stress_tests() if opts.options['list-tests']: tests.print_list() sys.exit(ExitStatus.OK) tests.setup_test_environment() print("Starting ...") if opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in index 05b052b35b..da4995887b 100644 --- a/cts/cts-fencing.in +++ b/cts/cts-fencing.in @@ -1,1439 +1,1427 @@ #!@PYTHON@ """ Regression tests for Pacemaker's fencer """ __copyright__ = "Copyright 2012-2023 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import psutil import sys import subprocess import shlex import time import tempfile import signal # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus from pacemaker._cts.errors import ExitCodeError, OutputFoundError, OutputNotFoundError, XmlValidationError from pacemaker._cts.process import killall, exit_if_proc_running, stdout_from_command from pacemaker._cts.test import Test TEST_DIR = sys.path[0] AUTOGEN_COROSYNC_TEMPLATE = """ totem { version: 2 cluster_name: cts-fencing crypto_cipher: none crypto_hash: none transport: udp } nodelist { node { nodeid: 1 name: %s ring0_addr: 127.0.0.1 } } logging { debug: off to_syslog: no to_stderr: no to_logfile: yes logfile: %s } """ def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-fencing.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BuildOptions._BUILD_DIR, TEST_DIR)) # For pacemaker-fenced and cts-fence-helper new_path = "%s/daemons/fenced:%s" % (BuildOptions._BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BuildOptions._BUILD_DIR, new_path) # For stonith_admin new_path = "%s/cts/support:%s" % (BuildOptions._BUILD_DIR, new_path) # For cts-support else: print("Running tests from the install tree: %s (not %s)" % (BuildOptions.DAEMON_DIR, TEST_DIR)) # For pacemaker-fenced, cts-fence-helper, and cts-support new_path = "%s:%s" % (BuildOptions.DAEMON_DIR, new_path) print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path def localname(): """ Return the uname of the local host """ our_uname = stdout_from_command(["uname", "-n"]) if our_uname: our_uname = our_uname[0] else: our_uname = "localhost" return our_uname class FenceTest(Test): """ Executor for a single test """ def __init__(self, name, description, **kwargs): Test.__init__(self, name, description, **kwargs) if kwargs.get("with_cpg", False): self._enable_corosync = True self._stonith_options = ["-c"] else: self._enable_corosync = False self._stonith_options = ["-s"] self._daemon_location = "pacemaker-fenced" self._negative_stonith_patterns = [] self._stonith_output = "" self._stonith_patterns = [] def start_environment(self): """ Prepare the host for executing a test """ # Make sure we are in full control killall(["pacemakerd", "pacemaker-fenced"]) if self.verbose: self._stonith_options += ["-V"] print("Starting pacemaker-fenced with %s" % self._stonith_options) if os.path.exists(self.logpath): os.remove(self.logpath) cmd = ["pacemaker-fenced", "-l", self.logpath] + self._stonith_options self._stonith_process = subprocess.Popen(cmd) logfile = None init_time = time.time() update_time = init_time while True: time.sleep(0.1) if not self.force_wait and logfile == None \ and os.path.exists(self.logpath): logfile = io.open(self.logpath, 'rt', encoding = "ISO-8859-1") if not self.force_wait and logfile != None: for line in logfile.readlines(): if "successfully started" in line: return now = time.time() if self.timeout > 0 and (now - init_time) >= self.timeout: if not self.force_wait: print("\tDaemon pacemaker-fenced doesn't seem to have been initialized within %fs." "\n\tConsider specifying a longer '--timeout' value." %(self.timeout)) return if self.verbose and (now - update_time) >= 5: print("Waiting for pacemaker-fenced to be initialized: %fs ..." %(now - init_time)) update_time = now def clean_environment(self): """ Clean up the host after executing a test """ if self._stonith_process: if self._stonith_process.poll() == None: self._stonith_process.terminate() self._stonith_process.wait() else: return_code = { getattr(signal, _signame): _signame for _signame in dir(signal) if _signame.startswith('SIG') and not _signame.startswith("SIG_") }.get(-self._stonith_process.returncode, "RET=%d" % (self._stonith_process.returncode)) msg = "FAILURE - '%s' failed. pacemaker-fenced abnormally exited during test (%s)." self._result_txt = msg % (self.name, return_code) self.exitcode = ExitStatus.ERROR self._stonith_output = "" self._stonith_process = None # the default for utf-8 encoding would error out if e.g. memory corruption # makes fenced output any kind of 8 bit value - while still interesting # for debugging and we'd still like the regression-test to go over the # full set of test-cases logfile = io.open(self.logpath, 'rt', encoding = "ISO-8859-1") for line in logfile.readlines(): self._stonith_output += line if self.verbose: print("Daemon Output Start") print(self._stonith_output) print("Daemon Output End") def add_stonith_log_pattern(self, pattern): """ Add a log pattern to expect from this test """ self._stonith_patterns.append(pattern) def add_stonith_neg_log_pattern(self, pattern): """ Add a log pattern that should not occur with this test """ self._negative_stonith_patterns.append(pattern) - def print_result(self, filler): - """ Print the result of the last test execution """ - - print("%s%s" % (filler, self._result_txt)) - def count_negative_matches(self, outline): """ Return 1 if a line matches patterns that shouldn't have occurred """ count = 0 for line in self._negative_stonith_patterns: if outline.count(line): count = 1 if self.verbose: print("This pattern should not have matched = '%s" % (line)) return count def match_stonith_patterns(self): """ Check test output for expected patterns """ negative_matches = 0 cur = 0 pats = self._stonith_patterns total_patterns = len(self._stonith_patterns) if len(self._stonith_patterns) == 0 and len(self._negative_stonith_patterns) == 0: return for line in self._stonith_output.split("\n"): negative_matches = negative_matches + self.count_negative_matches(line) if len(pats) == 0: continue cur = -1 for pat in pats: cur = cur + 1 if line.count(pats[cur]): del pats[cur] break if len(pats) > 0 or negative_matches: if self.verbose: for pat in pats: print("Pattern Not Matched = '%s'" % pat) msg = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." self._result_txt = msg % (self.name, len(pats), total_patterns, negative_matches) self.exitcode = ExitStatus.ERROR - def set_error(self, step, cmd): - """ Record failure of this test """ - - msg = "FAILURE - '%s' failed at step %d. Command: %s %s" - self._result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) - self.exitcode = ExitStatus.ERROR - def run(self): """ Execute this test. """ i = 1 self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self._result_txt = "SUCCESS - '%s'" % (self.name) self.exitcode = ExitStatus.OK for cmd in self._cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break except XmlValidationError as e: print("Step %d FAILED - xmllint failed: %s" % (i, e)) self.set_error(i, cmd); break if self.verbose: print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() if self.exitcode == ExitStatus.OK: self.match_stonith_patterns() print(self._result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = True class Tests(object): """ Collection of all fencing regression tests """ def __init__(self, verbose=False, timeout=2, force_wait=False, logdir="/tmp"): self.tests = [] self.verbose = verbose self.timeout = timeout self.force_wait = force_wait self.logdir = logdir self.autogen_corosync_cfg = not os.path.exists(BuildOptions.COROSYNC_CONFIG_FILE) def new_test(self, name, description, with_cpg=False): """ Create a named test """ test = FenceTest(name, description, verbose=self.verbose, with_cpg=with_cpg, timeout=self.timeout, force_wait=self.force_wait, logdir=self.logdir) self.tests.append(test) return test def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def start_corosync(self): """ Start the corosync process """ if self.verbose: print("Starting corosync") test = subprocess.Popen("corosync", stdout=subprocess.PIPE) test.wait() time.sleep(10) def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_cpg_only(self): """ Run all corosync-enabled tests """ for test in self.tests: if test._enable_corosync: test.run() def run_no_cpg(self): """ Run all standalone tests """ for test in self.tests: if not test._enable_corosync: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if not test.executed: continue if test.exitcode != ExitStatus.OK: sys.exit(ExitStatus.ERROR) sys.exit(ExitStatus.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if not test.executed: continue if test.exitcode != ExitStatus.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) def build_api_sanity_tests(self): """ Register tests to verify basic API usage """ verbose_arg = "" if self.verbose: verbose_arg = "-V" test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") test.add_cmd("cts-fence-helper", "-t %s" % (verbose_arg), validate=False) test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", True) test.add_cmd("cts-fence-helper", "-m %s" % (verbose_arg), validate=False) def build_custom_timeout_tests(self): """ Register tests to verify custom timeout usage """ # custom timeout without topology test = self.new_test("cpg_custom_timeout_1", "Verify per device timeouts work as expected without using topology.", True) test.add_cmd('stonith_admin', '--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4"') test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") # timeout is 5+1+4 = 10 test.add_stonith_log_pattern("Total timeout set to 12") # custom timeout _WITH_ topology test = self.new_test("cpg_custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology.", True) test.add_cmd('stonith_admin', '--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4000"') test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") # timeout is 5+1+4000 = 4006 test.add_stonith_log_pattern("Total timeout set to 4807") def build_fence_merge_tests(self): """ Register tests to verify when fence operations should be merged """ ### Simple test that overlapping fencing operations get merged test = self.new_test("cpg_custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used.", True) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 10") ### one merger will happen test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") ### Test that multiple mergers occur test = self.new_test("cpg_custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged", True) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") ### Test that multiple mergers occur with topologies used test = self.new_test("cpg_custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels.", True) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") def build_fence_no_merge_tests(self): """ Register tests to verify when fence operations should not be merged """ test = self.new_test("cpg_custom_no_merge", "Verify differing fencing operations are not merged", True) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node2 -t 10") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_stonith_neg_log_pattern("Merging fencing action 'off' targeting node3 originating from client") def build_standalone_tests(self): """ Register a grab bag of tests that can be executed in standalone or corosync mode """ test_types = [ { "prefix" : "standalone", "use_cpg" : False, }, { "prefix" : "cpg", "use_cpg" : True, }, ] # test what happens when all devices timeout for test_type in test_types: test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") if test_type["use_cpg"]: test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -F node3 -t 2", ExitStatus.TIMEOUT) test.add_stonith_log_pattern("Total timeout set to 7") else: test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -F node3 -t 2", ExitStatus.ERROR) test.add_stonith_log_pattern("targeting node3 using false1 returned ") test.add_stonith_log_pattern("targeting node3 using false2 returned ") test.add_stonith_log_pattern("targeting node3 using false3 returned ") # test what happens when multiple devices can fence a node, but the first device fails. for test_type in test_types: test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") if test_type["use_cpg"]: test.add_stonith_log_pattern("Total timeout set to 18") # test what happens when we try to use a missing fence-agent. for test_type in test_types: test = self.new_test("%s_fence_missing_agent" % test_type["prefix"], "Verify proper error-handling when using a non-existent fence-agent.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_missing -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node2\"") test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -F node3 -t 5", ExitStatus.NOSUCH) test.add_cmd("stonith_admin", "--output-as=xml -F node2 -t 5") # simple topology test for one device for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_topology_simple" % test_type["prefix"], "Verify all fencing devices at a level are used.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") test.add_stonith_log_pattern("Total timeout set to 6") test.add_stonith_log_pattern("targeting node3 using true returned 0") # add topology, delete topology, verify fencing still works for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_topology_add_remove" % test_type["prefix"], "Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true") test.add_cmd("stonith_admin", "--output-as=xml -d node3 -i 1") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") test.add_stonith_log_pattern("Total timeout set to 6") test.add_stonith_log_pattern("targeting node3 using true returned 0") # test what happens when the first fencing level has multiple devices. for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_topology_device_fails" % test_type["prefix"], "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 48") test.add_stonith_log_pattern("targeting node3 using false returned 1") test.add_stonith_log_pattern("targeting node3 using true returned 0") # test what happens when the first fencing level fails. for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 3") test.add_stonith_log_pattern("Total timeout set to 21") test.add_stonith_log_pattern("targeting node3 using false1 returned 1") test.add_stonith_log_pattern("targeting node3 using false2 returned 1") test.add_stonith_log_pattern("targeting node3 using true3 returned 0") test.add_stonith_log_pattern("targeting node3 using true4 returned 0") # test what happens when the first fencing level had devices that no one has registered for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], "Verify topology can continue with missing devices.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") # Test what happens if multiple fencing levels are defined, and then the first one is removed. for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_topology_level_removal" % test_type["prefix"], "Verify level removal works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit. test.add_cmd("stonith_admin", "--output-as=xml -d node3 -i 2") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 96") test.add_stonith_log_pattern("targeting node3 using false1 returned 1") test.add_stonith_neg_log_pattern("targeting node3 using false2 returned ") test.add_stonith_log_pattern("targeting node3 using true3 returned 0") test.add_stonith_log_pattern("targeting node3 using true4 returned 0") # Test targeting a topology level by node name pattern. for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_topology_level_pattern" % test_type["prefix"], "Verify targeting topology by node name pattern works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", """--output-as=xml -R true -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1 node2 node3" """) test.add_cmd("stonith_admin", """--output-as=xml -r '@node.*' -i 1 -v true""") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") test.add_stonith_log_pattern("targeting node3 using true returned 0") # test allowing commas and semicolons as delimiters in pcmk_host_list for test_type in test_types: test = self.new_test("%s_host_list_delimiters" % test_type["prefix"], "Verify commas and semicolons can be used as pcmk_host_list delimiters", test_type["use_cpg"]) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1,node2,node3" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=pcmk1;pcmk2;pcmk3" """) test.add_cmd("stonith_admin", "stonith_admin --output-as=xml -F node2 -t 5") test.add_cmd("stonith_admin", "stonith_admin --output-as=xml -F pcmk3 -t 5") test.add_stonith_log_pattern("targeting node2 using true1 returned 0") test.add_stonith_log_pattern("targeting pcmk3 using true2 returned 0") # test the stonith builds the correct list of devices that can fence a node. for test_type in test_types: test = self.new_test("%s_list_devices" % test_type["prefix"], "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l node1 -V", "true2", "true1") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l node1 -V", "true3", "true1") # simple test of device monitor for test_type in test_types: test = self.new_test("%s_monitor" % test_type["prefix"], "Verify device is reachable", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -Q true1") test.add_cmd("stonith_admin", "--output-as=xml -Q false1") test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -Q true2", ExitStatus.NOSUCH) # Verify monitor occurs for duration of timeout period on failure for test_type in test_types: test = self.new_test("%s_monitor_timeout" % test_type["prefix"], "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '--output-as=xml -R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -Q true1 -t 5", ExitStatus.ERROR) test.add_stonith_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries for test_type in test_types: test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '--output-as=xml -R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -Q true1 -t 15", ExitStatus.ERROR) test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test for test_type in test_types: test = self.new_test("%s_register" % test_type["prefix"], "Verify devices can be registered and un-registered", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -Q true1") test.add_cmd("stonith_admin", "--output-as=xml -D true1") test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -Q true1", ExitStatus.NOSUCH) # simple reboot test for test_type in test_types: test = self.new_test("%s_reboot" % test_type["prefix"], "Verify devices can be rebooted", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -B node3 -t 5") test.add_cmd("stonith_admin", "--output-as=xml -D true1") test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -Q true1", ExitStatus.NOSUCH) # test fencing history. for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_fence_history" % test_type["prefix"], "Verify last fencing operation is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5 -V") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -H node3", 'action="off" target="node3" .* status="success"') # simple test of dynamic list query for test_type in test_types: test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l fake_port_1", 'count="3"') # fence using dynamic list query for test_type in test_types: test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -F fake_port_1 -t 5 -V") # simple test of query using status action for test_type in test_types: test = self.new_test("%s_status_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l fake_port_1", 'count="3"') # test what happens when no reboot action is advertised for test_type in test_types: test = self.new_test("%s_no_reboot_support" % test_type["prefix"], "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V") test.add_stonith_log_pattern("does not support reboot") test.add_stonith_log_pattern("using true1 returned 0") # make sure reboot is used when reboot action is advertised for test_type in test_types: test = self.new_test("%s_with_reboot_support" % test_type["prefix"], "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V") test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("using true1 returned 0") # make sure requested fencing delay is applied only for the first device in the first level # make sure static delay from pcmk_delay_base is added for test_type in test_types: if not test_type["use_cpg"]: continue test = self.new_test("%s_topology_delay" % test_type["prefix"], "Verify requested fencing delay is applied only for the first device in the first level and pcmk_delay_base is added.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"") test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -F node3 --delay 1") test.add_stonith_log_pattern("Delaying 'off' action targeting node3 using true1 for 2s | timeout=120s requested_delay=1s base=1s max=1s") test.add_stonith_log_pattern("Delaying 'off' action targeting node3 using false1 for 1s | timeout=120s requested_delay=0s base=1s max=1s") test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 using true2") test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 using true3") def build_nodeid_tests(self): """ Register tests that use a corosync node id """ our_uname = localname() ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("cpg_supply_nodeid", "Verify nodeid is given when fence agent has nodeid as parameter", True) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -F %s -t 3" % (our_uname)) test.add_stonith_log_pattern("as nodeid with fence action 'off' targeting %s" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("cpg_do_not_supply_nodeid", "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", True) # use a host name that won't be in corosync.conf test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=regr-test\"") test.add_cmd("stonith_admin", "--output-as=xml -F regr-test -t 3") test.add_stonith_neg_log_pattern("as nodeid with fence action 'off' targeting regr-test") ### verify nodeid use doesn't explode standalone mode test = self.new_test("standalone_do_not_supply_nodeid", "Verify nodeid in metadata parameter list doesn't kill standalone mode", False) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -F %s -t 3" % (our_uname)) test.add_stonith_neg_log_pattern("as nodeid with fence action 'off' targeting %s" % (our_uname)) def build_unfence_tests(self): """ Register tests that verify unfencing """ our_uname = localname() ### verify unfencing using automatic unfencing test = self.new_test("cpg_unfence_required_1", "Verify require unfencing on all devices when automatic=true in agent's metadata", True) test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) # both devices should be executed test.add_stonith_log_pattern("using true1 returned 0") test.add_stonith_log_pattern("using true2 returned 0") ### verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("cpg_unfence_required_2", "Verify require unfencing on all devices when automatic=true in agent's metadata", True) test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=fail" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -U %s -t 6" % (our_uname), ExitStatus.ERROR) ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_3", "Verify require unfencing on all devices even when at different topology levels", True) test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("using true1 returned 0") test.add_stonith_log_pattern("using true2 returned 0") ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_4", "Verify all required devices are executed even with topology levels fail.", True) test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true3 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true4 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R false3 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R false4 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v false1" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v false2" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v false3" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true3" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 3 -v false4" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 4 -v true4" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("using true1 returned 0") test.add_stonith_log_pattern("using true2 returned 0") test.add_stonith_log_pattern("using true3 returned 0") test.add_stonith_log_pattern("using true4 returned 0") def build_unfence_on_target_tests(self): """ Register tests that verify unfencing that runs on the target """ our_uname = localname() ### verify unfencing using on_target device test = self.new_test("cpg_unfence_on_target_1", "Verify unfencing with on_target = true", True) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on target") ### verify failure of unfencing using on_target device test = self.new_test("cpg_unfence_on_target_2", "Verify failure unfencing with on_target = true", True) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -U node_fake_1234 -t 3", ExitStatus.NOSUCH) test.add_stonith_log_pattern("(on) to be executed on target") ### verify unfencing using on_target device with topology test = self.new_test("cpg_unfence_on_target_3", "Verify unfencing with on_target = true using topology", True) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on target") ### verify unfencing using on_target device with topology fails when target node doesn't exist test = self.new_test("cpg_unfence_on_target_4", "Verify unfencing failure with on_target = true using topology", True) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 2 -v true2") test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -U node_fake -t 3", ExitStatus.NOSUCH) test.add_stonith_log_pattern("(on) to be executed on target") def build_remap_tests(self): """ Register tests that verify remapping of reboots to off-on """ test = self.new_test("cpg_remap_simple", "Verify sequential topology reboot is remapped to all-off-then-all-on", True) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=1" -o "pcmk_reboot_timeout=10" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=2" -o "pcmk_reboot_timeout=20" """) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_stonith_log_pattern("Total timeout set to 3 for peer's fencing targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true2") test.add_stonith_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # fence_dummy sets "on" as an on_target action test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("cpg_remap_simple_off", "Verify sequential topology reboot skips 'on' if " "pcmk_reboot_action=off or agent doesn't support " "'on'", True) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o mode=pass " "-o pcmk_host_list=node_fake -o pcmk_off_timeout=1 " "-o pcmk_reboot_timeout=10 -o pcmk_reboot_action=off") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy_no_on " "-o mode=pass -o pcmk_host_list=node_fake " "-o pcmk_off_timeout=2 -o pcmk_reboot_timeout=20") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_stonith_log_pattern("Total timeout set to 3 for peer's fencing targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true2") test.add_stonith_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # "on" should be skipped test.add_stonith_log_pattern("Not turning node_fake back on using " "true1 because the device is configured " "to stay off") test.add_stonith_log_pattern("Not turning node_fake back on using true2" " because the agent doesn't support 'on'") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("cpg_remap_automatic", "Verify remapped topology reboot skips automatic 'on'", True) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true2") test.add_stonith_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test.add_stonith_neg_log_pattern("perform 'on' action targeting node_fake using") test.add_stonith_neg_log_pattern("'on' failure") test = self.new_test("cpg_remap_complex_1", "Verify remapped topology reboot in second level works if non-remapped first level fails", True) test.add_cmd("stonith_admin", """--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true2") test.add_stonith_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("cpg_remap_complex_2", "Verify remapped topology reboot failure in second level proceeds to third level", True) test.add_cmd("stonith_admin", """--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true3 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using false2") test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test.add_stonith_log_pattern("perform 'reboot' action targeting node_fake using true2") test.add_stonith_neg_log_pattern("node_fake with true3") def build_query_tests(self): """ run stonith_admin --metadata for the fence_dummy agent and check command output """ test = self.new_test("get_metadata", "Run stonith_admin --metadata for the fence_dummy agent", True) test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -a fence_dummy --metadata", '<shortdesc lang') def build_metadata_tests(self): """ run fence-agents coming with pacemaker with -o metadata and check for valid xml """ test = self.new_test("check_metadata_dummy", "Run fence_dummy -o metadata and check for valid xml", False) test.add_cmd("fence_dummy", "-o metadata", check_rng=False, check_stderr=False) # fence_dummy prints on stderr to check that tools just listen on stdout test = self.new_test("check_metadata_watchdog", "Run fence_watchdog -o metadata and check for valid xml", False) test.add_cmd("fence_watchdog", "-o metadata", check_rng=False) def build_validate_tests(self): """ run stonith_admin --validate for the fence_dummy agent and check command output """ test = self.new_test("validate_dummy", "Run stonith_admin --validate-all and check output", False) test.add_cmd_expected_fail("stonith_admin", "-a fence_dummy --validate --output-as=xml") test.add_cmd("stonith_admin", """-a fence_dummy --validate -o "delay=5" --output-as=xml""", check_rng=False) test.add_cmd_expected_fail("stonith_admin", """-a fence_dummy --validate -o "delay=15" --output-as=xml""") def setup_environment(self, use_corosync): """ Prepare the host before executing any tests """ if use_corosync: if self.autogen_corosync_cfg: logname = os.path.join(self.logdir, "corosync.log") corosync_cfg = io.open(BuildOptions.COROSYNC_CONFIG_FILE, "w") corosync_cfg.write(AUTOGEN_COROSYNC_TEMPLATE % (localname(), logname)) corosync_cfg.close() ### make sure we are in control ### killall(["corosync"]) self.start_corosync() subprocess.call(["cts-support", "install"]) def cleanup_environment(self, use_corosync): """ Clean up the host after executing desired tests """ if use_corosync: killall(["corosync"]) if self.autogen_corosync_cfg: if self.verbose: print("Corosync output") logfile = io.open(os.path.join(self.logdir, "corosync.log"), 'rt') for line in logfile.readlines(): print(line.strip()) logfile.close() os.remove(BuildOptions.COROSYNC_CONFIG_FILE) subprocess.call(["cts-support", "uninstall"]) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = False self.options['run-all'] = True self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = False self.options['timeout'] = 2 self.options['force-wait'] = False self.options['invalid-arg'] = "" self.options['cpg-only'] = False self.options['no-cpg'] = False self.options['show-usage'] = False def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = False for i in range(0, len(args)): if skip: skip = False continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = True elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = True elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = True elif args[i] == "-t" or args[i] == "--timeout": self.options['timeout'] = float(args[i+1]) elif args[i] == "-w" or args[i] == "--force-wait": self.options['force-wait'] = True elif args[i] == "-n" or args[i] == "--no-cpg": self.options['no-cpg'] = True elif args[i] == "-c" or args[i] == "--cpg-only": self.options['cpg-only'] = True elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = True elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = True def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--cpg-only | -c] Only run tests that require corosync.") print("\t [--no-cpg | -n] Only run tests that do not require corosync") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--timeout | -t 'floating point number']" "\n\t\tUp to how many seconds each test case waits for the daemon to be initialized." "\n\t\tDefaults to 2. The value 0 means no limit.") print("\t [--force-wait | -w]" "\n\t\tEach test case waits the default/specified --timeout for the daemon without tracking the log.") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run fencing regression tests as specified by arguments """ update_path() # Ensure all command output is in portable locale for comparison os.environ['LC_ALL'] = "C" opts = TestOptions() opts.build_options(argv) exit_if_proc_running("pacemaker-fenced") use_corosync = not opts.options['no-cpg'] if use_corosync: exit_if_proc_running("corosync") # Create a temporary directory for log files (the directory and its # contents will automatically be erased when done) with tempfile.TemporaryDirectory(prefix="cts-fencing-") as logdir: tests = Tests(opts.options['verbose'], opts.options['timeout'], opts.options['force-wait'], logdir) tests.build_standalone_tests() tests.build_custom_timeout_tests() tests.build_api_sanity_tests() tests.build_fence_merge_tests() tests.build_fence_no_merge_tests() tests.build_unfence_tests() tests.build_unfence_on_target_tests() tests.build_nodeid_tests() tests.build_remap_tests() tests.build_query_tests() tests.build_metadata_tests() tests.build_validate_tests() if opts.options['list-tests']: tests.print_list() sys.exit(ExitStatus.OK) elif opts.options['show-usage']: opts.show_usage() sys.exit(ExitStatus.OK) print("Starting ...") tests.setup_environment(use_corosync) if opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() elif opts.options['no-cpg']: tests.run_no_cpg() tests.print_results() elif opts.options['cpg-only']: tests.run_cpg_only() tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment(use_corosync) tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/python/pacemaker/_cts/test.py b/python/pacemaker/_cts/test.py index 97758c1571..abe34a8e8a 100644 --- a/python/pacemaker/_cts/test.py +++ b/python/pacemaker/_cts/test.py @@ -1,256 +1,268 @@ """ A module providing base classes for defining regression tests and groups of regression tests. Everything exported here should be considered an abstract class that needs to be subclassed in order to do anything useful. Various functions will raise NotImplementedError if not overridden by a subclass. """ __copyright__ = "Copyright 2009-2023 the Pacemaker project contributors" __license__ = "LGPLv2.1+" __all__ = ["Test"] import os import re import shlex import subprocess import sys import time from pacemaker._cts.errors import ExitCodeError, OutputFoundError, OutputNotFoundError, XmlValidationError from pacemaker._cts.process import pipe_communicate from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus def find_validator(rng_file): if os.access("/usr/bin/xmllint", os.X_OK): if rng_file == None: return ["xmllint", "-"] else: return ["xmllint", "--relaxng", rng_file, "-"] else: return None def rng_directory(): if "PCMK_schema_directory" in os.environ: return os.environ["PCMK_schema_directory"] elif os.path.exists("%s/cts-fencing.in" % sys.path[0]): return "xml" else: return BuildOptions.SCHEMA_DIR class Test: """ The base class for a single regression test. A single regression test may still run multiple commands as part of its execution. """ def __init__(self, name, description, **kwargs): """ Create a new Test instance. This method must be provided by all subclasses, which must call Test.__init__ first. Arguments: description -- A user-readable description of the test, helpful in identifying what test is running or has failed. name -- The name of the test. Command line tools use this attribute to allow running only tests with the exact name, or tests whose name matches a given pattern. This should be unique among all tests. Keyword arguments: force_wait -- logdir -- The base directory under which to create a directory to store output and temporary data. timeout -- How long to wait for the test to complete. verbose -- Whether to print additional information, including verbose command output and daemon log files. """ self.description = description self.executed = False self.name = name self.force_wait = kwargs.get("force_wait", False) self.logdir = kwargs.get("logdir", "/tmp") self.timeout = kwargs.get("timeout", 2) self.verbose = kwargs.get("verbose", False) self._cmds = [] self._daemon_location = None self._result_exitcode = ExitStatus.OK self._result_txt = "" self._stonith_process = None ### ### PROPERTIES ### @property def exitcode(self): """ The final exitcode of the Test. If all commands pass, this property will be ExitStatus.OK. Otherwise, this property will be the exitcode of the first command to fail. """ return self._result_exitcode @exitcode.setter def exitcode(self, value): self._result_exitcode = value @property def logpath(self): """ The path to the log for whatever daemon is being tested. Note that this requires all subclasses to set self._daemon_location before accessing this property or an exception will be raised. """ return os.path.join(self.logdir, self._daemon_location + ".log") ### ### PRIVATE METHODS ### def _new_cmd(self, cmd, args, exitcode, **kwargs): """ Add a command to be executed as part of this test. Arguments: cmd -- The program to run. args -- Commands line arguments to pass to cmd, as a string. exitcode -- The expected exit code of cmd. This can be used to run a command that is expected to fail. Keyword arguments: stdout_match -- If not None, a string that is expected to be present in the stdout of cmd. This can be a regular expression. no_wait -- Do not wait for cmd to complete. stdout_negative_match -- If not None, a string that is expected to be missing in the stdout of cmd. This can be a regualr expression. kill -- A command to be run after cmd, typically in order to kill a failed process. This should be the entire command line including arguments as a single string. validate -- If True, the output of cmd will be passed to xmllint for validation. If validation fails, XmlValidationError will be raised. check_rng -- If True and validate is True, command output will additionally be checked against the api-result.rng file. check_stderr -- If True, the stderr of cmd will be included in output. """ self._cmds.append( { "args": args, "check_rng": kwargs.get("check_rng", True), "check_stderr": kwargs.get("check_stderr", True), "cmd": cmd, "expected_exitcode": exitcode, "kill": kwargs.get("kill", None), "no_wait": kwargs.get("no_wait", False), "stdout_match": kwargs.get("stdout_match", None), "stdout_negative_match": kwargs.get("stdout_negative_match", None), "validate": kwargs.get("validate", True), } ) ### ### PUBLIC METHODS ### def add_cmd(self, cmd, args, validate=True, check_rng=True, check_stderr=True): """ Add a simple command to be executed as part of this test """ self._new_cmd(cmd, args, ExitStatus.OK, validate=validate, check_rng=check_rng, check_stderr=check_stderr) def add_cmd_and_kill(self, cmd, args, kill_proc): """ Add a command and system command to be executed as part of this test """ self._new_cmd(cmd, args, ExitStatus.OK, kill=kill_proc) def add_cmd_check_stdout(self, cmd, args, match, no_match=None): """ Add a simple command with expected output to be executed as part of this test """ self._new_cmd(cmd, args, ExitStatus.OK, stdout_match=match, stdout_negative_match=no_match) def add_cmd_expected_fail(self, cmd, args, exitcode=ExitStatus.ERROR): """ Add a command that is expected to fail to be executed as part of this test """ self._new_cmd(cmd, args, exitcode) def add_cmd_no_wait(self, cmd, args): """ Add a simple command to be executed (without waiting) as part of this test """ self._new_cmd(cmd, args, ExitStatus.OK, no_wait=True) + def print_result(self, filler): + """ Print the result of the last test execution """ + + print("%s%s" % (filler, self._result_txt)) + def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: %s" % " ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: %s" % args['kill']) ### Typically, the kill argument is used to detect some sort of ### failure. Without yielding for a few seconds here, the process ### launched earlier that is listening for the failure may not have ### time to connect to pacemaker-execd. time.sleep(2) subprocess.Popen(shlex.split(args['kill'])) if not args['no_wait']: test.wait() else: return ExitStatus.OK output = pipe_communicate(test, check_stderr=args['check_stderr']) if self.verbose: print(output) if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] is not None and \ re.search(args['stdout_match'], output) is None: raise OutputNotFoundError(output) if args['stdout_negative_match'] is not None and \ re.search(args['stdout_negative_match'], output) is not None: raise OutputFoundError(output) if args['validate']: if args['check_rng']: rng_file = rng_directory() + "/api/api-result.rng" else: rng_file = None cmd = find_validator(rng_file) if not cmd: return if self.verbose: print("\nRunning: %s" % " ".join(cmd)) validator = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = pipe_communicate(validator, check_stderr=True, stdin=output) if self.verbose: print(output) if validator.returncode != 0: raise XmlValidationError(output) + + def set_error(self, step, cmd): + """ Record failure of this test """ + + msg = "FAILURE - '%s' failed at step %d. Command: %s %s" + self._result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) + self.exitcode = ExitStatus.ERROR