diff --git a/cts/cts-exec.in b/cts/cts-exec.in index 64152d4cb6..42d0fe8824 100644 --- a/cts/cts-exec.in +++ b/cts/cts-exec.in @@ -1,1297 +1,1304 @@ #!@PYTHON@ """ Regression tests for Pacemaker's pacemaker-execd """ __copyright__ = "Copyright 2012-2022 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import psutil import stat import sys import subprocess import shlex import shutil import tempfile import time # Where to find test binaries # Prefer the source tree if available BUILD_DIR = "@abs_top_builddir@" TEST_DIR = sys.path[0] SBIN_DIR = "@sbindir@" # Check whether Pacemaker Remote support was built REMOTE_ENABLED = "@PC_NAME_GNUTLS@" != "" # File permissions for executable scripts we create EXECMODE = stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH # These values must be kept in sync with include/crm/crm.h class CrmExit(object): OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-exec.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) # For pacemaker-execd, cts-exec-helper, and pacemaker-remoted new_path = "%s/daemons/execd:%s" % (BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For crm_resource # For pacemaker-fenced new_path = "%s/daemons/fenced:%s" % (BUILD_DIR, new_path) # For cts-support new_path = "%s/cts/support:%s" % (BUILD_DIR, new_path) else: print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) # For cts-exec-helper, cts-support, pacemaker-execd, pacemaker-fenced, # and pacemaker-remoted new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path def output_from_command(command): """ Run a command, and return its standard output. """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() output = test.communicate()[0].decode(sys.stdout.encoding) return output.split("\n") def is_proc_running(process_name): """ Check whether a process with a given name is running """ for proc in psutil.process_iter(["name"]): if proc.info["name"] == process_name: return True return False +def exit_if_proc_running(process_name): + """ Exit with error if a given process is running """ + + if is_proc_running(process_name): + print("Error: %s is already running!" % process_name) + print("Run %s only when the cluster is stopped." % sys.argv[0]) + sys.exit(CrmExit.ERROR) + + +def main(argv): class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single pacemaker-execd regression test """ def __init__(self, name, description, verbose=0, tls=0, timeout=2, force_wait=0, logdir="/tmp"): self.name = name self.description = description self.cmds = [] if tls: self.daemon_location = "pacemaker-remoted" else: self.daemon_location = "pacemaker-execd" self.logpath = os.path.join(logdir, self.daemon_location + ".log") self.test_tool_location = "cts-exec-helper" self.verbose = verbose self.tls = tls self.timeout = timeout self.force_wait = force_wait self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK self.execd_process = None self.stonith_process = None self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): """ Prepare the host for running a test """ ### make sure we are in full control here ### cmd = shlex.split("killall -q -9 pacemaker-fenced lt-pacemaker-fenced pacemaker-execd lt-pacemaker-execd cts-exec-helper lt-cts-exec-helper pacemaker-remoted") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" if self.tls == 0: self.stonith_process = subprocess.Popen(shlex.split("pacemaker-fenced -s")) if self.verbose: additional_args = additional_args + " -V" self.execd_process = subprocess.Popen(shlex.split("%s %s -l %s" % (self.daemon_location, additional_args, self.logpath))) logfile = None init_time = time.time() update_time = init_time while True: time.sleep(0.1) if self.force_wait == 0 and logfile == None \ and os.path.exists(self.logpath): logfile = io.open(self.logpath, 'rt', encoding = "ISO-8859-1") if self.force_wait == 0 and logfile != None: for line in logfile.readlines(): if "successfully started" in line: return now = time.time() if self.timeout > 0 and (now - init_time) >= self.timeout: if self.force_wait == 0: print("\tDaemon %s doesn't seem to have been initialized within %fs." "\n\tConsider specifying a longer '--timeout' value." %(self.daemon_location, self.timeout)) return if self.verbose and (now - update_time) >= 5: print("Waiting for %s to be initialized: %fs ..." %(self.daemon_location, now - init_time)) update_time = now def clean_environment(self): """ Clean up the host after running a test """ if self.execd_process: self.execd_process.terminate() self.execd_process.wait() if self.verbose: print("Daemon output") logfile = io.open(self.logpath, 'rt', errors='replace') for line in logfile: print(line.strip().encode('utf-8', 'replace')) if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.execd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "") def add_cmd_check_stdout(self, args, match, no_match=""): """ Add a command with expected output to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, match, 0, no_match) def add_cmd(self, args): """ Add a cts-exec-helper command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "") def add_cmd_and_kill(self, kill_proc, args): """ Add a cts-exec-helper command and system command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "", kill=kill_proc) def add_expected_fail_cmd(self, args, exitcode=CrmExit.ERROR): """ Add a cts-exec-helper command to be executed as part of this test and expected to fail """ self.__new_cmd(self.test_tool_location, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) ### Typically, the kill argument is used to detect some sort of ### failure. Without yielding for a few seconds here, the process ### launched earlier that is listening for the failure may not have ### time to connect to pacemaker-execd. time.sleep(2) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = test.communicate()[0].decode(sys.stdout.encoding) args['cmd_output'] = output if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: raise OutputNotFoundError(output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: raise OutputFoundError(output) def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. """ res = 0 i = 1 if self.tls and self.name.count("stonith") != 0: self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print(self.result_txt) return res self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print(cmd['cmd_output']) print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print(cmd['cmd_output'].strip()) print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all pacemaker-execd regression tests """ def __init__(self, verbose=0, tls=0, timeout=2, force_wait=0, logdir="/tmp"): self.tests = [] self.verbose = verbose self.tls = tls self.timeout = timeout self.force_wait = force_wait self.logdir = logdir self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.installed_files = [] self.action_timeout = " -t 9000 " if self.tls: self.rsc_classes.remove("stonith") try: self.rsc_classes.remove("nagios") except ValueError: # Not found pass if "systemd" in self.rsc_classes: try: # This code doesn't need this import, but pacemaker-cts-dummyd # does, so ensure the dependency is available rather than cause # all systemd tests to fail. import systemd.daemon except ImportError: print("Python systemd bindings not found.") print("The tests for systemd class are not going to be run.") self.rsc_classes.remove("systemd") print("Testing resource classes", repr(self.rsc_classes)) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : '-c exec -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : '-c cancel -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc " + self.action_timeout + " -C systemd -T pacemaker-cts-dummyd@3", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : '-c exec -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" -t 15000 ", "systemd_cancel_line" : '-c cancel -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T pacemaker-cts-dummyd", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : '-c exec -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_monitor_event" : '-l "NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete" -t 15000', "upstart_cancel_line" : '-c cancel -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : '-c exec -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : '-c cancel -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : '-c exec -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : '-c cancel -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc " + self.action_timeout + " -C stonith -P pacemaker -T fence_dummy", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : '-c exec -r stonith_test_rsc -a start ' + self.action_timeout, "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : '-c exec -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : '-c cancel -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): """ Create a named test """ test = Test(name, description, self.verbose, self.tls, self.timeout, self.force_wait, self.logdir) self.tests.append(test) return test def setup_test_environment(self): """ Prepare the host before executing any tests """ if REMOTE_ENABLED: os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): print("Installing /etc/pacemaker/authkey ...") os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") self.installed_files.append("/etc/pacemaker/authkey") # If we're in build directory, install agents if not already installed if os.path.exists("%s/cts/cts-exec.in" % BUILD_DIR): if not os.path.exists("@OCF_RA_INSTALL_DIR@/pacemaker"): # @TODO remember which components were created and remove them os.makedirs("@OCF_RA_INSTALL_DIR@/pacemaker", 0o755) for agent in ["Dummy", "Stateful", "ping"]: agent_source = "%s/extra/resources/%s" % (BUILD_DIR, agent) agent_dest = "@OCF_RA_INSTALL_DIR@/pacemaker/%s" % (agent) if not os.path.exists(agent_dest): print("Installing %s ..." % (agent_dest)) shutil.copyfile(agent_source, agent_dest) os.chmod(agent_dest, EXECMODE) self.installed_files.append(agent_dest) subprocess.call(["cts-support", "install"]) def cleanup_test_environment(self): """ Clean up the host after executing desired tests """ for installed_file in self.installed_files: print("Removing %s ..." % (installed_file)) os.remove(installed_file) subprocess.call(["cts-support", "uninstall"]) def build_generic_tests(self): """ Register tests that apply to all resource classes """ common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # Add the duplicate monitors test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_multi_rsc_tests(self): """ Register complex tests that involve managing multiple resouces of different types """ common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: ### If this fails, that means the monitor is not being rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_negative_tests(self): """ Register tests related to how pacemaker-execd handles failures """ ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") # -t must be less than self.action_timeout test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd('-c register_rsc -r test_rsc ' + '-C stonith -P pacemaker -T fence_dummy ' + self.action_timeout + '-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete"') test.add_cmd('-c exec -r test_rsc -a start -k monitor_delay -v 30 ' + '-t 1000 -w') # -t must be less than self.action_timeout test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after pacemaker-execd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd('-c exec -r stonith_test_rsc -a monitor -i 600s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" ' + self.action_timeout) test.add_cmd_and_kill("killall -9 -q pacemaker-fenced lt-pacemaker-fenced", '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:error op_status:error" -t 15000') test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" ' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ') test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd_and_kill('rm -f @localstatedir@/run/Dummy-test_rsc.state', '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout + '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"') ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T pacemaker-cts-dummyd@3 " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("pkill -9 -f pacemaker-cts-dummyd", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T pacemaker-cts-dummyd "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill('killall -9 -q dd', '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) ### interval is wrong, should fail test.add_expected_fail_cmd('-c cancel -r test_rsc -a monitor -i 2s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") ### action name is wrong, should fail test.add_expected_fail_cmd('-c cancel -r test_rsc -a stop -i 1s' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd('-c exec -r test_rsc -a monitor -i 6s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") def build_stress_tests(self): """ Register stress tests """ timeout = "-t 20000" iterations = 25 test = self.new_test("ocf_stress", "Verify OCF agent handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T pacemaker-cts-dummyd@3 -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) iterations = 9 timeout = "-t 30000" ### Verify recurring op in-flight collision is handled in series properly test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) for i in range(iterations): test.add_cmd('-c exec -r test_rsc -a monitor %s -i 100%dms ' '-k op_sleep -v 2 ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' % (timeout, i)) test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) def build_custom_tests(self): """ Register tests that target specific cases """ ### verify resource temporary folder is created and used by OCF agents. ### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000", CrmExit.TIMEOUT) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000", CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T pacemaker-cts-dummyd@3 "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T pacemaker-cts-dummyd "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r \"test_rsc\" -a \"monitor\" -i 1s ' + self.action_timeout + ' -n ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ') test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"", "resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"", "resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy\"", "resource-agent name=\"fence_dummy\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"pacemaker-cts-dummyd@\"", "resource-agent name=\"pacemaker-cts-dummyd@\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"pacemaker-cts-dummyd\"", "resource-agent name=\"pacemaker-cts-dummyd\"") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd@") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "pacemaker-cts-dummyd@") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "pacemaker-cts-dummyd") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy") def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['timeout'] = 2 self.options['force-wait'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['pacemaker-remote'] = 0 def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-t" or args[i] == "--timeout": self.options['timeout'] = float(args[i+1]) elif args[i] == "-w" or args[i] == "--force-wait": self.options['force-wait'] = 1 elif args[i] == "-R" or args[i] == "--pacemaker-remote": if REMOTE_ENABLED: self.options['pacemaker-remote'] = 1 else: print("ERROR: This build does not support Pacemaker Remote") sys.exit(CrmExit.USAGE) elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--timeout | -t 'floating point number']" "\n\t\tUp to how many seconds each test case waits for the daemon to be initialized." "\n\t\tDefaults to 2. The value 0 means no limit.") print("\t [--force-wait | -w]" "\n\t\tEach test case waits the default/specified --timeout for the daemon without tracking the log.") if REMOTE_ENABLED: print("\t [--pacemaker-remote | -R Test pacemaker-remoted binary instead of pacemaker-execd") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run pacemaker-execd regression tests as specified by arguments """ update_path() # Ensure all command output is in portable locale for comparison os.environ['LC_ALL'] = "C" opts = TestOptions() opts.build_options(argv) if opts.options['show-usage']: show_usage() sys.exit(CrmExit.OK) if opts.options['pacemaker-remote']: daemon_name = "pacemaker-remoted" else: daemon_name = "pacemaker-execd" - if is_proc_running(daemon_name): - print("Error: %s is already running!" % daemon_name) - print("Run %s only when the cluster is stopped." % sys.argv[0]) - sys.exit(CrmExit.ERROR) + exit_if_proc_running(daemon_name) # Create a temporary directory for log files (the directory will # automatically be erased when done) with tempfile.TemporaryDirectory(prefix="cts-exec-") as logdir: tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote'], opts.options['timeout'], opts.options['force-wait'], logdir) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.build_stress_tests() if opts.options['list-tests']: tests.print_list() sys.exit(CrmExit.OK) tests.setup_test_environment() print("Starting ...") if opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in index caef56e44d..460172d2cd 100644 --- a/cts/cts-fencing.in +++ b/cts/cts-fencing.in @@ -1,1652 +1,1657 @@ #!@PYTHON@ """ Regression tests for Pacemaker's fencer """ __copyright__ = "Copyright 2012-2022 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import psutil import re import sys import subprocess import shlex import time import tempfile import signal # Where to find test binaries # Prefer the source tree if available BUILD_DIR = "@abs_top_builddir@" SCHEMA_DIR = "@CRM_SCHEMA_DIRECTORY@" COROSYNC_CONF = "@PCMK__COROSYNC_CONF@" TEST_DIR = sys.path[0] AUTOGEN_COROSYNC_TEMPLATE = """ totem { version: 2 cluster_name: cts-fencing crypto_cipher: none crypto_hash: none transport: udp } nodelist { node { nodeid: 1 name: %s ring0_addr: 127.0.0.1 } } logging { debug: off to_syslog: no to_stderr: no to_logfile: yes logfile: %s } """ # These values must be kept in sync with include/crm/crm.h class CrmExit(object): OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-fencing.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) # For pacemaker-fenced and cts-fence-helper new_path = "%s/daemons/fenced:%s" % (BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For stonith_admin new_path = "%s/cts/support:%s" % (BUILD_DIR, new_path) # For cts-support else: print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) # For pacemaker-fenced, cts-fence-helper, and cts-support new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path def find_validator(rng_file): if os.access("/usr/bin/xmllint", os.X_OK): if rng_file == None: return ["xmllint", "-"] else: return ["xmllint", "--relaxng", rng_file, "-"] else: return None def rng_directory(): if "PCMK_schema_directory" in os.environ: return os.environ["PCMK_schema_directory"] elif os.path.exists("%s/cts-fencing.in" % TEST_DIR): return "xml" else: return SCHEMA_DIR def pipe_communicate(pipes, check_stderr=False, stdin=None): """ Get text output from pipes """ if stdin is not None: pipe_outputs = pipes.communicate(input=stdin.encode()) else: pipe_outputs = pipes.communicate() output = pipe_outputs[0].decode(sys.stdout.encoding) if check_stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Execute command and return its standard output """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() return pipe_communicate(test).split("\n") def localname(): """ Return the uname of the local host """ our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] else: our_uname = "localhost" return our_uname def killall(process): """ Kill all instances of a process """ cmd = shlex.split("killall -9 -q %s" % process) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() def is_proc_running(process_name): """ Check whether a process with a given name is running """ for proc in psutil.process_iter(["name"]): if proc.info["name"] == process_name: return True return False +def exit_if_proc_running(process_name): + """ Exit with error if a given process is running """ + + if is_proc_running(process_name): + print("Error: %s is already running!" % process_name) + print("Run %s only when the cluster is stopped." % sys.argv[0]) + sys.exit(CrmExit.ERROR) + + class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class XmlValidationError(TestError): """ Exception raised when xmllint fails """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single test """ def __init__(self, name, description, verbose=0, with_cpg=0, timeout=2, force_wait=0, logdir="/tmp"): self.name = name self.description = description self.cmds = [] self.verbose = verbose self.timeout = timeout self.force_wait = force_wait self.logpath = os.path.join(logdir, "pacemaker-fenced.log") self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK if with_cpg: self.stonith_options = "-c" self.enable_corosync = 1 else: self.stonith_options = "-s" self.enable_corosync = 0 self.stonith_process = None self.stonith_output = "" self.stonith_patterns = [] self.negative_stonith_patterns = [] self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None, validate=True, check_rng=True, check_stderr=True): """ Add a command to be executed as part of this test """ self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "validate" : validate, "check_rng" : check_rng, "check_stderr" : check_stderr, } ) def start_environment(self): """ Prepare the host for executing a test """ # Make sure we are in full control killall("pacemakerd") killall("pacemaker-fenced") if self.verbose: self.stonith_options = self.stonith_options + " -V" print("Starting pacemaker-fenced with %s" % self.stonith_options) if os.path.exists(self.logpath): os.remove(self.logpath) cmd = "pacemaker-fenced %s -l %s" % (self.stonith_options, self.logpath) self.stonith_process = subprocess.Popen(shlex.split(cmd)) logfile = None init_time = time.time() update_time = init_time while True: time.sleep(0.1) if self.force_wait == 0 and logfile == None \ and os.path.exists(self.logpath): logfile = io.open(self.logpath, 'rt', encoding = "ISO-8859-1") if self.force_wait == 0 and logfile != None: for line in logfile.readlines(): if "successfully started" in line: return now = time.time() if self.timeout > 0 and (now - init_time) >= self.timeout: if self.force_wait == 0: print("\tDaemon pacemaker-fenced doesn't seem to have been initialized within %fs." "\n\tConsider specifying a longer '--timeout' value." %(self.timeout)) return if self.verbose and (now - update_time) >= 5: print("Waiting for pacemaker-fenced to be initialized: %fs ..." %(now - init_time)) update_time = now def clean_environment(self): """ Clean up the host after executing a test """ if self.stonith_process: if self.stonith_process.poll() == None: self.stonith_process.terminate() self.stonith_process.wait() else: return_code = { getattr(signal, _signame): _signame for _signame in dir(signal) if _signame.startswith('SIG') and not _signame.startswith("SIG_") }.get(-self.stonith_process.returncode, "RET=%d" % (self.stonith_process.returncode)) msg = "FAILURE - '%s' failed. pacemaker-fenced abnormally exited during test (%s)." self.result_txt = msg % (self.name, return_code) self.result_exitcode = CrmExit.ERROR self.stonith_output = "" self.stonith_process = None # the default for utf-8 encoding would error out if e.g. memory corruption # makes fenced output any kind of 8 bit value - while still interesting # for debugging and we'd still like the regression-test to go over the # full set of test-cases logfile = io.open(self.logpath, 'rt', encoding = "ISO-8859-1") for line in logfile.readlines(): self.stonith_output = self.stonith_output + line if self.verbose: print("Daemon Output Start") print(self.stonith_output) print("Daemon Output End") def add_stonith_log_pattern(self, pattern): """ Add a log pattern to expect from this test """ self.stonith_patterns.append(pattern) def add_stonith_neg_log_pattern(self, pattern): """ Add a log pattern that should not occur with this test """ self.negative_stonith_patterns.append(pattern) def add_cmd(self, cmd, args, validate=True, check_rng=True, check_stderr=True): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "", validate=validate, check_rng=check_rng, check_stderr=check_stderr) def add_cmd_no_wait(self, cmd, args): """ Add a simple command to be executed (without waiting) as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "", 1) def add_cmd_check_stdout(self, cmd, args, match, no_match=""): """ Add a simple command with expected output to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, match, 0, no_match) def add_expected_fail_cmd(self, cmd, args, exitcode=CrmExit.ERROR): """ Add a command to be executed as part of this test and expected to fail """ self.__new_cmd(cmd, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = pipe_communicate(test, check_stderr=args['check_stderr']) if self.verbose: print(output) if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if (args['stdout_match'] != "" and re.search(args['stdout_match'], output) is None): raise OutputNotFoundError(output) if (args['stdout_negative_match'] != "" and re.search(args['stdout_negative_match'], output) is not None): raise OutputFoundError(output) if args['validate']: if args['check_rng']: rng_file = rng_directory() + "/api/api-result.rng" else: rng_file = None cmd = find_validator(rng_file) if not cmd: return if self.verbose: print("\nRunning: "+" ".join(cmd)) validator = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = pipe_communicate(validator, check_stderr=True, stdin=output) if self.verbose: print(output) if validator.returncode != 0: raise XmlValidationError(output) def count_negative_matches(self, outline): """ Return 1 if a line matches patterns that shouldn't have occurred """ count = 0 for line in self.negative_stonith_patterns: if outline.count(line): count = 1 if self.verbose: print("This pattern should not have matched = '%s" % (line)) return count def match_stonith_patterns(self): """ Check test output for expected patterns """ negative_matches = 0 cur = 0 pats = self.stonith_patterns total_patterns = len(self.stonith_patterns) if len(self.stonith_patterns) == 0 and len(self.negative_stonith_patterns) == 0: return for line in self.stonith_output.split("\n"): negative_matches = negative_matches + self.count_negative_matches(line) if len(pats) == 0: continue cur = -1 for pat in pats: cur = cur + 1 if line.count(pats[cur]): del pats[cur] break if len(pats) > 0 or negative_matches: if self.verbose: for pat in pats: print("Pattern Not Matched = '%s'" % pat) msg = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." self.result_txt = msg % (self.name, len(pats), total_patterns, negative_matches) self.result_exitcode = CrmExit.ERROR def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. """ res = 0 i = 1 self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break except XmlValidationError as e: print("Step %d FAILED - xmllint failed: %s" % (i, e)) self.set_error(i, cmd); break if self.verbose: print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() if self.result_exitcode == CrmExit.OK: self.match_stonith_patterns() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all fencing regression tests """ def __init__(self, verbose=0, timeout=2, force_wait=0, logdir="/tmp"): self.tests = [] self.verbose = verbose self.timeout = timeout self.force_wait = force_wait self.logdir = logdir self.autogen_corosync_cfg = not os.path.exists(COROSYNC_CONF) def new_test(self, name, description, with_cpg=0): """ Create a named test """ test = Test(name, description, self.verbose, with_cpg, self.timeout, self.force_wait, self.logdir) self.tests.append(test) return test def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def start_corosync(self): """ Start the corosync process """ if self.verbose: print("Starting corosync") test = subprocess.Popen("corosync", stdout=subprocess.PIPE) test.wait() time.sleep(10) def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_cpg_only(self): """ Run all corosync-enabled tests """ for test in self.tests: if test.enable_corosync: test.run() def run_no_cpg(self): """ Run all standalone tests """ for test in self.tests: if not test.enable_corosync: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) def build_api_sanity_tests(self): """ Register tests to verify basic API usage """ verbose_arg = "" if self.verbose: verbose_arg = "-V" test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") test.add_cmd("cts-fence-helper", "-t %s" % (verbose_arg), validate=False) test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) test.add_cmd("cts-fence-helper", "-m %s" % (verbose_arg), validate=False) def build_custom_timeout_tests(self): """ Register tests to verify custom timeout usage """ # custom timeout without topology test = self.new_test("cpg_custom_timeout_1", "Verify per device timeouts work as expected without using topology.", 1) test.add_cmd('stonith_admin', '--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4"') test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") # timeout is 5+1+4 = 10 test.add_stonith_log_pattern("Total timeout set to 12") # custom timeout _WITH_ topology test = self.new_test("cpg_custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology.", 1) test.add_cmd('stonith_admin', '--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4000"') test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") # timeout is 5+1+4000 = 4006 test.add_stonith_log_pattern("Total timeout set to 4807") def build_fence_merge_tests(self): """ Register tests to verify when fence operations should be merged """ ### Simple test that overlapping fencing operations get merged test = self.new_test("cpg_custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 10") ### one merger will happen test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") ### Test that multiple mergers occur test = self.new_test("cpg_custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged", 1) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") ### Test that multiple mergers occur with topologies used test = self.new_test("cpg_custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_stonith_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") test.add_stonith_log_pattern("Operation 'off' targeting node3 by ") def build_fence_no_merge_tests(self): """ Register tests to verify when fence operations should not be merged """ test = self.new_test("cpg_custom_no_merge", "Verify differing fencing operations are not merged", 1) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "--output-as=xml -F node2 -t 10") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 10") test.add_stonith_neg_log_pattern("Merging fencing action 'off' targeting node3 originating from client") def build_standalone_tests(self): """ Register a grab bag of tests that can be executed in standalone or corosync mode """ test_types = [ { "prefix" : "standalone", "use_cpg" : 0, }, { "prefix" : "cpg", "use_cpg" : 1, }, ] # test what happens when all devices timeout for test_type in test_types: test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") if test_type["use_cpg"] == 1: test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -F node3 -t 2", CrmExit.TIMEOUT) test.add_stonith_log_pattern("Total timeout set to 7") else: test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -F node3 -t 2", CrmExit.ERROR) test.add_stonith_log_pattern("targeting node3 using false1 returned ") test.add_stonith_log_pattern("targeting node3 using false2 returned ") test.add_stonith_log_pattern("targeting node3 using false3 returned ") # test what happens when multiple devices can fence a node, but the first device fails. for test_type in test_types: test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") if test_type["use_cpg"] == 1: test.add_stonith_log_pattern("Total timeout set to 18") # test what happens when we try to use a missing fence-agent. for test_type in test_types: test = self.new_test("%s_fence_missing_agent" % test_type["prefix"], "Verify proper error-handling when using a non-existent fence-agent.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_missing -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node2\"") test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -F node3 -t 5", CrmExit.ERROR) test.add_cmd("stonith_admin", "--output-as=xml -F node2 -t 5") # simple topology test for one device for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_simple" % test_type["prefix"], "Verify all fencing devices at a level are used.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") test.add_stonith_log_pattern("Total timeout set to 6") test.add_stonith_log_pattern("targeting node3 using true returned 0") # add topology, delete topology, verify fencing still works for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_add_remove" % test_type["prefix"], "Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true") test.add_cmd("stonith_admin", "--output-as=xml -d node3 -i 1") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") test.add_stonith_log_pattern("Total timeout set to 6") test.add_stonith_log_pattern("targeting node3 using true returned 0") # test what happens when the first fencing level has multiple devices. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_device_fails" % test_type["prefix"], "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 48") test.add_stonith_log_pattern("targeting node3 using false returned 1") test.add_stonith_log_pattern("targeting node3 using true returned 0") # test what happens when the first fencing level fails. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 3") test.add_stonith_log_pattern("Total timeout set to 21") test.add_stonith_log_pattern("targeting node3 using false1 returned 1") test.add_stonith_log_pattern("targeting node3 using false2 returned 1") test.add_stonith_log_pattern("targeting node3 using true3 returned 0") test.add_stonith_log_pattern("targeting node3 using true4 returned 0") # test what happens when the first fencing level had devices that no one has registered for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], "Verify topology can continue with missing devices.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") # Test what happens if multiple fencing levels are defined, and then the first one is removed. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_removal" % test_type["prefix"], "Verify level removal works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit. test.add_cmd("stonith_admin", "--output-as=xml -d node3 -i 2") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 96") test.add_stonith_log_pattern("targeting node3 using false1 returned 1") test.add_stonith_neg_log_pattern("targeting node3 using false2 returned ") test.add_stonith_log_pattern("targeting node3 using true3 returned 0") test.add_stonith_log_pattern("targeting node3 using true4 returned 0") # Test targeting a topology level by node name pattern. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_pattern" % test_type["prefix"], "Verify targeting topology by node name pattern works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", """--output-as=xml -R true -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1 node2 node3" """) test.add_cmd("stonith_admin", """--output-as=xml -r '@node.*' -i 1 -v true""") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5") test.add_stonith_log_pattern("targeting node3 using true returned 0") # test allowing commas and semicolons as delimiters in pcmk_host_list for test_type in test_types: test = self.new_test("%s_host_list_delimiters" % test_type["prefix"], "Verify commas and semicolons can be used as pcmk_host_list delimiters", test_type["use_cpg"]) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1,node2,node3" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=pcmk1;pcmk2;pcmk3" """) test.add_cmd("stonith_admin", "stonith_admin --output-as=xml -F node2 -t 5") test.add_cmd("stonith_admin", "stonith_admin --output-as=xml -F pcmk3 -t 5") test.add_stonith_log_pattern("targeting node2 using true1 returned 0") test.add_stonith_log_pattern("targeting pcmk3 using true2 returned 0") # test the stonith builds the correct list of devices that can fence a node. for test_type in test_types: test = self.new_test("%s_list_devices" % test_type["prefix"], "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l node1 -V", "true2", "true1") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l node1 -V", "true3", "true1") # simple test of device monitor for test_type in test_types: test = self.new_test("%s_monitor" % test_type["prefix"], "Verify device is reachable", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -Q true1") test.add_cmd("stonith_admin", "--output-as=xml -Q false1") test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -Q true2", CrmExit.ERROR) # Verify monitor occurs for duration of timeout period on failure for test_type in test_types: test = self.new_test("%s_monitor_timeout" % test_type["prefix"], "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '--output-as=xml -R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -Q true1 -t 5", CrmExit.ERROR) test.add_stonith_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries for test_type in test_types: test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '--output-as=xml -R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -Q true1 -t 15", CrmExit.ERROR) test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test for test_type in test_types: test = self.new_test("%s_register" % test_type["prefix"], "Verify devices can be registered and un-registered", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -Q true1") test.add_cmd("stonith_admin", "--output-as=xml -D true1") test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -Q true1", CrmExit.ERROR) # simple reboot test for test_type in test_types: test = self.new_test("%s_reboot" % test_type["prefix"], "Verify devices can be rebooted", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -B node3 -t 5") test.add_cmd("stonith_admin", "--output-as=xml -D true1") test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -Q true1", CrmExit.ERROR) # test fencing history. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_fence_history" % test_type["prefix"], "Verify last fencing operation is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5 -V") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -H node3", 'action="off" target="node3" .* status="success"') # simple test of dynamic list query for test_type in test_types: test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l fake_port_1", 'count="3"') # fence using dynamic list query for test_type in test_types: test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "--output-as=xml -F fake_port_1 -t 5 -V") # simple test of query using status action for test_type in test_types: test = self.new_test("%s_status_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l fake_port_1", 'count="3"') # test what happens when no reboot action is advertised for test_type in test_types: test = self.new_test("%s_no_reboot_support" % test_type["prefix"], "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V") test.add_stonith_log_pattern("does not support reboot") test.add_stonith_log_pattern("using true1 returned 0") # make sure reboot is used when reboot action is advertised for test_type in test_types: test = self.new_test("%s_with_reboot_support" % test_type["prefix"], "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V") test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("using true1 returned 0") # make sure requested fencing delay is applied only for the first device in the first level # make sure static delay from pcmk_delay_base is added for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_delay" % test_type["prefix"], "Verify requested fencing delay is applied only for the first device in the first level and pcmk_delay_base is added.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"") test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"") test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -F node3 --delay 1") test.add_stonith_log_pattern("Delaying 'off' action targeting node3 using true1 for 2s | timeout=120s requested_delay=1s base=1s max=1s") test.add_stonith_log_pattern("Delaying 'off' action targeting node3 using false1 for 1s | timeout=120s requested_delay=0s base=1s max=1s") test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 using true2") test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 using true3") def build_nodeid_tests(self): """ Register tests that use a corosync node id """ our_uname = localname() ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("cpg_supply_nodeid", "Verify nodeid is given when fence agent has nodeid as parameter", 1) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -F %s -t 3" % (our_uname)) test.add_stonith_log_pattern("as nodeid with fence action 'off' targeting %s" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("cpg_do_not_supply_nodeid", "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) # use a host name that won't be in corosync.conf test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=regr-test\"") test.add_cmd("stonith_admin", "--output-as=xml -F regr-test -t 3") test.add_stonith_neg_log_pattern("as nodeid with fence action 'off' targeting regr-test") ### verify nodeid use doesn't explode standalone mode test = self.new_test("standalone_do_not_supply_nodeid", "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -F %s -t 3" % (our_uname)) test.add_stonith_neg_log_pattern("as nodeid with fence action 'off' targeting %s" % (our_uname)) def build_unfence_tests(self): """ Register tests that verify unfencing """ our_uname = localname() ### verify unfencing using automatic unfencing test = self.new_test("cpg_unfence_required_1", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) # both devices should be executed test.add_stonith_log_pattern("using true1 returned 0") test.add_stonith_log_pattern("using true2 returned 0") ### verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("cpg_unfence_required_2", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=fail" -o "pcmk_host_list=%s"' % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -U %s -t 6" % (our_uname), CrmExit.ERROR) ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_3", "Verify require unfencing on all devices even when at different topology levels", 1) test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("using true1 returned 0") test.add_stonith_log_pattern("using true2 returned 0") ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_4", "Verify all required devices are executed even with topology levels fail.", 1) test.add_cmd('stonith_admin', '--output-as=xml -R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true3 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R true4 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R false3 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '--output-as=xml -R false4 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v false1" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v false2" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v false3" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true3" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 3 -v false4" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 4 -v true4" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("using true1 returned 0") test.add_stonith_log_pattern("using true2 returned 0") test.add_stonith_log_pattern("using true3 returned 0") test.add_stonith_log_pattern("using true4 returned 0") def build_unfence_on_target_tests(self): """ Register tests that verify unfencing that runs on the target """ our_uname = localname() ### verify unfencing using on_target device test = self.new_test("cpg_unfence_on_target_1", "Verify unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on target") ### verify failure of unfencing using on_target device test = self.new_test("cpg_unfence_on_target_2", "Verify failure unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -U node_fake_1234 -t 3", CrmExit.ERROR) test.add_stonith_log_pattern("(on) to be executed on target") ### verify unfencing using on_target device with topology test = self.new_test("cpg_unfence_on_target_3", "Verify unfencing with on_target = true using topology", 1) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on target") ### verify unfencing using on_target device with topology fails when victim node doesn't exist test = self.new_test("cpg_unfence_on_target_4", "Verify unfencing failure with on_target = true using topology", 1) test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 2 -v true2") test.add_expected_fail_cmd("stonith_admin", "--output-as=xml -U node_fake -t 3", CrmExit.ERROR) test.add_stonith_log_pattern("(on) to be executed on target") def build_remap_tests(self): """ Register tests that verify remapping of reboots to off-on """ test = self.new_test("cpg_remap_simple", "Verify sequential topology reboot is remapped to all-off-then-all-on", 1) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=1" -o "pcmk_reboot_timeout=10" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=2" -o "pcmk_reboot_timeout=20" """) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_stonith_log_pattern("Total timeout set to 3 for peer's fencing targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true2") test.add_stonith_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # fence_dummy sets "on" as an on_target action test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("cpg_remap_automatic", "Verify remapped topology reboot skips automatic 'on'", 1) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true2") test.add_stonith_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test.add_stonith_neg_log_pattern("perform 'on' action targeting node_fake using") test.add_stonith_neg_log_pattern("'on' failure") test = self.new_test("cpg_remap_complex_1", "Verify remapped topology reboot in second level works if non-remapped first level fails", 1) test.add_cmd("stonith_admin", """--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true2") test.add_stonith_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test = self.new_test("cpg_remap_complex_2", "Verify remapped topology reboot failure in second level proceeds to third level", 1) test.add_cmd("stonith_admin", """--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """--output-as=xml -R true3 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", "--output-as=xml -B node_fake -t 5") test.add_stonith_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_stonith_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using true1") test.add_stonith_log_pattern("perform 'off' action targeting node_fake using false2") test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_stonith_log_pattern("Undoing remap of reboot targeting node_fake") test.add_stonith_log_pattern("perform 'reboot' action targeting node_fake using true2") test.add_stonith_neg_log_pattern("node_fake with true3") def build_query_tests(self): """ run stonith_admin --metadata for the fence_dummy agent and check command output """ test = self.new_test("get_metadata", "Run stonith_admin --metadata for the fence_dummy agent", 1) test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -a fence_dummy --metadata", '