diff --git a/.gitignore b/.gitignore index 0d01017afa..29c9325106 100644 --- a/.gitignore +++ b/.gitignore @@ -1,207 +1,207 @@ # Common \#* .\#* GPATH GRTAGS GTAGS TAGS Makefile Makefile.in .deps .libs *.pc *.pyc *.bz2 *.tar.gz *.rpm *.la *.lo *.o *~ *.gcda *.gcno # Autobuild aclocal.m4 autoconf autoheader autom4te.cache/ automake build.counter compile config.guess config.log config.status config.sub configure depcomp install-sh include/stamp-* libtool libtool.m4 ltdl.m4 libltdl ltmain.sh missing py-compile /m4/argz.m4 /m4/ltargz.m4 /m4/ltoptions.m4 /m4/ltsugar.m4 /m4/ltversion.m4 /m4/lt~obsolete.m4 test-driver ylwrap # Configure targets Doxyfile /cts/CTS.py /cts/CTSlab.py /cts/CTSvars.py /cts/LSBDummy /cts/OCFIPraTest.py /cts/benchmark/clubench /cts/cluster_test /cts/cts /cts/cts-cli /cts/cts-coverage /cts/cts-lrmd /cts/cts-pengine /cts/cts-regression /cts/cts-stonithd /cts/fence_dummy /cts/lxc_autogen.sh /cts/pacemaker-cts-dummyd /cts/pacemaker-cts-dummyd@.service /daemons/execd/pacemaker_remote /daemons/execd/pacemaker_remote.service /daemons/pacemakerd/pacemaker /daemons/pacemakerd/pacemaker.combined.upstart /daemons/pacemakerd/pacemaker.service /daemons/pacemakerd/pacemaker.upstart extra/logrotate/pacemaker /fencing/fence_legacy include/config.h include/config.h.in include/crm_config.h publican.cfg /tools/cibsecret /tools/crm_error /tools/crm_failcount /tools/crm_master /tools/crm_mon.service /tools/crm_mon.upstart /tools/crm_report /tools/crm_standby /tools/report.collector /tools/report.common # Build targets *.7 *.7.xml *.7.html *.8 *.8.xml *.8.html doc/*/en-US/images/*.png doc/*/tmp/** doc/*/publish cib/cib cib/cibmon cib/cibpipe crmd/atest crmd/crmd /daemons/attrd/pacemaker-attrd -/daemons/execd/lrmd_test +/daemons/execd/cts-exec-helper /daemons/execd/pacemaker-execd /daemons/execd/pacemaker_remoted /daemons/pacemakerd/pacemakerd doc/api/* doc/Clusters_from_Scratch.txt doc/Pacemaker_Explained.txt doc/acls.html doc/crm_fencing.html doc/publican-catalog* fencing/stonith-test fencing/stonith_admin fencing/stonithd fencing/stonithd.xml pengine/pengine pengine/pengine.xml pengine/ptest scratch tools/attrd_updater tools/cibadmin tools/crm_attribute tools/crm_diff tools/crm_mon tools/crm_node tools/crm_resource tools/crm_shadow tools/crm_simulate tools/crm_verify tools/crmadmin tools/iso8601 tools/crm_ticket tools/report.collector.1 xml/crm.dtd xml/pacemaker*.rng xml/versions.rng doc/shared/en-US/*.xml doc/Clusters_from_Scratch.build doc/Clusters_from_Scratch/en-US/Ap-*.xml doc/Clusters_from_Scratch/en-US/Ch-*.xml doc/Pacemaker_Administration.build doc/Pacemaker_Administration/en-US/Ch-*.xml doc/Pacemaker_Development.build doc/Pacemaker_Development/en-US/Ch-*.xml doc/Pacemaker_Explained.build doc/Pacemaker_Explained/en-US/Ch-*.xml doc/Pacemaker_Explained/en-US/Ap-*.xml doc/Pacemaker_Remote.build doc/Pacemaker_Remote/en-US/Ch-*.xml lib/gnu/libgnu.a lib/gnu/stdalign.h *.coverity # Test detritus /cts/.regression.failed.diff /cts/pengine/*.ref /cts/pengine/*.up /cts/pengine/*.up.err /cts/pengine/bug-rh-1097457.log /cts/pengine/bug-rh-1097457.trs /cts/pengine/shadow.* /cts/test-suite.log /xml/test-2/*.up /xml/test-2/*.up.err # Formerly built files (helps when jumping back and forth in checkout) /attrd /coverage.sh /cts/HBDummy /fencing/regression.py /lrmd /mcp /pengine/.regression.failed.diff /pengine/regression.core.sh /pengine/test10/shadow.* #Other mock HTML pacemaker*.spec coverity-* compat_reports .ABI-build abi_dumps logs *.patch *.diff *.sed *.orig *.rej *.swp diff --git a/cts/cts-lrmd.in b/cts/cts-lrmd.in index c6dc068a34..7be6462976 100644 --- a/cts/cts-lrmd.in +++ b/cts/cts-lrmd.in @@ -1,1278 +1,1278 @@ #!@PYTHON@ """ Regression tests for Pacemaker's pacemaker-execd """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2012-2018 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import stat import sys import subprocess import shlex import time # Where to find test binaries # Prefer the source tree if available BUILD_DIR = "@abs_top_builddir@" TEST_DIR = sys.path[0] SBIN_DIR = "@sbindir@" # File permissions for executable scripts we create EXECMODE = stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH # These values must be kept in sync with include/crm/crm.h class CrmExit: OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-lrmd.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) - # For pacemaker-execd, lrmd_test and pacemaker_remoted + # For pacemaker-execd, cts-exec-helper, and pacemaker_remoted new_path = "%s/daemons/execd:%s" % (BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For crm_resource new_path = "%s/fencing:%s" % (BUILD_DIR, new_path) # For stonithd else: print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) - # For stonithd, pacemaker-execd, lrmd_test and pacemaker_remoted + # For stonithd, pacemaker-execd, cts-exec-helper, and pacemaker_remoted new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Run a command, and return its standard output. """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") def write_file(filename, contents, executable=False): """ Create a file. """ f = io.open(filename, "w+") f.write(contents) f.close() if executable: os.chmod(filename, EXECMODE) class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single pacemaker-execd regression test """ def __init__(self, name, description, verbose=0, tls=0): self.name = name self.description = description self.cmds = [] if tls: self.daemon_location = "pacemaker_remoted" else: self.daemon_location = "pacemaker-execd" - self.test_tool_location = "lrmd_test" + self.test_tool_location = "cts-exec-helper" self.verbose = verbose self.tls = tls self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK self.execd_process = None self.stonith_process = None self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): """ Prepare the host for running a test """ ### make sure we are in full control here ### - cmd = shlex.split("killall -q -9 stonithd lt-stonithd pacemaker-execd lt-pacemaker-execd lrmd_test lt-lrmd_test pacemaker_remoted") + cmd = shlex.split("killall -q -9 stonithd lt-stonithd pacemaker-execd lt-pacemaker-execd cts-exec-helper lt-cts-exec-helper pacemaker_remoted") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" if self.tls == 0: self.stonith_process = subprocess.Popen(shlex.split("stonithd -s")) if self.verbose: additional_args = additional_args + " -V" self.execd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/pacemaker-execd-regression.log" % (self.daemon_location, additional_args))) time.sleep(1) def clean_environment(self): """ Clean up the host after running a test """ if self.execd_process: self.execd_process.terminate() self.execd_process.wait() if self.verbose: print("Daemon output") logfile = io.open('/tmp/pacemaker-execd-regression.log', 'rt', errors='replace') for line in logfile: print(line.strip().encode('utf-8', 'replace')) os.remove('/tmp/pacemaker-execd-regression.log') if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.execd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "") def add_cmd_check_stdout(self, args, match, no_match=""): """ Add a command with expected output to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, match, 0, no_match) def add_cmd(self, args): - """ Add an lrmd_test command to be executed as part of this test """ + """ Add a cts-exec-helper command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "") def add_cmd_and_kill(self, kill_proc, args): - """ Add an lrmd_test command and system command to be executed as part of this test """ + """ Add a cts-exec-helper command and system command to be executed as part of this test """ self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "", kill=kill_proc) def add_expected_fail_cmd(self, args, exitcode=CrmExit.ERROR): - """ Add an lrmd_test command to be executed as part of this test and expected to fail """ + """ Add a cts-exec-helper command to be executed as part of this test and expected to fail """ self.__new_cmd(self.test_tool_location, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) ### Typically, the kill argument is used to detect some sort of ### failure. Without yielding for a few seconds here, the process ### launched earlier that is listening for the failure may not have ### time to connect to pacemaker-execd. time.sleep(2) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = pipe_output(test) args['cmd_output'] = output if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: raise OutputNotFoundError(output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: raise OutputFoundError(output) def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. """ res = 0 i = 1 if self.tls and self.name.count("stonith") != 0: self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print(self.result_txt) return res self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print(cmd['cmd_output']) print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print(cmd['cmd_output'].strip()) print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all pacemaker-execd regression tests """ def __init__(self, verbose=0, tls=0): self.tests = [] self.verbose = verbose self.tls = tls self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.need_authkey = 0 self.action_timeout = " -t 9000 " if self.tls: self.rsc_classes.remove("stonith") if "systemd" in self.rsc_classes: try: # This code doesn't need this import, but pacemaker-cts-dummyd # does, so ensure the dependency is available rather than cause # all systemd tests to fail. import systemd.daemon except ImportError: print("Fatal error: python systemd bindings not found. Is package installed?", file=sys.stderr) sys.exit(CrmExit.ERROR) print("Testing resource classes", repr(self.rsc_classes)) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : '-c exec -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : '-c cancel -r ocf_test_rsc -a monitor -i 2s -t 6000 ', "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc " + self.action_timeout + " -C systemd -T pacemaker-cts-dummyd@3", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : '-c exec -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout, # not sure why this one takes so much longer "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" -t 12000 ", "systemd_cancel_line" : '-c cancel -r systemd_test_rsc -a monitor -i 2s -t 6000 ', "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T pacemaker-cts-dummyd", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ", "upstart_monitor_line" : '-c exec -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout, "upstart_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "upstart_cancel_line" : '-c cancel -r upstart_test_rsc -a monitor -i 2s -t 6000 ', "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : '-c exec -r service_test_rsc -a monitor -i 2s ' + self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : '-c cancel -r service_test_rsc -a monitor -i 2s -t 6000 ', "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : '-c exec -r lsb_test_rsc -a status -i 2s ' + self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : '-c cancel -r lsb_test_rsc -a status -i 2s -t 6000 ', "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc "+self.action_timeout+" -C stonith -P pacemaker -T fence_dummy_monitor", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : "-c exec -r \"stonith_test_rsc\" -a \"start\" -t 8000 ", "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : '-c exec -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : '-c cancel -r stonith_test_rsc -a monitor -i 2s -t 6000 ', "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): """ Create a named test """ test = Test(name, description, self.verbose, self.tls) self.tests.append(test) return test def setup_test_environment(self): """ Prepare the host before executing any tests """ os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): self.need_authkey = 1 os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") dummy_upstart_job = (""" description "Dummy service for regression tests" exec dd if=/dev/random of=/dev/null """) dummy_fence_sleep_agent = ("""#!@PYTHON@ import sys import time def main(): for line in sys.stdin: if line.count("monitor") > 0: time.sleep(30000) sys.exit(0) sys.exit(1) if __name__ == "__main__": main() """) dummy_fence_agent = ("""#!/bin/sh while read line; do case ${line} in *monitor*) exit 0;; *metadata*) echo '' echo ' dummy description.' echo ' http://www.example.com' echo ' ' echo ' ' echo ' ' echo ' ' echo ' Fencing Action' echo ' ' echo ' ' echo ' ' echo ' ' echo ' Physical plug number or name of virtual machine' echo ' ' echo ' ' echo ' ' echo ' ' echo ' ' echo ' ' echo ' ' echo ' ' echo '' exit 0;; esac exit 1 done """) if os.path.isdir("/etc/init"): write_file("/etc/init/pacemaker-cts-dummyd.conf", dummy_upstart_job); write_file(SBIN_DIR + "/fence_dummy_sleep", dummy_fence_sleep_agent, executable=True); write_file(SBIN_DIR + "/fence_dummy_monitor", dummy_fence_agent, executable=True); if os.path.exists("%s/cts/LSBDummy" % BUILD_DIR): os.system("cp %s/cts/LSBDummy /etc/init.d/LSBDummy" % BUILD_DIR) if not os.path.exists("@OCF_RA_DIR@/pacemaker"): os.system("mkdir -p @OCF_RA_DIR@/pacemaker/") # Install helper OCF agents for agent in ["Dummy", "Stateful", "ping"]: os.system("cp %s/extra/resources/%s @OCF_RA_DIR@/pacemaker/%s" % (BUILD_DIR, agent, agent)) os.system("chmod a+x @OCF_RA_DIR@/pacemaker/%s" % (agent)) else: # Assume it's installed os.system("cp @datadir@/@PACKAGE@/tests/cts/LSBDummy /etc/init.d/LSBDummy") os.system("chmod a+x /etc/init.d/LSBDummy") os.system("mkdir -p @CRM_CORE_DIR@/root") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") def cleanup_test_environment(self): """ Clean up the host after executing desired tests """ if self.need_authkey: os.system("rm -f /etc/pacemaker/authkey") os.system("rm -f /etc/init.d/LSBDummy") os.system("rm -f " + SBIN_DIR + "/fence_dummy_monitor") os.system("rm -f " + SBIN_DIR + "/fence_dummy_sleep") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") def build_generic_tests(self): """ Register tests that apply to all resource classes """ common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # Add the duplicate monitors test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) ### If this happens the monitor did not actually cancel correctly. ### test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_multi_rsc_tests(self): """ Register complex tests that involve managing multiple resouces of different types """ common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. ### test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: ### If this fails, that means the monitor is not being rescheduled #### test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) def build_negative_tests(self): """ Register tests related to how pacemaker-execd handles failures """ ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") # -t must be less than self.action_timeout test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 1000 -w") # -t must be less than self.action_timeout test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + self.action_timeout) test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after pacemaker-execd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) test.add_cmd('-c exec -r stonith_test_rsc -a monitor -i 600s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" ' + self.action_timeout) test.add_cmd_and_kill("killall -9 -q stonithd lt-stonithd", '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error" -t 15000') test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + self.action_timeout) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" -t 6000') test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ' "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" " + self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ') test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ' "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout + '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"') ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T pacemaker-cts-dummyd@3 " + self.action_timeout + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q pacemaker-cts-dummyd", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ' "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T pacemaker-cts-dummyd "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q dd", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ' "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd('-c cancel -r test_rsc -a monitor -i 2s -t 6000 ' ### interval is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd('-c cancel -r test_rsc -a stop -i 1s -t 6000 ' ### action name is wrong, should fail "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd('-c exec -r test_rsc -a monitor -i 6s ' + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") def build_stress_tests(self): """ Register stress tests """ timeout = "-t 20000" iterations = 25 test = self.new_test("ocf_stress", "Verify OCF agent handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T pacemaker-cts-dummyd@3 -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s ' '-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) iterations = 9 timeout = "-t 30000" ### Verify recurring op in-flight collision is handled in series properly test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) for i in range(iterations): test.add_cmd('-c exec -r test_rsc -a monitor %s -i 100%dms ' '-k op_sleep -v 2 ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' % (timeout, i)) test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) def build_custom_tests(self): """ Register tests that target specific cases """ ### verify resource temporary folder is created and used by OCF agents. ### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000", CrmExit.TIMEOUT) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000", CrmExit.TIMEOUT) test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T pacemaker-cts-dummyd@3 "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T pacemaker-cts-dummyd "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd('-c exec -r \"test_rsc\" -a \"monitor\" -i 1s ' + self.action_timeout + ' -n ' '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') # this will fail because the monitor notifications should only go to the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT) test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ') test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"", "resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"", "resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_monitor\"", "resource-agent name=\"fence_dummy_monitor\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"pacemaker-cts-dummyd@\"", "resource-agent name=\"pacemaker-cts-dummyd@\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"pacemaker-cts-dummyd\"", "resource-agent name=\"pacemaker-cts-dummyd\"") ### get ocf providers ### test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy_monitor") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd@") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "pacemaker-cts-dummyd@") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "pacemaker-cts-dummyd") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy_monitor") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "pacemaker-cts-dummyd@") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy_monitor") def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['pacemaker-remote'] = 0 def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-R" or args[i] == "--pacemaker-remote": self.options['pacemaker-remote'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--pacemaker-remote | -R Test pacemaker_remoted binary instead of pacemaker-execd.") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run pacemaker-execd regression tests as specified by arguments """ update_path() opts = TestOptions() opts.build_options(argv) tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote']) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.build_stress_tests() tests.setup_test_environment() print("Starting ...") if opts.options['list-tests']: tests.print_list() elif opts.options['show-usage']: opts.show_usage() elif opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/cts/patterns.py b/cts/patterns.py index 2cc21d86ca..f20f8d98e6 100644 --- a/cts/patterns.py +++ b/cts/patterns.py @@ -1,402 +1,402 @@ """ Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS) """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2008-2018 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import sys, os from cts.CTSvars import * patternvariants = {} class BasePatterns(object): def __init__(self, name): self.name = name patternvariants[name] = self self.ignore = [ "avoid confusing Valgrind", ] self.BadNews = [] self.components = {} self.commands = { "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "CibQuery" : "cibadmin -Ql", "CibAddXml" : "cibadmin --modify -c --xml-text %s", "CibDelXpath" : "cibadmin --delete --xpath %s", # 300,000 == 5 minutes - "RscRunning" : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s", + "RscRunning" : CTSvars.CRM_DAEMON_DIR + "/cts-exec-helper -R -r %s", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "SetCheckInterval" : "cibadmin --modify -c --xml-text ''", "ClearCheckInterval" : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"", "MaintenanceModeOn" : "cibadmin --modify -c --xml-text ''", "MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", "StandbyCmd" : "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null", } self.search = { "Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This won't work if we have multiple partitions "Pat:Local_started" : "%s\W.*The local CRM is operational", "Pat:NonDC_started" : r"%s\W.*State transition.*-> S_NOT_DC", "Pat:DC_started" : r"%s\W.*State transition.*-> S_IDLE", "Pat:We_stopped" : "%s\W.*OVERRIDE THIS PATTERN", "Pat:They_stopped" : "%s\W.*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:Fencing_start" : "(Initiating remote operation|Requesting peer fencing ).* (for|of) %s", "Pat:Fencing_ok" : r"stonith.*:\s*Operation .* of %s by .* for .*@.*: OK", "Pat:Fencing_recover" : r"pengine.*: Recover %s", "Pat:RscOpOK" : r"crmd.*:\s+Result of %s operation for %s.*: (0 \()?ok", "Pat:RscRemoteOpOK" : r"crmd.*:\s+Result of %s operation for %s on %s: (0 \()?ok", "Pat:NodeFenced" : r"crmd.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK", "Pat:FenceOpOK" : "Operation .* for host '%s' with device .* returned: 0", } def get_component(self, key): if key in self.components: return self.components[key] print("Unknown component '%s' for %s" % (key, self.name)) return [] def get_patterns(self, key): if key == "BadNews": return self.BadNews elif key == "BadNewsIgnore": return self.ignore elif key == "Commands": return self.commands elif key == "Search": return self.search elif key == "Components": return self.components def __getitem__(self, key): if key == "Name": return self.name elif key in self.commands: return self.commands[key] elif key in self.search: return self.search[key] else: print("Unknown template '%s' for %s" % (key, self.name)) return None class crm_corosync(BasePatterns): ''' Patterns for Corosync version 2 cluster manager class ''' def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "StartCmd" : "service corosync start && service pacemaker start", "StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop; service corosync stop", "EpochCmd" : "crm_node -e", "QuorumCmd" : "crm_node -q", "PartitionCmd" : "crm_node -p", }) self.search.update({ # Close enough ... "Corosync Cluster Engine exiting normally" isn't # printed reliably. "Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines", "Pat:They_stopped" : "%s\W.*crmd.*Node %s(\[|\s).*state is now lost", "Pat:They_dead" : "crmd.*Node %s(\[|\s).*state is now lost", "Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(", "Pat:ChildKilled" : r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", "Pat:InfraUp" : "%s\W.*corosync.*Initializing transport", "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", }) self.ignore = self.ignore + [ r"crm_mon:", r"crmadmin:", r"update_trace_data", r"async_notify:.*strange, client not found", r"Parse error: Ignoring unknown option .*nodename", r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:", r"getinfo response error: 1$", "sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE", r"sbd.* pcmk:\s*error:.*Connection to cib_ro failed", r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* closed .I/O condition=17", ] self.BadNews = [ r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r"(pacemakerd|pacemaker-execd|crmd):.*, exiting", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"\[[0-9]+\] terminated with signal [0-9]+ \(", r"pengine:.*Recover .*\(.* -\> .*\)", r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", r"Peer is not part of our cluster", r"We appear to be in an election loop", r"Unknown node -> we will not deliver message", r"(Blackbox dump requested|Problem detected)", r"pacemakerd.*Could not connect to Cluster Configuration Database API", r"Receiving messages from a node we think is dead", r"share the same cluster nodeid", r"share the same name", #r"crm_ipc_send:.*Request .* failed", #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received", # Not inherently bad, but worth tracking #r"No need to invoke the TE", #r"ping.*: DEBUG: Updated connected = 0", #r"Digest mis-match:", r"crmd:.*Transition failed: terminated", r"Local CIB .* differs from .*:", r"warn.*:\s*Continuing but .* will NOT be used", r"warn.*:\s*Cluster configuration file .* is corrupt", #r"Executing .* fencing operation", r"Election storm", r"stalled the FSA with pending inputs", ] self.components["common-ignore"] = [ "Pending action:", "error: crm_log_message_adv:", r"resource( was|s were) active at shutdown", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "apply_xml_diff:.*Diff application failed!", r"crmd.*:\s*Action A_RECOVER .* not supported", "unconfirmed_actions:.*Waiting on .* unconfirmed actions", "cib_native_msgready:.*Message pending on command channel", r"crmd.*:\s*Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped:.*Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", "error: attrd_connection_destroy:.*Lost connection to attrd", r".*:\s*Executing .* fencing operation \(.*\) on ", r".*:\s*Requesting fencing \([^)]+\) of node ", r"(Blackbox dump requested|Problem detected)", # "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery", # "error: process_pe_message: Transition .* ERRORs found during PE processing", ] self.components["corosync-ignore"] = [ r"error:.*Connection to the CPG API failed: Library error", r"\[[0-9]+\] exited with status [0-9]+ \(", r"cib.*error:.*Corosync connection lost", r"stonith-ng.*error:.*Corosync connection terminated", r"pacemaker-execd.*error:.*Connection to stonith-ng.* (failed|closed)", r"crmd.*State transition .* S_RECOVERY", r"crmd.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state", r"crmd.*error:.*Could not recover from internal error", r"error:.*Connection to cib_(shm|rw).* (failed|closed)", r"error:.*STONITH connection failed", r"error: Connection to stonith-ng.* (failed|closed)", r"crit: Fencing daemon connection failed", ] self.components["corosync"] = [ r"pacemakerd.*error:.*Connection destroyed", r"attrd.*:\s*(crit|error):.*Lost connection to (Corosync|CIB) service", r"stonith.*:\s*(Corosync connection terminated|Shutting down)", r"cib.*:\s*Corosync connection lost!\s+Exiting.", r"crmd.*:\s*(connection terminated|Disconnected from Corosync)", r"pengine.*Scheduling Node .* for STONITH", r"crmd.*:\s*Peer .* was terminated \(.*\) by .* for .*:\s*OK", ] self.components["cib-ignore"] = [ "pacemaker-execd.*Connection to stonith-ng failed", "pacemaker-execd.*Connection to stonith-ng.* closed", "pacemaker-execd.*LRMD lost STONITH connection", "pacemaker-execd.*STONITH connection failed, finalizing .* pending operations", ] self.components["cib"] = [ "State transition .* S_RECOVERY", r"Respawning failed child process: (pacemaker-attrd|crmd)", "Connection to cib_.* failed", "Connection to cib_.* closed", r"crmd.*:.*Connection to the CIB terminated...", r"attrd.*:.*(Lost connection to CIB service|Connection to the CIB terminated)", r"crmd\[[0-9]+\] exited with status 1 \(", r"attrd\[[0-9]+\] exited with status 102 \(", r"crmd.*: Input I_TERMINATE .*from do_recover", "crmd.*I_ERROR.*crmd_cib_connection_destroy", "crmd.*Could not recover from internal error", ] self.components["pacemaker-execd"] = [ r"crmd.*Connection to (pacemaker-execd|lrmd|executor) (failed|closed)", r"crmd.*I_ERROR.*lrm_connection_destroy", r"crmd.*State transition .* S_RECOVERY", r"crmd.*: Input I_TERMINATE .*from do_recover", r"crmd.*Could not recover from internal error", r"pacemakerd.*pacemaker-execd.* terminated with signal 9", r"pacemakerd.*crmd\[[0-9]+\] exited with status 1", r"pacemakerd.*Respawning failed child process: pacemaker-execd", r"pacemakerd.*Respawning failed child process: crmd", ] self.components["pacemaker-execd-ignore"] = [] self.components["crmd"] = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ] self.components["crmd-ignore"] = [] self.components["pacemaker-attrd"] = [] self.components["pacemaker-attrd-ignore"] = [] self.components["pengine"] = [ "State transition .* S_RECOVERY", r"Respawning failed child process: crmd", r"crmd\[[0-9]+\] exited with status 1 \(", "Connection to pengine failed", "Connection to pengine.* closed", "Connection to the Policy Engine failed", "crmd.*I_ERROR.*save_cib_contents", r"crmd.*: Input I_TERMINATE .*from do_recover", "crmd.*Could not recover from internal error", ] self.components["pengine-ignore"] = [] self.components["stonith"] = [ "Connection to stonith-ng failed", "LRMD lost STONITH connection", "Connection to stonith-ng.* closed", "Fencing daemon connection failed", r"crmd.*:\s*warn.*:\s*Callback already present", ] self.components["stonith-ignore"] = [ r"pengine.*: Recover Fencing", r"Updating failcount for Fencing", r"error:.*Connection to stonith-ng failed", r"error:.*Connection to stonith-ng.*closed \(I/O condition=17\)", r"crit:.*Fencing daemon connection failed", r"error:.*Sign-in failed: triggered a retry", "STONITH connection failed, finalizing .* pending operations.", r"crmd.*:\s+Result of .* operation for Fencing.*Error", ] self.components["stonith-ignore"].extend(self.components["common-ignore"]) class crm_corosync_docker(crm_corosync): ''' Patterns for Corosync version 2 cluster manager class ''' def __init__(self, name): crm_corosync.__init__(self, name) self.commands.update({ "StartCmd" : "pcmk_start", "StopCmd" : "pcmk_stop", }) class PatternSelector(object): def __init__(self, name=None): self.name = name self.base = BasePatterns("crm-base") if not name: crm_corosync("crm-corosync") elif name == "crm-corosync": crm_corosync(name) elif name == "crm-corosync-docker": crm_corosync_docker(name) def get_variant(self, variant): if variant in patternvariants: return patternvariants[variant] print("defaulting to crm-base for %s" % variant) return self.base def get_patterns(self, variant, kind): return self.get_variant(variant).get_patterns(kind) def get_template(self, variant, key): v = self.get_variant(variant) return v[key] def get_component(self, variant, kind): return self.get_variant(variant).get_component(kind) def __getitem__(self, key): return self.get_template(self.name, key) # python cts/CTSpatt.py -k crm-corosync -t StartCmd if __name__ == '__main__': pdir=os.path.dirname(sys.path[0]) sys.path.insert(0, pdir) # So that things work from the source directory kind=None template=None skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-k" or args[i] == "--kind": skipthis=1 kind = args[i+1] elif args[i] == "-t" or args[i] == "--template": skipthis=1 template = args[i+1] else: print("Illegal argument " + args[i]) print(PatternSelector(kind)[template]) diff --git a/daemons/execd/Makefile.am b/daemons/execd/Makefile.am index 352ec597ee..44458dbf3a 100644 --- a/daemons/execd/Makefile.am +++ b/daemons/execd/Makefile.am @@ -1,48 +1,48 @@ # # Copyright 2012-2018 David Vossel # # This source code is licensed under the GNU Lesser General Public License # version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/Makefile.common halibdir = $(CRM_DAEMON_DIR) -halib_PROGRAMS = pacemaker-execd lrmd_test +halib_PROGRAMS = pacemaker-execd cts-exec-helper initdir = $(INITDIR) init_SCRIPTS = pacemaker_remote sbin_PROGRAMS = pacemaker_remoted if BUILD_SYSTEMD systemdunit_DATA = pacemaker_remote.service endif pacemaker_execd_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_execd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_execd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/fencing/libstonithd.la ${COMPAT_LIBS} pacemaker_execd_SOURCES = main.c lrmd.c lrmd_alert_api.c pacemaker_remoted_CPPFLAGS = -DSUPPORT_REMOTE $(AM_CPPFLAGS) pacemaker_remoted_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_remoted_LDADD = $(pacemaker_execd_LDADD) \ $(top_builddir)/lib/lrmd/liblrmd.la pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) tls_backend.c ipc_proxy.c -lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ +cts_exec_helper_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la -lrmd_test_SOURCES = cts-exec-helper.c +cts_exec_helper_SOURCES = cts-exec-helper.c noinst_HEADERS = lrmd_private.h CLEANFILES = $(man8_MANS) diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c index c8057c8fe9..d5d225f550 100644 --- a/daemons/execd/cts-exec-helper.c +++ b/daemons/execd/cts-exec-helper.c @@ -1,615 +1,616 @@ /* * Copyright 2012-2018 David Vossel * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"help", 0, 0, '?'}, {"verbose", 0, 0, 'V', "\t\tPrint out logs and events to screen"}, {"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"}, {"tls", 0, 0, 'S', "\t\tUse tls backend for local connection"}, {"listen", 1, 0, 'l', "\tListen for a specific event string"}, {"api-call", 1, 0, 'c', "\tDirectly relates to executor API functions"}, {"no-wait", 0, 0, 'w', "\tMake api call and do not wait for result."}, {"is-running", 0, 0, 'R', "\tDetermine if a resource is registered and running."}, {"notify-orig", 0, 0, 'n', "\tOnly notify this client the results of an api action."}, {"notify-changes", 0, 0, 'o', "\tOnly notify client changes to recurring operations."}, {"-spacer-", 1, 0, '-', "\nParameters for api-call option"}, {"action", 1, 0, 'a'}, {"rsc-id", 1, 0, 'r'}, {"cancel-call-id", 1, 0, 'x'}, {"provider", 1, 0, 'P'}, {"class", 1, 0, 'C'}, {"type", 1, 0, 'T'}, {"interval", 1, 0, 'i'}, {"timeout", 1, 0, 't'}, {"start-delay", 1, 0, 's'}, {"param-key", 1, 0, 'k'}, {"param-val", 1, 0, 'v'}, {"-spacer-", 1, 0, '-'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ cib_t *cib_conn = NULL; static int exec_call_id = 0; static int exec_call_opts = 0; extern void cleanup_alloc_calculations(pe_working_set_t * data_set); static gboolean start_test(gpointer user_data); static void try_connect(void); static struct { int verbose; int quiet; guint interval_ms; int timeout; int start_delay; int cancel_call_id; int no_wait; int is_running; int no_connect; const char *api_call; const char *rsc_id; const char *provider; const char *class; const char *type; const char *action; const char *listen; lrmd_key_value_t *params; } options; GMainLoop *mainloop = NULL; lrmd_t *lrmd_conn = NULL; static char event_buf_v0[1024]; static void test_exit(crm_exit_t exit_code) { lrmd_api_delete(lrmd_conn); crm_exit(exit_code); } #define print_result(result) \ if (!options.quiet) { \ result; \ } \ #define report_event(event) \ snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \ lrmd_event_type2str(event->type), \ event->rsc_id, \ event->op_type ? event->op_type : "none", \ services_ocf_exitcode_str(event->rc), \ services_lrm_status_str(event->op_status)); \ crm_info("%s", event_buf_v0); static void test_shutdown(int nsig) { lrmd_api_delete(lrmd_conn); lrmd_conn = NULL; } static void read_events(lrmd_event_data_t * event) { report_event(event); if (options.listen) { if (safe_str_eq(options.listen, event_buf_v0)) { print_result(printf("LISTEN EVENT SUCCESSFUL\n")); test_exit(CRM_EX_OK); } } if (exec_call_id && (event->call_id == exec_call_id)) { if (event->op_status == 0 && event->rc == 0) { print_result(printf("API-CALL SUCCESSFUL for 'exec'\n")); } else { print_result(printf("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s\n", event->rc, services_lrm_status_str(event->op_status))); test_exit(CRM_EX_ERROR); } if (!options.listen) { test_exit(CRM_EX_OK); } } } static gboolean timeout_err(gpointer data) { print_result(printf("LISTEN EVENT FAILURE - timeout occurred, never found.\n")); test_exit(CRM_EX_TIMEOUT); return FALSE; } static void connection_events(lrmd_event_data_t * event) { int rc = event->connection_rc; if (event->type != lrmd_event_connect) { /* ignore */ return; } if (!rc) { crm_info("Executor client connection established"); start_test(NULL); return; } else { sleep(1); try_connect(); crm_notice("Executor client connection failed"); } } static void try_connect(void) { int tries = 10; static int num_tries = 0; int rc = 0; lrmd_conn->cmds->set_callback(lrmd_conn, connection_events); for (; num_tries < tries; num_tries++) { rc = lrmd_conn->cmds->connect_async(lrmd_conn, "pacemaker-execd", 3000); if (!rc) { return; /* we'll hear back in async callback */ } sleep(1); } print_result(printf("API CONNECTION FAILURE\n")); test_exit(CRM_EX_ERROR); } static gboolean start_test(gpointer user_data) { int rc = 0; if (!options.no_connect) { if (!lrmd_conn->cmds->is_connected(lrmd_conn)) { try_connect(); /* async connect -- this function will get called back into */ return 0; } } lrmd_conn->cmds->set_callback(lrmd_conn, read_events); if (options.timeout) { g_timeout_add(options.timeout, timeout_err, NULL); } if (!options.api_call) { return 0; } if (safe_str_eq(options.api_call, "exec")) { rc = lrmd_conn->cmds->exec(lrmd_conn, options.rsc_id, options.action, NULL, options.interval_ms, options.timeout, options.start_delay, exec_call_opts, options.params); if (rc > 0) { exec_call_id = rc; print_result(printf("API-CALL 'exec' action pending, waiting on response\n")); } } else if (safe_str_eq(options.api_call, "register_rsc")) { rc = lrmd_conn->cmds->register_rsc(lrmd_conn, options.rsc_id, options.class, options.provider, options.type, 0); } else if (safe_str_eq(options.api_call, "get_rsc_info")) { lrmd_rsc_info_t *rsc_info; rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); if (rsc_info) { print_result(printf("RSC_INFO: id:%s class:%s provider:%s type:%s\n", rsc_info->id, rsc_info->standard, rsc_info->provider ? rsc_info->provider : "", rsc_info->type)); lrmd_free_rsc_info(rsc_info); rc = pcmk_ok; } else { rc = -1; } } else if (safe_str_eq(options.api_call, "unregister_rsc")) { rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0); } else if (safe_str_eq(options.api_call, "cancel")) { rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action, options.interval_ms); } else if (safe_str_eq(options.api_call, "metadata")) { char *output = NULL; rc = lrmd_conn->cmds->get_metadata(lrmd_conn, options.class, options.provider, options.type, &output, 0); if (rc == pcmk_ok) { print_result(printf("%s", output)); free(output); } } else if (safe_str_eq(options.api_call, "list_agents")) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider); if (rc > 0) { print_result(printf("%d agents found\n", rc)); for (iter = list; iter != NULL; iter = iter->next) { print_result(printf("%s\n", iter->val)); } lrmd_list_freeall(list); rc = 0; } else { print_result(printf("API_CALL FAILURE - no agents found\n")); rc = -1; } } else if (safe_str_eq(options.api_call, "list_ocf_providers")) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list); if (rc > 0) { print_result(printf("%d providers found\n", rc)); for (iter = list; iter != NULL; iter = iter->next) { print_result(printf("%s\n", iter->val)); } lrmd_list_freeall(list); rc = 0; } else { print_result(printf("API_CALL FAILURE - no providers found\n")); rc = -1; } } else if (safe_str_eq(options.api_call, "list_standards")) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); if (rc > 0) { print_result(printf("%d standards found\n", rc)); for (iter = list; iter != NULL; iter = iter->next) { print_result(printf("%s\n", iter->val)); } lrmd_list_freeall(list); rc = 0; } else { print_result(printf("API_CALL FAILURE - no providers found\n")); rc = -1; } } else if (safe_str_eq(options.api_call, "get_recurring_ops")) { GList *op_list = NULL; GList *op_item = NULL; rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0, &op_list); for (op_item = op_list; op_item != NULL; op_item = op_item->next) { lrmd_op_info_t *op_info = op_item->data; print_result(printf("RECURRING_OP: %s_%s_%s timeout=%sms\n", op_info->rsc_id, op_info->action, op_info->interval_ms_s, op_info->timeout_ms_s)); lrmd_free_op_info(op_info); } g_list_free(op_list); } else if (options.api_call) { print_result(printf("API-CALL FAILURE unknown action '%s'\n", options.action)); test_exit(CRM_EX_ERROR); } if (rc < 0) { print_result(printf("API-CALL FAILURE for '%s' api_rc:%d\n", options.api_call, rc)); test_exit(CRM_EX_ERROR); } if (options.api_call && rc == pcmk_ok) { print_result(printf("API-CALL SUCCESSFUL for '%s'\n", options.api_call)); if (!options.listen) { test_exit(CRM_EX_OK); } } if (options.no_wait) { /* just make the call and exit regardless of anything else. */ test_exit(CRM_EX_OK); } return 0; } static int generate_params(void) { int rc = 0; pe_working_set_t data_set; xmlNode *cib_xml_copy = NULL; resource_t *rsc = NULL; GHashTable *params = NULL; GHashTable *meta = NULL; GHashTableIter iter; if (options.params) { return 0; } set_working_set_defaults(&data_set); cib_conn = cib_new(); - rc = cib_conn->cmds->signon(cib_conn, "lrmd_test", cib_query); + rc = cib_conn->cmds->signon(cib_conn, "cts-exec-helper", cib_query); if (rc != pcmk_ok) { crm_err("Error signing on to the CIB service: %s", pcmk_strerror(rc)); rc = -1; goto param_gen_bail; } rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { crm_err("Error retrieving cib copy: %s (%d)", pcmk_strerror(rc), rc); goto param_gen_bail; } else if (cib_xml_copy == NULL) { rc = -ENODATA; crm_err("Error retrieving cib copy: %s (%d)", pcmk_strerror(rc), rc); goto param_gen_bail; } if (cli_config_update(&cib_xml_copy, NULL, FALSE) == FALSE) { crm_err("Error updating cib configuration"); rc = -1; goto param_gen_bail; } data_set.input = cib_xml_copy; data_set.now = crm_time_new(NULL); cluster_status(&data_set); if (options.rsc_id) { rsc = pe_find_resource_with_flags(data_set.resources, options.rsc_id, pe_find_renamed|pe_find_any); } if (!rsc) { crm_err("Resource does not exist in config"); rc = -1; goto param_gen_bail; } params = crm_str_table_new(); meta = crm_str_table_new(); get_rsc_attributes(params, rsc, NULL, &data_set); get_meta_attributes(meta, rsc, NULL, &data_set); if (params) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { options.params = lrmd_key_value_add(options.params, key, value); } g_hash_table_destroy(params); } if (meta) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); options.params = lrmd_key_value_add(options.params, crm_name, value); free(crm_name); } g_hash_table_destroy(meta); } param_gen_bail: cleanup_alloc_calculations(&data_set); return rc; } int main(int argc, char **argv) { int option_index = 0; int argerr = 0; int flag; char *key = NULL; char *val = NULL; gboolean use_tls = FALSE; crm_trigger_t *trig; crm_set_options(NULL, "mode [options]", long_options, "Inject commands into the executor, and watch for events\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case '?': crm_help(flag, CRM_EX_OK); break; case 'V': options.verbose = 1; break; case 'Q': options.quiet = 1; options.verbose = 0; break; case 'l': options.listen = optarg; break; case 'w': options.no_wait = 1; break; case 'R': options.is_running = 1; break; case 'n': exec_call_opts = lrmd_opt_notify_orig_only; break; case 'o': exec_call_opts = lrmd_opt_notify_changes_only; break; case 'c': options.api_call = optarg; break; case 'a': options.action = optarg; break; case 'r': options.rsc_id = optarg; break; case 'x': if(optarg) { options.cancel_call_id = atoi(optarg); } break; case 'P': options.provider = optarg; break; case 'C': options.class = optarg; break; case 'T': options.type = optarg; break; case 'i': if(optarg) { options.interval_ms = crm_parse_interval_spec(optarg); } break; case 't': if(optarg) { options.timeout = atoi(optarg); } break; case 's': if(optarg) { options.start_delay = atoi(optarg); } break; case 'k': key = optarg; if (key && val) { options.params = lrmd_key_value_add(options.params, key, val); key = val = NULL; } break; case 'v': val = optarg; if (key && val) { options.params = lrmd_key_value_add(options.params, key, val); key = val = NULL; } break; case 'S': use_tls = TRUE; break; default: ++argerr; break; } } if (argerr) { crm_help('?', CRM_EX_USAGE); } if (optind > argc) { ++argerr; } if (!options.listen && (safe_str_eq(options.api_call, "metadata") || safe_str_eq(options.api_call, "list_agents") || safe_str_eq(options.api_call, "list_standards") || safe_str_eq(options.api_call, "list_ocf_providers"))) { options.no_connect = 1; } - crm_log_init("lrmd_ctest", LOG_INFO, TRUE, options.verbose ? TRUE : FALSE, argc, argv, FALSE); + crm_log_init(NULL, LOG_INFO, TRUE, (options.verbose? TRUE : FALSE), + argc, argv, FALSE); if (options.is_running) { if (!options.timeout) { options.timeout = 30000; } options.interval_ms = 0; if (!options.rsc_id) { crm_err("rsc-id must be given when is-running is used"); test_exit(CRM_EX_ERROR); } if (generate_params()) { print_result(printf ("Failed to retrieve rsc parameters from cib, can not determine if rsc is running.\n")); test_exit(CRM_EX_ERROR); } options.api_call = "exec"; options.action = "monitor"; exec_call_opts = lrmd_opt_notify_orig_only; } /* if we can't perform an api_call or listen for events, * there is nothing to do */ if (!options.api_call && !options.listen) { crm_err("Nothing to be done. Please specify 'api-call' and/or 'listen'"); return CRM_EX_OK; } if (use_tls) { lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0); } else { lrmd_conn = lrmd_api_new(); } trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); if (cib_conn != NULL) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } test_exit(CRM_EX_OK); return CRM_EX_OK; } diff --git a/pacemaker.spec.in b/pacemaker.spec.in index b6d5cccda5..f09173177f 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -1,771 +1,771 @@ # Globals and defines to control package behavior (configure these as desired) ## User and group to use for nonprivileged services %global uname hacluster %global gname haclient ## Where to install Pacemaker documentation %global pcmk_docdir %{_docdir}/%{name} ## GitHub entity that distributes source (for ease of using a fork) %global github_owner ClusterLabs ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.0.0 %global specversion 1 ## Upstream commit (or git tag, such as "Pacemaker-" plus the ## {pcmkversion} macro for an official release) to use for this package %global commit HEAD ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. %global commit_abbrev 7 ## Python major version to use (2, 3, or 0 for auto-detect) %global python_major 0 # Define globals for convenient use later ## Workaround to use parentheses in other globals %global lparen ( %global rparen ) ## Short version of git commit %define shortcommit %(c=%{commit}; case ${c} in Pacemaker-*%{rparen} echo ${c:10};; *%{rparen} echo ${c:0:%{commit_abbrev}};; esac) ## Whether this is a tagged release %define tag_release %([ %{commit} != Pacemaker-%{shortcommit} ]; echo $?) ## Whether this is a release candidate (in case of a tagged release) %define pre_release %([ "%{tag_release}" -eq 0 ] || { case "%{shortcommit}" in *-rc[[:digit:]]*%{rparen} false;; esac; }; echo $?) ## Heuristic used to infer bleeding-edge deployments that are ## less likely to have working versions of the documentation tools %define bleeding %(test ! -e /etc/yum.repos.d/fedora-rawhide.repo; echo $?) ## Whether this platform defaults to using systemd as an init system ## (needs to be evaluated prior to BuildRequires being enumerated and ## installed as it's intended to conditionally select some of these, and ## for that there are only few indicators with varying reliability: ## - presence of systemd-defined macros (when building in a full-fledged ## environment, which is not the case with ordinary mock-based builds) ## - systemd-aware rpm as manifested with the presence of particular ## macro (rpm itself will trivially always be present when building) ## - existence of /usr/lib/os-release file, which is something heavily ## propagated by systemd project ## - when not good enough, there's always a possibility to check ## particular distro-specific macros (incl. version comparison) %define systemd_native (%{?_unitdir:1}%{!?_unitdir:0}%{nil \ } || %{?__transaction_systemd_inhibit:1}%{!?__transaction_systemd_inhibit:0}%{nil \ } || %(test -f /usr/lib/os-release; test $? -ne 0; echo $?)) %if 0%{?fedora} > 20 || 0%{?rhel} > 7 %global gnutls_priorities @SYSTEM %endif # Python-related definitions ## Use Python 3 on certain platforms if major version not specified %if %{?python_major} == 0 %if 0%{?fedora} > 26 || 0%{?rhel} > 7 %global python_major 3 %endif %endif ## Turn off auto-compilation of Python files outside site-packages directory, ## so that the -libs-devel package is multilib-compliant (no *.py[co] files) %global __os_install_post %(echo '%{__os_install_post}' | { sed -e 's!/usr/lib[^[:space:]]*/brp-python-bytecompile[[:space:]].*$!!g'; }) ## Values that differ by Python major version %if 0%{?python_major} > 2 %global python_path /usr/bin/python%{?python3_pkgversion}%{!?python3_pkgversion:3} %global python_pkg python3 %global python_min 3.2 %define py_site %{?python3_sitelib}%{!?python3_sitelib:%( python3 -c 'from distutils.sysconfig import get_python_lib as gpl; print(gpl(1))' 2>/dev/null)} %else %if 0%{?python_major} > 1 %global python_path /usr/bin/python%{?python2_pkgversion}%{!?python2_pkgversion:2} %global python_pkg python2 %global python_min 2.7 %define py_site %{?python2_sitelib}%{!?python2_sitelib:%( python2 -c 'from distutils.sysconfig import get_python_lib as gpl; print(gpl(1))' 2>/dev/null)} %else %global python_min 2.7 %global python_pkg python %define py_site %{?python_sitelib}%{!?python_sitelib:%( python -c 'from distutils.sysconfig import get_python_lib as gpl; print(gpl(1))' 2>/dev/null)} %endif %endif # Definitions for backward compatibility with older RPM versions ## Ensure the license macro behaves consistently (older RPM will otherwise ## overwrite it once it encounters "License:"). Courtesy Jason Tibbitts: ## https://pkgs.fedoraproject.org/cgit/rpms/epel-rpm-macros.git/tree/macros.zzz-epel?h=el6&id=e1adcb77 %if !%{defined _licensedir} %define description %{lua: rpm.define("license %doc") print("%description") } %endif # Define conditionals so that "rpmbuild --with " and # "rpmbuild --without " can enable and disable specific features ## Add option to enable support for stonith/external fencing agents %bcond_with stonithd ## Add option to create binaries suitable for use with profiling tools %bcond_with profiling ## Add option to create binaries with coverage analysis %bcond_with coverage ## Add option to skip generating documentation ## (the build tools aren't available everywhere) %bcond_without doc ## Add option to prefix package version with "0." ## (so later "official" packages will be considered updates) %bcond_with pre_release ## Add option to ship Upstart job files %bcond_with upstart_job ## Add option to turn off hardening of libraries and daemon executables %bcond_without hardening # Keep sane profiling data if requested %if %{with profiling} ## Disable -debuginfo package and stripping binaries/libraries %define debug_package %{nil} %endif # Define the release version # (do not look at externally enforced pre-release flag for tagged releases # as only -rc tags, captured with the second condition, implies that then) %if (!%{tag_release} && %{with pre_release}) || 0%{pre_release} %if 0%{pre_release} %define pcmk_release 0.%{specversion}.%(s=%{shortcommit}; echo ${s: -3}) %else %define pcmk_release 0.%{specversion}.%{shortcommit}.git %endif %else %if 0%{tag_release} %define pcmk_release %{specversion} %else %define pcmk_release %{specversion}.%{shortcommit}.git %endif %endif Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} Release: %{pcmk_release}%{?dist} %if %{defined _unitdir} License: GPLv2+ and LGPLv2+ %else # initscript is Revised BSD License: GPLv2+ and LGPLv2+ and BSD %endif Url: http://www.clusterlabs.org Group: System Environment/Daemons # Hint: use "spectool -s 0 pacemaker.spec" (rpmdevtools) to check the final URL: # https://github.com/ClusterLabs/pacemaker/archive/e91769e5a39f5cb2f7b097d3c612368f0530535e/pacemaker-e91769e.tar.gz Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{shortcommit}.tar.gz Requires: resource-agents Requires: %{name}-libs = %{version}-%{release} Requires: %{name}-cluster-libs = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} %{?systemd_requires} # Pacemaker requires a minimum Python functionality Requires: %{python_pkg} >= %{python_min} BuildRequires: %{python_pkg}-devel >= %{python_min} # Pacemaker requires a minimum libqb functionality Requires: libqb >= 0.13.0 BuildRequires: libqb-devel >= 0.13.0 # Basics required for the build (even if usually satisfied through other BRs) BuildRequires: coreutils findutils grep sed # Required for core functionality BuildRequires: automake autoconf gcc libtool pkgconfig libtool-ltdl-devel BuildRequires: pkgconfig(glib-2.0) >= 2.16 BuildRequires: libxml2-devel libxslt-devel libuuid-devel BuildRequires: bzip2-devel # Enables optional functionality BuildRequires: ncurses-devel docbook-style-xsl BuildRequires: help2man gnutls-devel pam-devel pkgconfig(dbus-1) %if %{systemd_native} BuildRequires: pkgconfig(systemd) %endif Requires: corosync >= 2.0.0 BuildRequires: corosynclib-devel >= 2.0.0 %if %{with stonithd} BuildRequires: cluster-glue-libs-devel %endif ## (note no avoiding effect when building through non-customized mock) %if !%{bleeding} %if %{with doc} BuildRequires: inkscape asciidoc publican %endif %endif %description Pacemaker is an advanced, scalable High-Availability cluster resource manager. It supports more than 16 node clusters with significant capabilities for managing resources and dependencies. It will run scripts at initialization, when machines go up or down, when related resources fail and can be configured to periodically check resource health. Available rpmbuild rebuild options: --with(out) : coverage doc stonithd hardening pre_release profiling upstart_job %package cli License: GPLv2+ and LGPLv2+ Summary: Command line tools for controlling Pacemaker clusters Group: System Environment/Daemons Requires: %{name}-libs = %{version}-%{release} Requires: perl-TimeDate %description cli Pacemaker is an advanced, scalable High-Availability cluster resource manager. The %{name}-cli package contains command line tools that can be used to query and control the cluster from machines that may, or may not, be part of the cluster. %package libs License: GPLv2+ and LGPLv2+ Summary: Core Pacemaker libraries Group: System Environment/Daemons Requires(pre): shadow-utils %description libs Pacemaker is an advanced, scalable High-Availability cluster resource manager. The %{name}-libs package contains shared libraries needed for cluster nodes and those just running the CLI tools. %package cluster-libs License: GPLv2+ and LGPLv2+ Summary: Cluster Libraries used by Pacemaker Group: System Environment/Daemons Requires: %{name}-libs = %{version}-%{release} %description cluster-libs Pacemaker is an advanced, scalable High-Availability cluster resource manager. The %{name}-cluster-libs package contains cluster-aware shared libraries needed for nodes that will form part of the cluster nodes. %package remote %if %{defined _unitdir} License: GPLv2+ and LGPLv2+ %else # initscript is Revised BSD License: GPLv2+ and LGPLv2+ and BSD %endif Summary: Pacemaker remote daemon for non-cluster nodes Group: System Environment/Daemons Requires: %{name}-libs = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} Requires: resource-agents # -remote can be fully independent of systemd %{?systemd_ordering}%{!?systemd_ordering:%{?systemd_requires}} %description remote Pacemaker is an advanced, scalable High-Availability cluster resource manager. The %{name}-remote package contains the Pacemaker Remote daemon which is capable of extending pacemaker functionality to remote nodes not running the full corosync/cluster stack. %package libs-devel License: GPLv2+ and LGPLv2+ Summary: Pacemaker development package Group: Development/Libraries Requires: %{name}-cts = %{version}-%{release} Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} Requires: libuuid-devel%{?_isa} libtool-ltdl-devel%{?_isa} Requires: libxml2-devel%{?_isa} libxslt-devel%{?_isa} Requires: bzip2-devel%{?_isa} glib2-devel%{?_isa} Requires: libqb-devel%{?_isa} Requires: corosynclib-devel%{?_isa} >= 2.0.0 %description libs-devel Pacemaker is an advanced, scalable High-Availability cluster resource manager. The %{name}-libs-devel package contains headers and shared libraries for developing tools for Pacemaker. %package cts License: GPLv2+ and LGPLv2+ Summary: Test framework for cluster-related technologies like Pacemaker Group: System Environment/Daemons Requires: %{python_pkg} >= %{python_min} Requires: %{name}-libs = %{version}-%{release} BuildArch: noarch # systemd python bindings are separate package in some distros %if %{defined systemd_requires} %if 0%{?fedora} > 22 || 0%{?rhel} > 7 Requires: %{python_pkg}-systemd %else %if 0%{?fedora} > 20 || 0%{?rhel} > 6 Requires: systemd-python %endif %endif %endif %description cts Test framework for cluster-related technologies like Pacemaker %package doc License: CC-BY-SA-4.0 Summary: Documentation for Pacemaker Group: Documentation BuildArch: noarch %description doc Documentation for Pacemaker. Pacemaker is an advanced, scalable High-Availability cluster resource manager. %prep %setup -q -n %{name}-%{commit} %build # Early versions of autotools (e.g. RHEL <= 5) do not support --docdir export docdir=%{pcmk_docdir} export systemdunitdir=%{?_unitdir}%{!?_unitdir:no} %if %{with hardening} # prefer distro-provided hardening flags in case they are defined # through _hardening_{c,ld}flags macros, configure script will # use its own defaults otherwise; if such hardenings are completely # undesired, rpmbuild using "--without hardening" # (or "--define '_without_hardening 1'") export CFLAGS_HARDENED_EXE="%{?_hardening_cflags}" export CFLAGS_HARDENED_LIB="%{?_hardening_cflags}" export LDFLAGS_HARDENED_EXE="%{?_hardening_ldflags}" export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" %endif ./autogen.sh %{configure} \ %{?with_profiling: --with-profiling} \ %{?with_coverage: --with-coverage} \ %{!?with_doc: --with-brand=} \ %{!?with_hardening: --disable-hardening} \ %{?python_path: PYTHON=%{python_path}} \ %{?gnutls_priorities: --with-gnutls-priorities="%{gnutls_priorities}"} \ --with-initdir=%{_initrddir} \ --localstatedir=%{_var} \ --with-version=%{version}-%{release} %if 0%{?suse_version} >= 1200 # Fedora handles rpath removal automagically sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool %endif make %{_smp_mflags} V=1 all %check { cts/cts-pengine --run one-or-more-unrunnable-instances \ && cts/cts-cli \ && touch .CHECKED } 2>&1 | sed 's/[fF]ail/faiil/g' # prevent false positives in rpmlint [ -f .CHECKED ] && rm -f -- .CHECKED exit $? # TODO remove when rpm<4.14 compatibility irrelevant %install make DESTDIR=%{buildroot} docdir=%{pcmk_docdir} V=1 install mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig install -m 644 daemons/pacemakerd/pacemaker.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/pacemaker install -m 644 tools/crm_mon.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/crm_mon %if %{with upstart_job} mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/init install -m 644 pacemakerd/pacemaker.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.conf install -m 644 pacemakerd/pacemaker.combined.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.combined.conf install -m 644 tools/crm_mon.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/crm_mon.conf %endif %if %{defined _unitdir} mkdir -p ${RPM_BUILD_ROOT}%{_localstatedir}/lib/rpm-state/%{name} %endif # These are not actually scripts find %{buildroot} -name '*.xml' -type f -print0 | xargs -0 chmod a-x # Don't package static libs find %{buildroot} -name '*.a' -type f -print0 | xargs -0 rm -f find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f # Don't ship init scripts for systemd based platforms %if %{defined _unitdir} rm -f %{buildroot}/%{_initrddir}/pacemaker rm -f %{buildroot}/%{_initrddir}/pacemaker_remote %endif %if %{with coverage} GCOV_BASE=%{buildroot}/%{_var}/lib/pacemaker/gcov mkdir -p $GCOV_BASE find . -name '*.gcno' -type f | while read F ; do D=`dirname $F` mkdir -p ${GCOV_BASE}/$D cp $F ${GCOV_BASE}/$D done %endif %post %if %{defined _unitdir} %systemd_post pacemaker.service %else /sbin/chkconfig --add pacemaker || : %endif %preun %if %{defined _unitdir} %systemd_preun pacemaker.service %else /sbin/service pacemaker stop >/dev/null 2>&1 || : if [ $1 -eq 0 ]; then # Package removal, not upgrade /sbin/chkconfig --del pacemaker || : fi %endif %postun %if %{defined _unitdir} %systemd_postun_with_restart pacemaker.service %endif %pre remote %if %{defined _unitdir} # Stop the service before anything is touched, and remember to restart # it as one of the last actions (compared to using systemd_postun_with_restart, # this avoids suicide when sbd is in use) systemctl --quiet is-active pacemaker_remote if [ $? -eq 0 ] ; then mkdir -p %{_localstatedir}/lib/rpm-state/%{name} touch %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote systemctl stop pacemaker_remote >/dev/null 2>&1 else rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote fi %endif %post remote %if %{defined _unitdir} %systemd_post pacemaker_remote.service %else /sbin/chkconfig --add pacemaker_remote || : %endif %preun remote %if %{defined _unitdir} %systemd_preun pacemaker_remote.service %else /sbin/service pacemaker_remote stop >/dev/null 2>&1 || : if [ $1 -eq 0 ]; then # Package removal, not upgrade /sbin/chkconfig --del pacemaker_remote || : fi %endif %postun remote %if %{defined _unitdir} # This next line is a no-op, because we stopped the service earlier, but # we leave it here because it allows us to revert to the standard behavior # in the future if desired %systemd_postun_with_restart pacemaker_remote.service # Explicitly take care of removing the flag-file(s) upon final removal if [ $1 -eq 0 ] ; then rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote fi %endif %posttrans remote %if %{defined _unitdir} if [ -e %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote ] ; then systemctl start pacemaker_remote >/dev/null 2>&1 rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote fi %endif %post cli %if %{defined _unitdir} %systemd_post crm_mon.service %endif %preun cli %if %{defined _unitdir} %systemd_preun crm_mon.service %endif %postun cli %if %{defined _unitdir} %systemd_postun_with_restart crm_mon.service %endif %post cts %if %{defined _unitdir} %systemd_post pacemaker-cts-dummyd@.service %endif %preun cts %if %{defined _unitdir} %systemd_preun pacemaker-cts-dummyd@.service %endif %postun cts %if %{defined _unitdir} %systemd_postun_with_restart pacemaker-cts-dummyd@.service %endif %pre libs getent group %{gname} >/dev/null || groupadd -r %{gname} -g 189 getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u 189 -s /sbin/nologin -c "cluster user" %{uname} exit 0 %if %{defined ldconfig_scriptlets} %ldconfig_scriptlets libs %ldconfig_scriptlets cluster-libs %else %post libs -p /sbin/ldconfig %postun libs -p /sbin/ldconfig %post cluster-libs -p /sbin/ldconfig %postun cluster-libs -p /sbin/ldconfig %endif %files ########################################################### %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker %{_sbindir}/pacemakerd %if %{defined _unitdir} %{_unitdir}/pacemaker.service %else %{_initrddir}/pacemaker %endif -%exclude %{_libexecdir}/pacemaker/lrmd_test +%exclude %{_libexecdir}/pacemaker/cts-exec-helper %exclude %{_sbindir}/pacemaker_remoted %{_libexecdir}/pacemaker/* %{_sbindir}/crm_attribute %{_sbindir}/crm_master %{_sbindir}/crm_node %{_sbindir}/fence_legacy %{_sbindir}/stonith_admin %doc %{_mandir}/man7/crmd.* %doc %{_mandir}/man7/pengine.* %doc %{_mandir}/man7/stonithd.* %doc %{_mandir}/man7/ocf_pacemaker_controld.* %doc %{_mandir}/man7/ocf_pacemaker_o2cb.* %doc %{_mandir}/man7/ocf_pacemaker_remote.* %doc %{_mandir}/man8/crm_attribute.* %doc %{_mandir}/man8/crm_node.* %doc %{_mandir}/man8/crm_master.* %doc %{_mandir}/man8/fence_legacy.* %doc %{_mandir}/man8/pacemakerd.* %doc %{_mandir}/man8/stonith_admin.* %doc %{_datadir}/pacemaker/alerts %license licenses/GPLv2 %doc COPYING %doc ChangeLog %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cib %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/pengine /usr/lib/ocf/resource.d/pacemaker/controld /usr/lib/ocf/resource.d/pacemaker/o2cb /usr/lib/ocf/resource.d/pacemaker/remote %if %{with upstart_job} %config(noreplace) %{_sysconfdir}/init/pacemaker.conf %config(noreplace) %{_sysconfdir}/init/pacemaker.combined.conf %endif %files cli %dir %attr (750, root, %{gname}) %{_sysconfdir}/pacemaker %config(noreplace) %{_sysconfdir}/logrotate.d/pacemaker %config(noreplace) %{_sysconfdir}/sysconfig/crm_mon %if %{defined _unitdir} %{_unitdir}/crm_mon.service %endif %if %{with upstart_job} %config(noreplace) %{_sysconfdir}/init/crm_mon.conf %endif %{_sbindir}/attrd_updater %{_sbindir}/cibadmin %{_sbindir}/crm_diff %{_sbindir}/crm_error %{_sbindir}/crm_failcount %{_sbindir}/crm_mon %{_sbindir}/crm_resource %{_sbindir}/crm_standby %{_sbindir}/crm_verify %{_sbindir}/crmadmin %{_sbindir}/iso8601 %{_sbindir}/crm_shadow %{_sbindir}/crm_simulate %{_sbindir}/crm_report %{_sbindir}/crm_ticket %exclude %{_datadir}/pacemaker/alerts %exclude %{_datadir}/pacemaker/tests %{_datadir}/pacemaker %{_datadir}/snmp/mibs/PCMK-MIB.txt -%{_libexecdir}/pacemaker/lrmd_test +%{_libexecdir}/pacemaker/cts-exec-helper %exclude /usr/lib/ocf/resource.d/pacemaker/controld %exclude /usr/lib/ocf/resource.d/pacemaker/o2cb %exclude /usr/lib/ocf/resource.d/pacemaker/remote %dir /usr/lib/ocf %dir /usr/lib/ocf/resource.d /usr/lib/ocf/resource.d/pacemaker %doc %{_mandir}/man7/* %exclude %{_mandir}/man7/crmd.* %exclude %{_mandir}/man7/pengine.* %exclude %{_mandir}/man7/stonithd.* %exclude %{_mandir}/man7/ocf_pacemaker_controld.* %exclude %{_mandir}/man7/ocf_pacemaker_o2cb.* %exclude %{_mandir}/man7/ocf_pacemaker_remote.* %doc %{_mandir}/man8/* %exclude %{_mandir}/man8/crm_attribute.* %exclude %{_mandir}/man8/crm_node.* %exclude %{_mandir}/man8/crm_master.* %exclude %{_mandir}/man8/fence_legacy.* %exclude %{_mandir}/man8/pacemakerd.* %exclude %{_mandir}/man8/pacemaker_remoted.* %exclude %{_mandir}/man8/stonith_admin.* %license licenses/GPLv2 %doc COPYING %doc ChangeLog %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/blackbox %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cores %dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker %dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker/bundles %files libs %{_libdir}/libcib.so.* %{_libdir}/liblrmd.so.* %{_libdir}/libcrmservice.so.* %{_libdir}/libcrmcommon.so.* %{_libdir}/libpe_status.so.* %{_libdir}/libpe_rules.so.* %{_libdir}/libpengine.so.* %{_libdir}/libstonithd.so.* %{_libdir}/libtransitioner.so.* %license licenses/LGPLv2.1 %doc COPYING %doc ChangeLog %files cluster-libs %{_libdir}/libcrmcluster.so.* %license licenses/LGPLv2.1 %doc COPYING %doc ChangeLog %files remote %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker %if %{defined _unitdir} # state directory is shared between the subpackets # let rpm take care of removing it once it isn't # referenced anymore and empty %ghost %dir %{_localstatedir}/lib/rpm-state/%{name} %{_unitdir}/pacemaker_remote.service %else %{_initrddir}/pacemaker_remote %endif %{_sbindir}/pacemaker_remoted %{_mandir}/man8/pacemaker_remoted.* %license licenses/GPLv2 %doc COPYING %doc ChangeLog %files doc %doc %{pcmk_docdir} %license licenses/CC-BY-SA-4.0 %files cts %{py_site}/cts %{_datadir}/pacemaker/tests %if %{defined _unitdir} %{_unitdir}/pacemaker-cts-dummyd@.service %endif %license licenses/GPLv2 %doc COPYING %doc ChangeLog %files libs-devel %{_includedir}/pacemaker %{_libdir}/*.so %if %{with coverage} %{_var}/lib/pacemaker/gcov %endif %{_libdir}/pkgconfig/*.pc %license licenses/LGPLv2.1 %doc COPYING %doc ChangeLog %changelog diff --git a/tools/report.common.in b/tools/report.common.in index 0d433cb538..3a051e6f73 100644 --- a/tools/report.common.in +++ b/tools/report.common.in @@ -1,851 +1,851 @@ # # Originally based on hb_report # Copyright 2007 Dejan Muhamedagic # # Later changes copyright 2010-2018 Andrew Beekhof # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # host=`uname -n` shorthost=`echo $host | sed s:\\\\..*::` if [ -z $verbose ]; then verbose=0 fi # Target Files EVENTS_F=events.txt ANALYSIS_F=analysis.txt HALOG_F=cluster-log.txt BT_F=backtraces.txt SYSINFO_F=sysinfo.txt SYSSTATS_F=sysstats.txt DLM_DUMP_F=dlm_dump.txt CRM_MON_F=crm_mon.txt MEMBERSHIP_F=members.txt CRM_VERIFY_F=crm_verify.txt PERMISSIONS_F=permissions.txt CIB_F=cib.xml CIB_TXT_F=cib.txt DRBD_INFO_F=drbd_info.txt EVENT_PATTERNS=" state do_state_transition membership pcmk_peer_update.*(lost|memb): quorum crmd.*crm_update_quorum pause Process.pause.detected resources (lrmd|pacemaker-execd).*rsc:(start|stop) stonith te_fence_node|stonith-ng.*log_oper.*report|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=) start_stop shutdown.decision|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete " # superset of all packages of interest on all distros # (the package manager will be used to validate the installation # of any of these packages that are installed) PACKAGES="pacemaker pacemaker-libs pacemaker-cluster-libs libpacemaker3 pacemaker-remote pacemaker-pygui pacemaker-pymgmt pymgmt-client corosync corosynclib libcorosync4 resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord ocfs2-tools ocfs2-tools-o2cb ocfs2console ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen lvm2 lvm2-clvm cmirrord libdlm libdlm2 libdlm3 hawk ruby lighttpd kernel-default kernel-pae kernel-xen glibc " # Potential locations of system log files SYSLOGS=" /var/log/* /var/logs/* /var/syslog/* /var/adm/* /var/log/ha/* /var/log/cluster/* " # Whether pacemaker_remoted was found (0 = yes, 1 = no, -1 = haven't looked yet) REMOTED_STATUS=-1 # # keep the user posted # record() { if [ x != x"$REPORT_HOME" -a -d "${REPORT_HOME}/$shorthost" ]; then rec="${REPORT_HOME}/$shorthost/report.out" elif [ x != x"${l_base}" -a -d "${l_base}" ]; then rec="${l_base}/report.summary" else rec="/dev/null" fi printf "%-10s $*\n" "$shorthost:" 2>&1 >> "${rec}" } log() { printf "%-10s $*\n" "$shorthost:" 1>&2 record "$*" } debug() { if [ $verbose -gt 0 ]; then log "Debug: $*" else record "Debug: $*" fi } info() { log "$*" } warning() { log "WARN: $*" } fatal() { log "ERROR: $*" exit 1 } is_running() { ps -ef | egrep -qs $(echo "$1" | sed -e 's/^\(.\)/[\1]/') } has_remoted() { if [ $REMOTED_STATUS -eq -1 ]; then REMOTED_STATUS=1 if which pacemaker_remoted >/dev/null 2>&1; then REMOTED_STATUS=0 elif [ -x "@sbindir@/pacemaker_remoted" ]; then REMOTED_STATUS=0 else # @TODO: the binary might be elsewhere, # but a global search is too expensive for d in /{usr,opt}/{local/,}{s,}bin; do if [ -x "${d}/pacemaker_remoted" ]; then REMOTED_STATUS=0 fi done fi fi return $REMOTED_STATUS } # found_dir found_dir() { echo "$2" info "Pacemaker $1 found in: $2" } detect_daemon_dir() { info "Searching for where Pacemaker daemons live... this may take a while" for d in \ {/usr,/usr/local,/opt/local,@exec_prefix@}/{libexec,lib64,lib}/pacemaker do # pacemaker and pacemaker-cts packages can install to daemon directory, # so check for a file from each - if [ -e $d/pengine ] || [ -e $d/lrmd_test ]; then + if [ -e $d/pengine ] || [ -e $d/cts-exec-helper ]; then found_dir "daemons" "$d" return fi done # Pacemaker Remote nodes don't need to install daemons if has_remoted; then info "Pacemaker daemons not found (this appears to be a Pacemaker Remote node)" return fi - for f in $(find / -maxdepth $maxdepth -type f -name pengine -o -name lrmd_test); do + for f in $(find / -maxdepth $maxdepth -type f -name pengine -o -name cts-exec-helper); do d=$(dirname "$f") found_dir "daemons" "$d" return done fatal "Pacemaker daemons not found (nonstandard installation?)" } detect_cib_dir() { if [ "-f ${local_state_dir}/lib/pacemaker/cib/cib.xml" ]; then found_dir "config files" "$d" return fi # Pacemaker Remote nodes don't need a CIB if has_remoted; then info "Pacemaker config not found (this appears to be a Pacemaker Remote node)" return fi info "Searching for where Pacemaker keeps config information... this may take a while" # TODO: What about false positives where someone copied the CIB? for f in $(find / -maxdepth $maxdepth -type f -name cib.xml); do d=$(dirname $f) found_dir "config files" "$d" return done warning "Pacemaker config not found (nonstandard installation?)" } detect_state_dir() { if [ -n "$CRM_CONFIG_DIR" ]; then # Assume new layout # $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores) dirname "$CRM_CONFIG_DIR" # Pacemaker Remote nodes might not have a CRM_CONFIG_DIR elif [ -d "$local_state_dir/lib/pacemaker" ]; then echo $local_state_dir/lib/pacemaker fi } detect_pe_dir() { config_root="$1" d="$config_root/pengine" if [ -d "$d" ]; then found_dir "policy engine inputs" "$d" return fi if has_remoted; then info "Pacemaker policy engine inputs not found (this appears to be a Pacemaker Remote node)" return fi info "Searching for where Pacemaker keeps Policy Engine inputs... this may take a while" for d in $(find / -maxdepth $maxdepth -type d -name pengine); do found_dir "policy engine inputs" "$d" return done fatal "Pacemaker policy engine inputs not found (nonstandard installation?)" } detect_host() { local_state_dir=@localstatedir@ if [ -d $local_state_dir/run ]; then CRM_STATE_DIR=$local_state_dir/run/crm else info "Searching for where Pacemaker keeps runtime data... this may take a while" for d in `find / -maxdepth $maxdepth -type d -name run`; do local_state_dir=`dirname $d` CRM_STATE_DIR=$d/crm break done info "Found: $CRM_STATE_DIR" fi debug "Machine runtime directory: $local_state_dir" debug "Pacemaker runtime data located in: $CRM_STATE_DIR" CRM_DAEMON_DIR=$(detect_daemon_dir) CRM_CONFIG_DIR=$(detect_cib_dir) config_root=$(detect_state_dir) # Older versions had none BLACKBOX_DIR=$config_root/blackbox debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" PE_STATE_DIR=$(detect_pe_dir "$config_root") CRM_CORE_DIRS="" for d in $config_root/cores $local_state_dir/lib/corosync; do if [ -d $d ]; then CRM_CORE_DIRS="$CRM_CORE_DIRS $d" fi done debug "Core files located under: $CRM_CORE_DIRS" } time2str() { perl -e "use POSIX; print strftime('%x %X',localtime($1));" } get_time() { perl -e "\$time=\"$*\";" -e ' $unix_tm = 0; eval "use Date::Parse"; if (index($time, ":") < 0) { } elsif (!$@) { $unix_tm = str2time($time); } else { eval "use Date::Manip"; if (!$@) { $unix_tm = UnixDate(ParseDateString($time), "%s"); } } if ($unix_tm != "") { print int($unix_tm); } else { print ""; } ' } get_time_() { warning "Unknown time format used by: $*" } get_time_syslog() { awk '{print $1,$2,$3}' } get_time_legacy() { awk '{print $2}' | sed 's/_/ /' } get_time_iso8601() { awk '{print $1}' } get_time_format_for_string() { l="$*" t=$(get_time `echo $l | get_time_syslog`) if [ "x$t" != x ]; then echo syslog return fi t=$(get_time `echo $l | get_time_iso8601`) if [ "x$t" != x ]; then echo iso8601 return fi t=$(get_time `echo $l | get_time_legacy`) if [ "x$t" != x ]; then echo legacy return fi } get_time_format() { t=0 l="" func="" trycnt=10 while [ $trycnt -gt 0 ] && read l; do func=$(get_time_format_for_string $l) if [ "x$func" != x ]; then break fi trycnt=$(($trycnt-1)) done #debug "Logfile uses the $func time format" echo $func } get_first_time() { l="" format=$1 while read l; do t=$(echo $l | get_time_$format) ts=$(get_time $t) if [ "x$ts" != x ]; then echo "$ts" return fi done } get_last_time() { l="" best=`date +%s` # Now format=$1 while read l; do t=$(echo $l | get_time_$format) ts=$(get_time $t) if [ "x$ts" != x ]; then best=$ts fi done echo $best } linetime() { l=`tail -n +$2 $1 | grep -a ":[0-5][0-9]:" | head -n 1` format=`get_time_format_for_string $l` t=`echo $l | get_time_$format` get_time "$t" } # # findmsg # # Print the names of up to system logs that contain , # ordered by most recently modified. # findmsg() { max=$1 pattern="$2" found=0 # List all potential system logs ordered by most recently modified. candidates=$(ls -1td $SYSLOGS 2>/dev/null) if [ -z "$candidates" ]; then debug "No system logs found to search for pattern \'$pattern\'" return fi # Portable way to handle files with spaces in their names. SAVE_IFS=$IFS IFS=" " # Check each log file for matches. logfiles="" for f in $candidates; do local cat="" # We only care about readable files with something in them. if [ ! -f "$f" ] || [ ! -r "$f" ] || [ ! -s "$f" ] ; then continue fi cat=$(find_decompressor "$f") # We want to avoid grepping through potentially huge binary logs such # as lastlog. However, control characters sometimes find their way into # text logs, so we use a heuristic of more than 256 nonprintable # characters in the file's first kilobyte. if [ $($cat "$f" 2>/dev/null | head -c 1024 | tr -d '[:print:][:space:]' | wc -c) -gt 256 ] then continue fi # Our patterns are ASCII, so we can use LC_ALL="C" to speed up grep $cat "$f" 2>/dev/null | LC_ALL="C" grep -q -e "$pattern" if [ $? -eq 0 ]; then # Add this file to the list of hits # (using newline as separator to handle spaces in names). if [ -z "$logfiles" ]; then logfiles="$f" else logfiles="$logfiles $f" fi # If we have enough hits, print them and return. found=$(($found+1)) if [ $found -ge $max ]; then debug "Pattern \'$pattern\' found in: [ $logfiles ]" IFS=$SAVE_IFS echo "$logfiles" return fi fi done 2>/dev/null IFS=$SAVE_IFS debug "Pattern \'$pattern\' not found in any system logs" } node_events() { if [ -e $1 ]; then Epatt=`echo "$EVENT_PATTERNS" | while read title p; do [ -n "$p" ] && echo -n "|$p"; done | sed 's/.//' ` grep -E "$Epatt" $1 fi } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } shrink() { olddir=$PWD dir=`dirname $1` base=`basename $1` target=$1.tar tar_options="cf" variant=`pickfirst bzip2 gzip xz false` case $variant in bz*) tar_options="jcf" target="$target.bz2" ;; gz*) tar_options="zcf" target="$target.gz" ;; xz*) tar_options="Jcf" target="$target.xz" ;; *) warning "Could not find a compression program, the resulting tarball may be huge" ;; esac if [ -e $target ]; then fatal "Destination $target already exists, specify an alternate name with --dest" fi cd $dir >/dev/null 2>&1 tar $tar_options $target $base >/dev/null 2>&1 cd $olddir >/dev/null 2>&1 echo $target } findln_by_time() { local logf=$1 local tm=$2 local first=1 # Some logs can be massive (over 1,500,000,000 lines have been seen in the wild) # Even just 'wc -l' on these files can take 10+ minutes local fileSize=`ls -lh | awk '{ print $5 }' | grep -ie G` if [ x$fileSize != x ]; then warning "$logf is ${fileSize} in size and could take many hours to process. Skipping." return fi local last=`wc -l < $logf` while [ $first -le $last ]; do mid=$((($last+$first)/2)) trycnt=10 while [ $trycnt -gt 0 ]; do tmid=`linetime $logf $mid` [ "$tmid" ] && break warning "cannot extract time: $logf:$mid; will try the next one" trycnt=$(($trycnt-1)) # shift the whole first-last segment first=$(($first-1)) last=$(($last-1)) mid=$((($last+$first)/2)) done if [ -z "$tmid" ]; then warning "giving up on log..." return fi if [ $tmid -gt $tm ]; then last=$(($mid-1)) elif [ $tmid -lt $tm ]; then first=$(($mid+1)) else break fi done echo $mid } dumplog() { local logf=$1 local from_line=$2 local to_line=$3 [ "$from_line" ] || return tail -n +$from_line $logf | if [ "$to_line" ]; then head -$(($to_line-$from_line+1)) else cat fi } # # find log/set of logs which are interesting for us # # # find log slices # find_decompressor() { case $1 in *bz2) echo "bzip2 -dc" ;; *gz) echo "gzip -dc" ;; *xz) echo "xz -dc" ;; *) echo "cat" ;; esac } # # check if the log contains a piece of our segment # is_our_log() { local logf=$1 local from_time=$2 local to_time=$3 local cat=`find_decompressor $logf` local format=`$cat $logf | get_time_format` local first_time=`$cat $logf | head -10 | get_first_time $format` local last_time=`$cat $logf | tail -10 | get_last_time $format` if [ x = "x$first_time" -o x = "x$last_time" ]; then warning "Skipping bad logfile '$1': Could not determine log dates" return 0 # skip (empty log?) fi if [ $from_time -gt $last_time ]; then # we shouldn't get here anyway if the logs are in order return 2 # we're past good logs; exit fi if [ $from_time -ge $first_time ]; then return 3 # this is the last good log fi # have to go further back if [ x = "x$to_time" -o $to_time -ge $first_time ]; then return 1 # include this log else return 0 # don't include this log fi } # # go through archived logs (timewise backwards) and see if there # are lines belonging to us # (we rely on untouched log files, i.e. that modify time # hasn't been changed) # arch_logs() { local logf=$1 local from_time=$2 local to_time=$3 # look for files such as: ha-log-20090308 or # ha-log-20090308.gz (.bz2) or ha-log.0, etc ls -t $logf $logf*[0-9z] 2>/dev/null | while read next_log; do is_our_log $next_log $from_time $to_time case $? in 0) ;; # noop, continue 1) echo $next_log # include log and continue debug "Found log $next_log" ;; 2) break;; # don't go through older logs! 3) echo $next_log # include log and continue debug "Found log $next_log" break ;; # don't go through older logs! esac done } # # print part of the log # drop_tmp_file() { [ -z "$tmp" ] || rm -f "$tmp" } print_logseg() { local logf=$1 local from_time=$2 local to_time=$3 # uncompress to a temp file (if necessary) local cat=`find_decompressor $logf` if [ "$cat" != "cat" ]; then tmp=`mktemp` $cat $logf > $tmp trap drop_tmp_file 0 sourcef=$tmp else sourcef=$logf tmp="" fi if [ "$from_time" = 0 ]; then FROM_LINE=1 else FROM_LINE=`findln_by_time $sourcef $from_time` fi if [ -z "$FROM_LINE" ]; then warning "couldn't find line for time $from_time; corrupt log file?" return fi TO_LINE="" if [ "$to_time" != 0 ]; then TO_LINE=`findln_by_time $sourcef $to_time` if [ -z "$TO_LINE" ]; then warning "couldn't find line for time $to_time; corrupt log file?" return fi if [ $FROM_LINE -lt $TO_LINE ]; then dumplog $sourcef $FROM_LINE $TO_LINE log "Including segment [$FROM_LINE-$TO_LINE] from $logf" else debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf" fi else dumplog $sourcef $FROM_LINE $TO_LINE log "Including all logs after line $FROM_LINE from $logf" fi drop_tmp_file trap "" 0 } # # find log/set of logs which are interesting for us # dumplogset() { local logf=$1 local from_time=$2 local to_time=$3 local logf_set=`arch_logs $logf $from_time $to_time` if [ x = "x$logf_set" ]; then return fi local num_logs=`echo "$logf_set" | wc -l` local oldest=`echo $logf_set | awk '{print $NF}'` local newest=`echo $logf_set | awk '{print $1}'` local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` # the first logfile: from $from_time to $to_time (or end) # logfiles in the middle: all # the last logfile: from beginning to $to_time (or end) case $num_logs in 1) print_logseg $newest $from_time $to_time;; *) print_logseg $oldest $from_time 0 for f in $mid_logfiles; do `find_decompressor $f` $f debug "including complete $f logfile" done print_logseg $newest 0 $to_time ;; esac } # cut out a stanza getstanza() { awk -v name="$1" ' !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start if ($1 == name) in_stanza = 1 } in_stanza { print } in_stanza && NF==1 && $1 == "}" { exit } ' } # supply stanza in $1 and variable name in $2 # (stanza is optional) getcfvar() { cf_type=$1; shift; cf_var=$1; shift; cf_file=$* [ -f "$cf_file" ] || return case $cf_type in corosync) sed 's/#.*//' < $cf_file | if [ $# -eq 2 ]; then getstanza "$cf_var" shift 1 else cat fi | awk -v varname="$cf_var" ' NF==2 && match($1,varname":$")==1 { print $2; exit; } ' ;; esac } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } # # figure out the cluster type, depending on the process list # and existence of configuration files # get_cluster_type() { if is_running corosync; then tool=`pickfirst corosync-objctl corosync-cmapctl` case $tool in *objctl) quorum=`$tool -a | grep quorum.provider | sed 's/.*=\s*//'`;; *cmapctl) quorum=`$tool | grep quorum.provider | sed 's/.*=\s*//'`;; esac stack="corosync" # Now we're guessing... # TODO: Technically these could be anywhere :-/ elif [ -f /etc/corosync/corosync.conf ]; then stack="corosync" else # We still don't know. This might be a Pacemaker Remote node, # or the configuration might be in a nonstandard location. stack="any" fi debug "Detected the '$stack' cluster stack" echo $stack } find_cluster_cf() { case $1 in corosync) best_size=0 best_file="" # TODO: Technically these could be anywhere :-/ for cf in /etc/corosync/corosync.conf; do if [ -f $cf ]; then size=`wc -l $cf | awk '{print $1}'` if [ $size -gt $best_size ]; then best_size=$size best_file=$cf fi fi done if [ -z "$best_file" ]; then debug "Looking for corosync configuration file. This may take a while..." for f in `find / -maxdepth $maxdepth -type f -name corosync.conf`; do best_file=$f break done fi debug "Located corosync config file: $best_file" echo "$best_file" ;; any) # Cluster type is undetermined. Don't complain, because this # might be a Pacemaker Remote node. ;; *) warning "Unknown cluster type: $1" ;; esac } # # check for the major prereq for a) parameter parsing and b) # parsing logs # t=`get_time "12:00"` if [ "$t" = "" ]; then fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)" fi # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: