diff --git a/cts/__init__.py b/cts/__init__.py index feff2bbd39..0cafb8c8c9 100644 --- a/cts/__init__.py +++ b/cts/__init__.py @@ -1,2 +1,19 @@ -# This file is required for python packages. -# It is intentionally empty. +"""Python modules for Pacemaker's Cluster Test Suite (CTS) + +This package provides the following modules: + +CIB +cib_xml +CM_ais +CM_lha +CTSaudits +CTS +CTSscenarios +CTStests +CTSvars +environment +logging +patterns +remote +watcher +""" diff --git a/cts/logging.py b/cts/logging.py index 08da44ad62..13192f2fc8 100644 --- a/cts/logging.py +++ b/cts/logging.py @@ -1,112 +1,159 @@ -''' -Classes related to producing logs -''' - -__copyright__=''' -Copyright (C) 2014 Andrew Beekhof -Licensed under the GNU GPL. -''' - -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- -import string, sys, time, os - -class Logger: +""" Logging classes for Pacemaker's Cluster Test Suite (CTS) +""" + +# Pacemaker targets compatibility with Python 2.6+ and 3.2+ +from __future__ import print_function, unicode_literals, absolute_import, division + +__copyright__ = "Copyright (C) 2014-2016 Andrew Beekhof " +__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" + +import io +import os +import sys +import time + + +# Wrapper to detect a string under Python 2 or 3 +try: + _StringType = basestring +except NameError: + _StringType = str + +def _is_string(obj): + """ Return True if obj is a simple string. """ + + return isinstance(obj, _StringType) + + +def _strip(line): + """ Wrapper for strip() that works regardless of Python version """ + + if sys.version_info < (3,): + return line.decode('utf-8').strip() + else: + return line.strip() + + +def _rstrip(line): + """ Wrapper for rstrip() that works regardless of Python version """ + + if sys.version_info < (3,): + return line.decode('utf-8').rstrip() + else: + return line.rstrip() + + +class Logger(object): + """ Abstract class to use as parent for CTS logging classes """ + TimeFormat = "%b %d %H:%M:%S\t" + def __init__(self): + # Whether this logger should print debug messages + self.debug_target = True + def __call__(self, lines): + """ Log specified messages """ + raise ValueError("Abstract class member (__call__)") + def write(self, line): - return self(line.rstrip()) + """ Log a single line excluding trailing whitespace """ + + return self(_rstrip(line)) + def writelines(self, lines): - for s in lines: - self.write(s) - return 1 - def flush(self): + """ Log a series of lines excluding trailing whitespace """ + + for line in lines: + self.write(line) return 1 - def isatty(self): - return None + + def is_debug_target(self): + """ Return True if this logger should receive debug messages """ + + return self.debug_target + class StdErrLog(Logger): + """ Class to log 
to standard error """ def __init__(self, filename, tag): - pass + Logger.__init__(self) + self.debug_target = False def __call__(self, lines): - t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) - if isinstance(lines, basestring): - sys.__stderr__.writelines([t, lines, "\n"]) - else: - for line in lines: - sys.__stderr__.writelines([t, line, "\n"]) + """ Log specified lines to stderr """ + + timestamp = time.strftime(Logger.TimeFormat, + time.localtime(time.time())) + if _is_string(lines): + lines = [lines] + for line in lines: + print("%s%s" % (timestamp, line), file=sys.__stderr__) sys.__stderr__.flush() - def name(self): - return "StdErrLog" class FileLog(Logger): - def __init__(self, filename, tag): - self.logfile=filename - self.hostname = os.uname()[1]+" " + """ Class to log to a file """ - self.source = "" + def __init__(self, filename, tag): + Logger.__init__(self) + self.logfile = filename + self.hostname = os.uname()[1] if tag: - self.source = tag+": " + self.source = tag + ": " + else: + self.source = "" def __call__(self, lines): + """ Log specified lines to the file """ - fd = open(self.logfile, "a") - t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) - - if isinstance(lines, basestring): - fd.writelines([t, self.hostname, self.source, lines, "\n"]) - else: - for line in lines: - fd.writelines([t, self.hostname, self.source, line, "\n"]) - fd.close() + logf = io.open(self.logfile, "at") + timestamp = time.strftime(Logger.TimeFormat, + time.localtime(time.time())) + if _is_string(lines): + lines = [lines] + for line in lines: + print("%s%s %s%s" % (timestamp, self.hostname, self.source, line), + file=logf) + logf.close() - def name(self): - return "FileLog" -class LogFactory: +class LogFactory(object): + """ Singleton to log messages to various destinations """ - log_methods=[] + log_methods = [] have_stderr = False - def __init__(self): - pass - def add_file(self, filename, tag=None): + """ When logging messages, log 
them to specified file """ + if filename: LogFactory.log_methods.append(FileLog(filename, tag)) def add_stderr(self): + """ When logging messages, log them to standard error """ + if not LogFactory.have_stderr: LogFactory.have_stderr = True LogFactory.log_methods.append(StdErrLog(None, None)) def log(self, args): + """ Log a message (to all configured log destinations) """ + for logfn in LogFactory.log_methods: - logfn(string.strip(args)) + logfn(_strip(args)) def debug(self, args): + """ Log a debug message (to all configured log destinations) """ + for logfn in LogFactory.log_methods: - if logfn.name() != "StdErrLog": - logfn("debug: %s" % string.strip(args)) + if logfn.is_debug_target(): + logfn("debug: %s" % _strip(args)) def traceback(self, traceback): + """ Log a stack trace (to all configured log destinations) """ + for logfn in LogFactory.log_methods: traceback.print_exc(50, logfn) diff --git a/doc/Pacemaker_Development/en-US/Ch-Python.txt b/doc/Pacemaker_Development/en-US/Ch-Python.txt index 55f8b66c20..dd8c72fabd 100644 --- a/doc/Pacemaker_Development/en-US/Ch-Python.txt +++ b/doc/Pacemaker_Development/en-US/Ch-Python.txt @@ -1,135 +1,141 @@ = Python Coding Guidelines = //// We prefer [[ch-NAME]], but older versions of asciidoc don't deal well with that construct for chapter headings //// anchor:ch-python-coding[Chapter 3, Python Coding Guidelines] [[s-python-boilerplate]] == Python Boilerplate == indexterm:[Python,boilerplate] indexterm:[licensing,Python boilerplate] Every Python file should start like this: ==== [source,Python] ---- [] """ """ # Pacemaker targets compatibility with Python 2.6+ and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright (C) Andrew Beekhof " __license__ = " WITHOUT ANY WARRANTY" ---- ==== If the file is meant to be directly executed, the first line (++) should be +#!/usr/bin/python+. If it is meant to be imported, omit this line. 
+<BRIEF-DESCRIPTION>+ is obviously a brief description of the file's purpose. The string may contain any other information typically used in a Python file https://www.python.org/dev/peps/pep-0257/[docstring]. The +import+ statement is discussed further in <<s-python-future-imports>>. +<YEAR>+ is the year the code was 'originally' created (it is the most important date for copyright purposes, as it establishes priority and the point from which expiration is calculated). If the code is modified in later years, add +-YYYY+ with the most recent year of modification. +<LICENSE>+ should follow the policy set forth in the https://github.com/ClusterLabs/pacemaker/blob/master/COPYING[+COPYING+] file, generally one of "GNU General Public License version 2 or later (GPLv2+)" or "GNU Lesser General Public License version 2.1 or later (LGPLv2.1+)". == Python Compatibility == indexterm:[Python,2] indexterm:[Python,3] indexterm:[Python,versions] Pacemaker targets compatibility with Python 2.6 and later, and Python 3.2 and later. These versions have added features to be more compatible with each other, allowing us to support both the 2 and 3 series with the same code. It is a good idea to test any changes with both Python 2 and 3. [[s-python-future-imports]] === Python Future Imports === The future imports used in <<s-python-boilerplate>> mean: * All print statements must use parentheses, and printing without a newline is accomplished with the +end=' '+ parameter rather than a trailing comma. * All string literals will be treated as Unicode (the +u+ prefix is unnecessary, and must not be used, because it is not available in Python 3.2). * Local modules must be imported using +from . import+ (rather than just +import+). To import one item from a local module, use +from .modulename import+ (rather than +from modulename import+). * Division using +/+ will always return a floating-point result (use +//+ if you want the integer floor instead). 
=== Other Python Compatibility Requirements === * When specifying an exception variable, always use +as+ instead of a comma (e.g. +except Exception as e+ or +except (TypeError, IOError) as e+). Use +e.args+ to access the error arguments (instead of iterating over or subscripting +e+). * Use +in+ (not +has_key()+) to determine if a dictionary has a particular key. * Always use the I/O functions from the +io+ module rather than the native I/O functions (e.g. +io.open()+ rather than +open()+). * When opening a file, always use the +t+ (text) or +b+ (binary) mode flag. +* When creating classes, always specify a parent class to ensure that it is a + "new-style" class (e.g. +class Foo(object):+ rather than +class Foo:+). * Be aware of the bytes type added in Python 3. Many places where strings are used in Python 2 use bytes or bytearrays in Python 3 (for example, the pipes used with +subprocess.Popen()+). Code should handle both possibilities. * Be aware that the +items()+, +keys()+, and +values()+ methods of dictionaries return lists in Python 2 and views in Python 3. In many cases, no special handling is required, but if the code needs to use list methods on the result, convert the result to a list first. * Do not name variables +with+ or +as+. * Do not raise or catch strings as exceptions (e.g. +raise "Bad thing"+). * Do not use the +cmp+ parameter of sorting functions (use +key+ instead, if needed) or the +$$__cmp__()$$+ method of classes (implement rich comparison methods such as +$$__lt__()$$+ instead, if needed). * Do not use the +buffer+ type. * Do not use features not available in all targeted Python versions. 
Common examples include: ** The +argparse+, +html+, +ipaddress+, +sysconfig+, and +UserDict+ modules ** The +collections.OrderedDict+ class ** The +subprocess.run()+ function ** The +subprocess.DEVNULL+ constant ** +subprocess+ module-specific exceptions ** Set literals (+{1, 2, 3}+) === Python Usages to Avoid === Avoid the following if possible, otherwise research the compatibility issues involved (hacky workarounds are often available): * long integers * octal integer literals * mixed binary and string data in one data file or variable * metaclasses * +locale.strcoll+ and +locale.strxfrm+ * the +configparser+ and +ConfigParser+ modules * importing compatibility modules such as +six+ (so we don't have to add them to Pacemaker's dependencies) == Formatting Python Code == indexterm:[Python,formatting] * Indentation must be 4 spaces, no tabs. * Do not leave trailing whitespace. * Lines should be no longer than 80 characters unless limiting line length significantly impacts readability. For Python, this limitation is flexible since breaking a line often impacts readability, but definitely keep it under 120 characters. * Where not conflicting with this style guide, it is recommended (but not required) to follow https://www.python.org/dev/peps/pep-0008/[PEP 8]. +* It is recommended (but not required) to format Python code such that + `pylint --disable=line-too-long,too-many-lines,too-many-instance-attributes,too-many-arguments,too-many-statements` + produces minimal complaints (even better if you don't need to disable all + those checks). 
diff --git a/doc/Pacemaker_Development/en-US/Revision_History.xml b/doc/Pacemaker_Development/en-US/Revision_History.xml index d1ea73cdc6..fd29d52672 100644 --- a/doc/Pacemaker_Development/en-US/Revision_History.xml +++ b/doc/Pacemaker_Development/en-US/Revision_History.xml @@ -1,40 +1,40 @@ %BOOK_ENTITIES; ]> Revision History 1-0 Tue Jul 26 2016 KenGaillot kgaillot@redhat.com Convert coding guidelines and developer FAQ to Publican document 1-1 - Wed Aug 17 2016 + Mon Aug 29 2016 KenGaillot kgaillot@redhat.com Add Python coding guidelines, and more about licensing diff --git a/fencing/fence_dummy b/fencing/fence_dummy index a44a9355e8..8137b8add7 100755 --- a/fencing/fence_dummy +++ b/fencing/fence_dummy @@ -1,419 +1,463 @@ #!/usr/bin/python """Dummy fence agent for testing """ # Pacemaker targets compatibility with Python 2.6+ and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright (C) 2012-2016 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import re import sys import time import random import atexit import getopt AGENT_VERSION = "4.0.0" OCF_VERSION = "1.0" SHORT_DESC = "Dummy fence agent" LONG_DESC = """fence_dummy is a fake fencing agent which reports success based on its mode (pass|fail|random) without doing anything.""" # Short options used: ifhmnoqsvBDHMRUV -all_opt = { +ALL_OPT = { "quiet" : { "getopt" : "q", "help" : "", - "order" : 50 }, + "order" : 50 + }, "verbose" : { "getopt" : "v", "longopt" : "verbose", "help" : "-v, --verbose Verbose mode", "required" : "0", "shortdesc" : "Verbose mode", - "order" : 51 }, + "order" : 51 + }, "debug" : { "getopt" : "D:", - "longopt" : "debug-file", + "longopt" : "debug-file", "help" : "-D, --debug-file=[debugfile] Debugging to output file", "required" : "0", "shortdesc" : "Write debug information to given file", - "order" : 52 }, - "version" : { + "order" : 52 + }, + 
"version" : { "getopt" : "V", "longopt" : "version", "help" : "-V, --version Display version information and exit", "required" : "0", "shortdesc" : "Display version information and exit", - "order" : 53 }, + "order" : 53 + }, "help" : { "getopt" : "h", "longopt" : "help", "help" : "-h, --help Display this help and exit", "required" : "0", "shortdesc" : "Display help and exit", - "order" : 54 }, + "order" : 54 + }, "action" : { "getopt" : "o:", "longopt" : "action", "help" : "-o, --action=[action] Action: status, list, reboot (default), off or on", "required" : "1", "shortdesc" : "Fencing Action", "default" : "reboot", - "order" : 1 }, + "order" : 1 + }, "nodename" : { "getopt" : "N:", "longopt" : "nodename", "help" : "-N, --nodename Node name of fence victim (ignored)", "required" : "0", "shortdesc" : "The node name of fence victim (ignored)", - "order" : 2 }, + "order" : 2 + }, "mode": { "getopt" : "M:", "longopt" : "mode", "required" : "0", "help" : "-M, --mode=(pass|fail|random) Exit status to return for non-monitor operations", "shortdesc" : "Whether fence operations should always pass, always fail, or fail at random", - "order" : 3 }, + "order" : 3 + }, "monitor_mode" : { "getopt" : "m:", "longopt" : "monitor_mode", "help" : "-m, --monitor_mode=(pass|fail|random) Exit status to return for monitor operations", "required" : "0", "shortdesc" : "Whether monitor operations should always pass, always fail, or fail at random", - "order" : 3 }, + "order" : 3 + }, "random_sleep_range": { "getopt" : "R:", "required" : "0", "longopt" : "random_sleep_range", "help" : "-R, --random_sleep_range=[seconds] Sleep between 1 and [seconds] before returning", "shortdesc" : "Wait randomly between 1 and [seconds]", - "order" : 3 }, + "order" : 3 + }, "mock_dynamic_hosts" : { "getopt" : "H:", "longopt" : "mock_dynamic_hosts", "help" : "-H, --mock_dynamic_hosts=[list] What to return when dynamically queried for possible targets", "required" : "0", "shortdesc" : "A list of hosts we can 
fence", - "order" : 3 }, + "order" : 3 + }, "delay" : { "getopt" : "f:", "longopt" : "delay", "help" : "-f, --delay [seconds] Wait X seconds before fencing is started", "required" : "0", "shortdesc" : "Wait X seconds before fencing is started", "default" : "0", - "order" : 3 }, + "order" : 3 + }, "port" : { "getopt" : "n:", "longopt" : "plug", "help" : "-n, --plug=[id] Physical plug number on device (ignored)", "required" : "1", "shortdesc" : "Ignored", - "order" : 4 }, + "order" : 4 + }, "switch" : { "getopt" : "s:", "longopt" : "switch", "help" : "-s, --switch=[id] Physical switch number on device (ignored)", "required" : "0", "shortdesc" : "Ignored", - "order" : 4 }, + "order" : 4 + }, "nodeid" : { "getopt" : "i:", "longopt" : "nodeid", "help" : "-i, --nodeid Corosync id of fence victim (ignored)", "required" : "0", "shortdesc" : "Ignored", - "order" : 4 }, + "order" : 4 + }, "uuid" : { "getopt" : "U:", "longopt" : "uuid", "help" : "-U, --uuid UUID of the VM to fence (ignored)", "required" : "0", "shortdesc" : "Ignored", - "order" : 4 } + "order" : 4 + } } +def agent(): + """ Return name this file was run as. """ + + return os.path.basename(sys.argv[0]) + + def fail_usage(message): - sys.stderr.write("%s\nPlease use '-h' for usage\n" % message) - sys.exit(1) + """ Print a usage message and exit. """ + + sys.exit("%s\nPlease use '-h' for usage" % message) def show_docs(options): """ Handle informational options (display info and exit). """ device_opt = options["device_opt"] - if "-h" in options: + if "-h" in options: usage(device_opt) sys.exit(0) if "-o" in options and options["-o"].lower() == "metadata": metadata(device_opt, options) sys.exit(0) if "-V" in options: print(AGENT_VERSION) sys.exit(0) +def sorted_options(avail_opt): + """ Return a list of all options, in their internally specified order. 
""" + + sorted_list = [(key, ALL_OPT[key]) for key in avail_opt] + sorted_list.sort(key=lambda x: x[1]["order"]) + return sorted_list + + def usage(avail_opt): - global all_opt + """ Print a usage message. """ print("Usage:") - print("\t" + os.path.basename(sys.argv[0]) + " [options]") + print("\t" + agent() + " [options]") print("Options:") - sorted_list = [ (key, all_opt[key]) for key in avail_opt ] - sorted_list.sort(key=lambda x: x[1]["order"]) - - for key, value in sorted_list: + for dummy, value in sorted_options(avail_opt): if len(value["help"]) != 0: print(" " + value["help"]) def metadata(avail_opt, options): - global all_opt + """ Print agent metadata. """ # This log is just for testing handling of stderr output - sys.stderr.write("asked for fence_dummy metadata\n") - - sorted_list = [ (key, all_opt[key]) for key in avail_opt ] - sorted_list.sort(key=lambda x: x[1]["order"]) + print("asked for fence_dummy metadata", file=sys.stderr) print(""" %s %s -""" % (os.path.basename(sys.argv[0]), SHORT_DESC, - AGENT_VERSION, OCF_VERSION, LONG_DESC)) +""" % (agent(), SHORT_DESC, AGENT_VERSION, OCF_VERSION, LONG_DESC)) - for option, value in sorted_list: - if "shortdesc" in all_opt[option]: - print("\t") + for option, dummy in sorted_options(avail_opt): + if "shortdesc" in ALL_OPT[option]: + print("\t") default = "" - if "default" in all_opt[option]: - default = "default=\""+str(all_opt[option]["default"])+"\"" - elif ("-" + all_opt[option]["getopt"][:-1]) in options: - if options["-" + all_opt[option]["getopt"][:-1]]: + default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1] + default_name_no_arg = "-" + ALL_OPT[option]["getopt"] + + if "default" in ALL_OPT[option]: + default = 'default="%s"' % str(ALL_OPT[option]["default"]) + elif default_name_arg in options: + if options[default_name_arg]: try: - default = "default=\"" + options["-" + all_opt[option]["getopt"][:-1]] + "\"" + default = 'default="%s"' % options[default_name_arg] except TypeError: ## @todo/@note: 
Currently there is no clean way how to handle lists ## we can create a string from it but we can't set it on command line - default = "default=\"" + str(options["-" + all_opt[option]["getopt"][:-1]]) +"\"" - elif ("-" + all_opt[option]["getopt"]) in options: - default = "default=\"true\" " + default = 'default="%s"' % str(options[default_name_arg]) + elif default_name_no_arg in options: + default = 'default="true"' - mixed = all_opt[option]["help"] + mixed = ALL_OPT[option]["help"] ## split it between option and help text - res = re.compile("^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) - if (None != res): + res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) + if None != res: mixed = res.group(1) mixed = mixed.replace("<", "<").replace(">", ">") print("\t\t") - if all_opt[option]["getopt"].count(":") > 0: + if ALL_OPT[option]["getopt"].count(":") > 0: print("\t\t") else: print("\t\t") - - print("\t\t" + all_opt[option]["shortdesc"] + "") + + print("\t\t" + ALL_OPT[option]["shortdesc"] + "") print("\t") print(""" \t \t \t \t \t \t \t """) -def process_input(avail_opt): - global all_opt +def option_longopt(option): + """ Return the getopt-compatible long-option name of the given option. """ - ## - ## Set standard environment - ##### - os.putenv("LANG", "C") - os.putenv("LC_ALL", "C") + if ALL_OPT[option]["getopt"].endswith(":"): + return ALL_OPT[option]["longopt"] + "=" + else: + return ALL_OPT[option]["longopt"] + + +def opts_from_command_line(argv, avail_opt): + """ Read options from command-line arguments. 
""" - ## - ## Prepare list of options for getopt - ##### + # Prepare list of options for getopt getopt_string = "" - longopt_list = [ ] + longopt_list = [] for k in avail_opt: - if k in all_opt: - getopt_string += all_opt[k]["getopt"] + if k in ALL_OPT: + getopt_string += ALL_OPT[k]["getopt"] else: fail_usage("Parse error: unknown option '"+k+"'") - if k in all_opt and "longopt" in all_opt[k]: - if all_opt[k]["getopt"].endswith(":"): - longopt_list.append(all_opt[k]["longopt"] + "=") - else: - longopt_list.append(all_opt[k]["longopt"]) + if k in ALL_OPT and "longopt" in ALL_OPT[k]: + longopt_list.append(option_longopt(k)) + + try: + opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list) + except getopt.GetoptError as error: + fail_usage("Parse error: " + error.msg) + + # Transform longopt to short one which are used in fencing agents + old_opt = opt + opt = {} + for old_option in dict(old_opt).keys(): + if old_option.startswith("--"): + for option in ALL_OPT.keys(): + if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option: + opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option] + else: + opt[old_option] = dict(old_opt)[old_option] + + # Compatibility Layer (with what? probably not needed for fence_dummy) + new_opt = dict(opt) + if "-T" in new_opt: + new_opt["-o"] = "status" + if "-n" in new_opt: + new_opt["-m"] = new_opt["-n"] + opt = new_opt - ## - ## Read options from command line or standard input - ##### + return opt + + +def opts_from_stdin(avail_opt): + """ Read options from standard input. """ + + opt = {} + name = "" + for line in sys.stdin.readlines(): + line = line.strip() + if line.startswith("#") or (len(line) == 0): + continue + + (name, value) = (line + "=").split("=", 1) + value = value[:-1] + + # Compatibility Layer (with what? 
probably not needed for fence_dummy) + if name == "option": + name = "action" + + if name not in avail_opt: + print("Parse error: Ignoring unknown option '%s'" % line, + file=sys.stderr) + continue + + if ALL_OPT[name]["getopt"].endswith(":"): + opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value + elif value.lower() in ["1", "yes", "on", "true"]: + opt["-"+ALL_OPT[name]["getopt"]] = "1" + + return opt + + +def process_input(avail_opt): + """ Set standard environment variables, and parse all options. """ + + # Set standard environment + os.putenv("LANG", "C") + os.putenv("LC_ALL", "C") + + # Read options from command line or standard input if len(sys.argv) > 1: - try: - opt, args = getopt.gnu_getopt(sys.argv[1:], getopt_string, longopt_list) - except getopt.GetoptError as error: - fail_usage("Parse error: " + error.msg) - - ## Transform longopt to short one which are used in fencing agents - ##### - old_opt = opt - opt = { } - for o in dict(old_opt).keys(): - if o.startswith("--"): - for x in all_opt.keys(): - if "longopt" in all_opt[x] and "--" + all_opt[x]["longopt"] == o: - opt["-" + all_opt[x]["getopt"].rstrip(":")] = dict(old_opt)[o] - else: - opt[o] = dict(old_opt)[o] - - ## Compatibility Layer - ##### - z = dict(opt) - if "-T" in z: - z["-o"] = "status" - if "-n" in z: - z["-m"] = z["-n"] - - opt = z - ## - ##### + return opts_from_command_line(sys.argv[1:], avail_opt) else: - opt = { } - name = "" - for line in sys.stdin.readlines(): - line = line.strip() - if ((line.startswith("#")) or (len(line) == 0)): - continue - - (name, value) = (line + "=").split("=", 1) - value = value[:-1] - - ## Compatibility Layer - ###### - if name == "option": - name = "action" - - ## - ###### - if name not in avail_opt: - sys.stderr.write("Parse error: Ignoring unknown option '"+line+"'\n") - continue - - if all_opt[name]["getopt"].endswith(":"): - opt["-"+all_opt[name]["getopt"].rstrip(":")] = value - elif ((value == "1") or (value.lower() == "yes") or (value.lower() == 
"on") or (value.lower() == "true")): - opt["-"+all_opt[name]["getopt"]] = "1" - return opt + return opts_from_stdin(avail_opt) def atexit_handler(): + """ Close stdout on exit. """ + try: sys.stdout.close() os.close(1) except IOError: - sys.stderr.write("%s failed to close standard output\n"%(sys.argv[0])) - sys.exit(1) + sys.exit("%s failed to close standard output" % agent()) def success_mode(options, option, default_value): """ Return exit code specified by option. """ if option in options: test_value = options[option] else: test_value = default_value if test_value == "pass": exitcode = 0 elif test_value == "fail": exitcode = 1 else: exitcode = random.randint(0, 1) return exitcode +def write_options(options): + """ Write out all options to debug file. """ + + try: + debugfile = io.open(options["-D"], 'at') + debugfile.write("### %s ###\n" % (time.strftime("%Y-%m-%d %H:%M:%S"))) + for option in sorted(options): + debugfile.write("%s=%s\n" % (option, options[option])) + debugfile.write("###\n") + debugfile.close() + except IOError: + pass + + def main(): - global all_opt - device_opt = all_opt.keys() + """ Make it so! 
""" + + device_opt = ALL_OPT.keys() ## Defaults for fence agent atexit.register(atexit_handler) options = process_input(device_opt) options["device_opt"] = device_opt show_docs(options) # dump input to file if "-D" in options: - try: - f = io.open(options["-D"], 'at') - f.write("### %s ###\n" % (time.strftime("%Y-%m-%d %H:%M:%S"))) - for v in sorted(options): - f.write("%s=%s\n" % (v, options[v])) - f.write("###\n") - f.close() - except IOError: - pass + write_options(options) if "-f" in options: val = int(options["-f"]) - sys.stderr.write("delay sleep for %d seconds\n" % val) + print("delay sleep for %d seconds" % val, file=sys.stderr) time.sleep(val) # random sleep for testing if "-R" in options: val = int(options["-R"]) ran = random.randint(1, val) - sys.stderr.write("random sleep for %d seconds\n" % ran) + print("random sleep for %d seconds" % ran, file=sys.stderr) time.sleep(ran) if "-o" in options: action = options["-o"] else: action = "action" if action == "monitor": exitcode = success_mode(options, "-m", "pass") elif action == "list": - sys.stderr.write("fence_dummy action (list) called\n") + print("fence_dummy action (list) called", file=sys.stderr) if "-H" in options: print(options["-H"]) exitcode = 0 else: - sys.stderr.write("were asked for hostlist but attribute mock_dynamic_hosts wasn't set\n") + print("dynamic hostlist requires mock_dynamic_hosts to be set", + file=sys.stderr) exitcode = 1 else: exitcode = success_mode(options, "-M", "random") # Ensure we generate some error output on failure exit. 
if exitcode == 1: - sys.stderr.write("simulated %s failure\n" % action) + print("simulated %s failure" % action, file=sys.stderr) sys.exit(exitcode) if __name__ == "__main__": main() diff --git a/fencing/regression.py.in b/fencing/regression.py.in index e38763a9a0..b8e2ccf590 100644 --- a/fencing/regression.py.in +++ b/fencing/regression.py.in @@ -1,1188 +1,1354 @@ #!/usr/bin/python """ Regression tests for Pacemaker's stonithd """ # Pacemaker targets compatibility with Python 2.6+ and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright (C) 2012-2016 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import sys import subprocess import shlex import time FENCE_DUMMY = "@datadir@/@PACKAGE@/tests/cts/fence_dummy" def shlex_split(command): """ Wrapper for shlex.split() that works around Python 2.6 bug """ - if sys.version_info < (2,7,): + if sys.version_info < (2, 7,): return shlex.split(command.encode('ascii')) else: return shlex.split(command) def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): - test = subprocess.Popen(shlex_split(command), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - test.wait() + """ Execute command and return its standard output """ + test = subprocess.Popen(shlex_split(command), stdout=subprocess.PIPE) + test.wait() return pipe_output(test).split("\n") def localname(): - """ Return the uname of the local host. 
""" + """ Return the uname of the local host """ our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] else: our_uname = "localhost" return our_uname -class Test: - def __init__(self, name, description, verbose = 0, with_cpg = 0): +def killall(process): + """ Kill all instances of a process """ + + cmd = shlex_split("killall -9 -q %s" % process) + test = subprocess.Popen(cmd, stdout=subprocess.PIPE) + test.wait() + + +class Test(object): + """ Executor for a single test """ + + def __init__(self, name, description, verbose=0, with_cpg=0): self.name = name self.description = description self.cmds = [] self.verbose = verbose self.result_txt = "" self.cmd_tool_output = "" - self.result_exitcode = 0; - - self.stonith_options = "-s" - self.enable_corosync = 0 + self.result_exitcode = 0 if with_cpg: self.stonith_options = "-c" self.enable_corosync = 1 + else: + self.stonith_options = "-s" + self.enable_corosync = 0 self.stonith_process = None self.stonith_output = "" self.stonith_patterns = [] self.negative_stonith_patterns = [] self.executed = 0 - def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None): + def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): + """ Add a command to be executed as part of this test """ + self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, } ) - def stop_pacemaker(self): - cmd = shlex_split("killall -9 -q pacemakerd") - test = subprocess.Popen(cmd, stdout=subprocess.PIPE) - test.wait() - def start_environment(self): - ### make sure we are in full control here ### - self.stop_pacemaker() + """ Prepare the host for executing a test """ - cmd = shlex_split("killall -9 -q stonithd") - test = subprocess.Popen(cmd, stdout=subprocess.PIPE) - test.wait() + # Make sure we 
are in full control + killall("pacemakerd") + killall("stonithd") if self.verbose: self.stonith_options = self.stonith_options + " -V" print("Starting stonithd with %s" % self.stonith_options) if os.path.exists("/tmp/stonith-regression.log"): os.remove('/tmp/stonith-regression.log') cmd = "@CRM_DAEMON_DIR@/stonithd %s -l /tmp/stonith-regression.log" % self.stonith_options self.stonith_process = subprocess.Popen(shlex_split(cmd)) time.sleep(1) def clean_environment(self): + """ Clean up the host after executing a test """ + if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.stonith_output = "" self.stonith_process = None - f = io.open('/tmp/stonith-regression.log', 'rt') - for line in f.readlines(): + logfile = io.open('/tmp/stonith-regression.log', 'rt') + for line in logfile.readlines(): self.stonith_output = self.stonith_output + line if self.verbose: print("Daemon Output Start") print(self.stonith_output) print("Daemon Output End") os.remove('/tmp/stonith-regression.log') def add_stonith_log_pattern(self, pattern): + """ Add a log pattern to expect from this test """ + self.stonith_patterns.append(pattern) - def add_stonith_negative_log_pattern(self, pattern): + def add_stonith_neg_log_pattern(self, pattern): + """ Add a log pattern that should not occur with this test """ + self.negative_stonith_patterns.append(pattern) def add_cmd(self, cmd, args): + """ Add a simple command to be executed as part of this test """ + self.__new_cmd(cmd, args, 0, "") def add_cmd_no_wait(self, cmd, args): + """ Add a simple command to be executed (without waiting) as part of this test """ + self.__new_cmd(cmd, args, 0, "", 1) - def add_cmd_check_stdout(self, cmd, args, match, no_match = ""): + def add_cmd_check_stdout(self, cmd, args, match, no_match=""): + """ Add a simple command with expected output to be executed as part of this test """ + self.__new_cmd(cmd, args, 0, match, 0, no_match) - def add_expected_fail_cmd(self, cmd, args, 
exitcode = 255): + def add_expected_fail_cmd(self, cmd, args, exitcode=255): + """ Add a command to be executed as part of this test and expected to fail """ + self.__new_cmd(cmd, args, exitcode, "") def get_exitcode(self): + """ Return the exit status of the last test execution """ + return self.result_exitcode def print_result(self, filler): + """ Print the result of the last test execution """ + print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): + """ Execute a command as part of this test """ + cmd = shlex_split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) subprocess.Popen(shlex_split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return 0 output = pipe_output(test, stderr=True) if self.verbose: print(output) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: test.returncode = -2 print("STDOUT string '%s' was not found in cmd output: %s" % (args['stdout_match'], output)) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: test.returncode = -2 print("STDOUT string '%s' was found in cmd output: %s" % (args['stdout_negative_match'], output)) - return test.returncode; + return test.returncode def count_negative_matches(self, outline): + """ Return 1 if a line matches patterns that shouldn't have occurred """ + count = 0 for line in self.negative_stonith_patterns: if outline.count(line): count = 1 if self.verbose: print("This pattern should not have matched = '%s" % (line)) return count def match_stonith_patterns(self): + """ Check test output for expected patterns """ + negative_matches = 0 cur = 0 pats = self.stonith_patterns total_patterns = len(self.stonith_patterns) - if len(self.stonith_patterns) == 0: + if len(self.stonith_patterns) == 0 and 
len(self.negative_stonith_patterns) == 0: return for line in self.stonith_output.split("\n"): negative_matches = negative_matches + self.count_negative_matches(line) if len(pats) == 0: continue cur = -1 - for p in pats: + for pat in pats: cur = cur + 1 if line.count(pats[cur]): del pats[cur] break if len(pats) > 0 or negative_matches: if self.verbose: - for p in pats: - print("Pattern Not Matched = '%s'" % p) + for pat in pats: + print("Pattern Not Matched = '%s'" % pat) - self.result_txt = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." % (self.name, len(pats), total_patterns, negative_matches) + msg = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." + self.result_txt = msg % (self.name, len(pats), total_patterns, negative_matches) self.result_exitcode = -1 def run(self): + """ Execute this test. """ + res = 0 i = 1 self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = 0 for cmd in self.cmds: res = self.run_cmd(cmd) if res != cmd['expected_exitcode']: print("Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode'])) - self.result_txt = "FAILURE - '%s' failed at step %d. Command: %s %s" % (self.name, i, cmd['cmd'], cmd['args']) + msg = "FAILURE - '%s' failed at step %d. 
Command: %s %s" + self.result_txt = msg % (self.name, i, cmd['cmd'], cmd['args']) self.result_exitcode = -1 break else: if self.verbose: print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() if self.result_exitcode == 0: self.match_stonith_patterns() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res -class Tests: - def __init__(self, verbose = 0): +class Tests(object): + """ Collection of all fencing regression tests """ + + def __init__(self, verbose=0): self.tests = [] self.verbose = verbose self.autogen_corosync_cfg = 0 if not os.path.exists("/etc/corosync/corosync.conf"): self.autogen_corosync_cfg = 1 - def new_test(self, name, description, with_cpg = 0): + def new_test(self, name, description, with_cpg=0): + """ Create a named test """ + test = Test(name, description, self.verbose, with_cpg) self.tests.append(test) return test def print_list(self): + """ List all registered tests """ + print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def start_corosync(self): + """ Start the corosync process """ + if self.verbose: print("Starting corosync") test = subprocess.Popen("corosync", stdout=subprocess.PIPE) test.wait() time.sleep(10) - def stop_corosync(self): - cmd = shlex_split("killall -9 -q corosync") - test = subprocess.Popen(cmd, stdout=subprocess.PIPE) - test.wait() - def run_single(self, name): + """ Run a single named test """ + for test in self.tests: if test.name == name: test.run() - break; + break def run_tests_matching(self, pattern): + """ Run all tests whose name matches a pattern """ + for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_cpg_only(self): + """ Run all corosync-enabled tests """ + for test in self.tests: if 
test.enable_corosync: test.run() def run_no_cpg(self): + """ Run all standalone tests """ + for test in self.tests: if not test.enable_corosync: test.run() def run_tests(self): + """ Run all tests """ + for test in self.tests: test.run() def exit(self): + """ Exit (with error status code if any test failed) """ + for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: sys.exit(-1) sys.exit(0) def print_results(self): - failures = 0; - success = 0; + """ Print summary of results of executed tests """ + + failures = 0 + success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) def build_api_sanity_tests(self): + """ Register tests to verify basic API usage """ + verbose_arg = "" if self.verbose: verbose_arg = "-V" test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-t %s" % (verbose_arg)) test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) test.add_cmd("@CRM_DAEMON_DIR@/stonith-test", "-m %s" % (verbose_arg)) def build_custom_timeout_tests(self): + """ Register tests to verify custom timeout usage """ + # custom timeout without topology test = self.new_test("cpg_custom_timeout_1", - "Verify per device timeouts work as expected without using topology.", 1) - test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") - test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o 
\"pcmk_off_timeout=4\"") + "Verify per device timeouts work as expected without using topology.", 1) + test.add_cmd('stonith_admin', + '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd('stonith_admin', + '-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') + test.add_cmd('stonith_admin', + '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4"') test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4 = 7 test.add_stonith_log_pattern("Total timeout set to 7") # custom timeout _WITH_ topology test = self.new_test("cpg_custom_timeout_2", - "Verify per device timeouts work as expected _WITH_ topology.", 1) - test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=1\"") - test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\" -o \"pcmk_off_timeout=4000\"") + "Verify per device timeouts work as expected _WITH_ topology.", 1) + test.add_cmd('stonith_admin', + '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd('stonith_admin', + '-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') + test.add_cmd('stonith_admin', + '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4000"') test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2") test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4000 = 4003 test.add_stonith_log_pattern("Total timeout set to 4003") def build_fence_merge_tests(self): + """ Register tests to verify when fence operations should be merged """ ### Simple test that 
overlapping fencing operations get merged test = self.new_test("cpg_custom_merge_single", - "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) + "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### one merger will happen test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur test = self.new_test("cpg_custom_merge_multiple", - "Verify multiple overlapping identical fencing operations are merged", 1) + "Verify multiple overlapping identical fencing operations are merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ") + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should 
occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur with topologies used test = self.new_test("cpg_custom_merge_with_topology", - "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) + "Verify multiple overlapping identical fencing operations are merged with fencing levels.", + 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging 
stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") + def build_fence_no_merge_tests(self): + """ Register tests to verify when fence operations should not be merged """ test = self.new_test("cpg_custom_no_merge", - "Verify differing fencing operations are not merged", 1) + "Verify differing fencing operations are not merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") - test.add_stonith_negative_log_pattern("Merging stonith action off for node node3 originating from client") + test.add_stonith_neg_log_pattern("Merging stonith action off for node node3 originating from client") def build_standalone_tests(self): + """ Register a grab bag of tests that can be executed in standalone or corosync mode """ + test_types = [ { - "prefix" : "standalone" , + "prefix" : "standalone", "use_cpg" : 0, }, { - "prefix" : "cpg" , + "prefix" : "cpg", "use_cpg" 
: 1, }, ] # test what happens when all devices timeout for test_type in test_types: test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], - "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + "Verify that all devices timeout, a fencing failure is returned.", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") if test_type["use_cpg"] == 1: # 194 = (unsigned char)-62 (-ETIME) test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 194) test.add_stonith_log_pattern("Total timeout set to 6") else: # 55 = (unsigned char)-201 (-pcmk_err_generic) test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", 55) test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ") # test what happens when multiple devices can fence a node, but the first device fails. 
for test_type in test_types: test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], - "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + "Verify that when one fence device fails for a node, the others are tried.", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2") if test_type["use_cpg"] == 1: test.add_stonith_log_pattern("Total timeout set to 6") # simple topology test for one device for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_simple" % test_type["prefix"], - "Verify all fencing devices at a level are used.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - + "Verify all fencing devices at a level are used.", test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("Total timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # add topology, delete topology, verify fencing still 
works for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_add_remove" % test_type["prefix"], - "Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - + "Verify fencing occurrs after all topology levels are removed", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-d node3 -i 1") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("Total timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level has multiple devices. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_device_fails" % test_type["prefix"], - "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - + "Verify if one device in a level fails, the other is tried.", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 40") test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201") 
test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level fails. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], - "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + "Verify if one level fails, the next leve is tried.", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 
-i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 3") test.add_stonith_log_pattern("Total timeout set to 18") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # test what happens when the first fencing level had devices that no one has registered for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], - "Verify topology can continue with missing devices.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + "Verify topology can continue with missing devices.", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v 
false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 2") # Test what happens if multiple fencing levels are defined, and then the first one is removed. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_removal" % test_type["prefix"], - "Verify level removal works.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + "Verify level removal works.", test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") + 
test.add_cmd("stonith_admin", + "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit. test.add_cmd("stonith_admin", "-d node3 -i 2") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 8") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") - test.add_stonith_negative_log_pattern("for host 'node3' with device 'false2' returned: ") + test.add_stonith_neg_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # Test targeting a topology level by node name pattern. 
for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_pattern" % test_type["prefix"], - "Verify targeting topology by node name pattern works.", test_type["use_cpg"]) + "Verify targeting topology by node name pattern works.", + test_type["use_cpg"]) test.add_cmd("stonith_admin", """-R true -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1 node2 node3" """) test.add_cmd("stonith_admin", """-r '@node.*' -i 1 -v true""") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test allowing commas and semicolons as delimiters in pcmk_host_list for test_type in test_types: test = self.new_test("%s_host_list_delimiters" % test_type["prefix"], "Verify commas and semicolons can be used as pcmk_host_list delimiters", test_type["use_cpg"]) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1,node2,node3" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=pcmk1;pcmk2;pcmk3" """) test.add_cmd("stonith_admin", "stonith_admin -F node2 -t 2") test.add_cmd("stonith_admin", "stonith_admin -F pcmk3 -t 2") test.add_stonith_log_pattern("for host 'node2' with device 'true1' returned: 0") test.add_stonith_log_pattern("for host 'pcmk3' with device 'true2' returned: 0") # test the stonith builds the correct list of devices that can fence a node. 
for test_type in test_types: test = self.new_test("%s_list_devices" % test_type["prefix"], - "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - + "Verify list of devices that can fence a node is correct", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") + test.add_cmd("stonith_admin", + "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", + "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1") # simple test of device monitor for test_type in test_types: test = self.new_test("%s_monitor" % test_type["prefix"], - "Verify device is reachable", test_type["use_cpg"]) + "Verify device is reachable", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-Q false1") test.add_expected_fail_cmd("stonith_admin", "-Q true2", 237) # Verify monitor occurs for duration of timeout period on failure for test_type in test_types: test = self.new_test("%s_monitor_timeout" % test_type["prefix"], "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '-R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" 
-o "pcmk_host_list=node3"') # 55 = (unsigned char)-201 (-pcmk_err_generic) test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", 55) test.add_stonith_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries for test_type in test_types: test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '-R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') # 55 = (unsigned char)-201 (-pcmk_err_generic) test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15", 55) test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test for test_type in test_types: test = self.new_test("%s_register" % test_type["prefix"], - "Verify devices can be registered and un-registered", test_type["use_cpg"]) + "Verify devices can be registered and un-registered", + test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) # simple reboot test for test_type in test_types: test = self.new_test("%s_reboot" % test_type["prefix"], - "Verify devices can be rebooted", test_type["use_cpg"]) + "Verify devices can be rebooted", + test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-B node3 -t 2") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", 237) # test fencing history. 
for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_fence_history" % test_type["prefix"], - "Verify last fencing operation is returned.", test_type["use_cpg"]) + "Verify last fencing operation is returned.", + test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2 -V") test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "") # simple test of dynamic list query for test_type in test_types: test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], - "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) + "Verify dynamic list of fencing devices can be retrieved.", + test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o mock_dynamic_hosts=fake_port_1") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # fence using dynamic list query for test_type in test_types: test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], - "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) + "Verify dynamic list of fencing devices can be retrieved.", + test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o mock_dynamic_hosts=fake_port_1") - test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V"); + test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V") # simple test of query using status action for test_type in test_types: test = self.new_test("%s_status_query" % 
test_type["prefix"], - "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) + "Verify dynamic list of fencing devices can be retrieved.", + test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") # test what happens when no reboot action is advertised for test_type in test_types: test = self.new_test("%s_no_reboot_support" % test_type["prefix"], - "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-B node1 -t 5 -V"); + "Verify reboot action defaults to off when no reboot action is advertised by agent.", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", "-B node1 -t 5 -V") test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'") - test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); + test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") # make sure reboot is used when reboot action is advertised for test_type in test_types: test = self.new_test("%s_with_reboot_support" % test_type["prefix"], - "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") - test.add_cmd("stonith_admin", "-B node1 -t 5 -V"); - 
test.add_stonith_negative_log_pattern("does not advertise support for 'reboot', performing 'off'") - test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); + "Verify reboot action can be used when metadata advertises it.", + test_type["use_cpg"]) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", "-B node1 -t 5 -V") + test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'") + test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") def build_nodeid_tests(self): + """ Register tests that use a corosync node id """ + our_uname = localname() ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("cpg_supply_nodeid", - "Verify nodeid is given when fence agent has nodeid as parameter", 1) + "Verify nodeid is given when fence agent has nodeid as parameter", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("cpg_do_not_supply_nodeid", - "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) + "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", + 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) - test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) - test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) + # use a host name that won't be in 
corosync.conf + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=regr-test\"") + test.add_cmd("stonith_admin", "-F regr-test -t 3") + test.add_stonith_neg_log_pattern("For stonith action (off) for victim regr-test, adding nodeid") ### verify nodeid use doesn't explode standalone mode test = self.new_test("standalone_do_not_supply_nodeid", - "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) + "Verify nodeid in metadata parameter list doesn't kill standalone mode", + 0) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) - test.add_stonith_negative_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) + test.add_stonith_neg_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) def build_unfence_tests(self): + """ Register tests that verify unfencing """ + our_uname = localname() ### verify unfencing using automatic unfencing test = self.new_test("cpg_unfence_required_1", - "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) + "Verify require unfencing on all devices when automatic=true in agent's metadata", + 1) + test.add_cmd('stonith_admin', + '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % 
(our_uname)) # both devices should be executed - test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); - test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); - + test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") + test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") ### verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("cpg_unfence_required_2", - "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=fail\" -o \"pcmk_host_list=%s\"" % (our_uname)) + "Verify require unfencing on all devices when automatic=true in agent's metadata", + 1) + test.add_cmd('stonith_admin', + '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R true2 -a fence_dummy_auto_unfence -o "mode=fail" -o "pcmk_host_list=%s"' % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), 143) ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_3", - "Verify require unfencing on all devices even when required devices are at different topology levels", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) + "Verify require unfencing on all devices even when at different topology levels", + 1) + test.add_cmd('stonith_admin', + '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R true2 
-a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) - test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); - test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); - + test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") + test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_4", - "Verify all required devices are executed even with topology levels fail.", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R true3 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R true4 -a fence_dummy_automatic_unfence -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R false4 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) + "Verify all required devices are executed even with topology levels fail.", + 1) + test.add_cmd('stonith_admin', + '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' 
% (our_uname)) + test.add_cmd('stonith_admin', + '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R true3 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R true4 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R false3 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) + test.add_cmd('stonith_admin', + '-R false4 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) - test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)"); - test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)"); - test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)"); - test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)"); + test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") + test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") + test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)") + 
test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)") + + def build_unfence_on_target_tests(self): + """ Register tests that verify unfencing that runs on the target """ + + our_uname = localname() ### verify unfencing using on_target device test = self.new_test("cpg_unfence_on_target_1", - "Verify unfencing with on_target = true", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) + "Verify unfencing with on_target = true", 1) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") - ### verify failure of unfencing using on_target device test = self.new_test("cpg_unfence_on_target_2", - "Verify failure unfencing with on_target = true", 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) + "Verify failure unfencing with on_target = true", + 1) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", 237) test.add_stonith_log_pattern("(on) to be executed on the target node") - ### verify unfencing using on_target device with topology test = self.new_test("cpg_unfence_on_target_3", - "Verify unfencing with on_target = true using topology", 1) + "Verify unfencing with on_target = true using topology", + 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) + 
test.add_cmd("stonith_admin", + "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology fails when victim node doesn't exist test = self.new_test("cpg_unfence_on_target_4", - "Verify unfencing failure with on_target = true using topology", 1) + "Verify unfencing failure with on_target = true using topology", + 1) - test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) - test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) + test.add_cmd("stonith_admin", + "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) + test.add_cmd("stonith_admin", + "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2") test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", 237) test.add_stonith_log_pattern("(on) to be executed on the target node") def build_remap_tests(self): + """ Register tests that verify remapping of reboots to off-on """ + test = self.new_test("cpg_remap_simple", "Verify sequential topology reboot is remapped to all-off-then-all-on", 1) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=1" -o "pcmk_reboot_timeout=10" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=2" -o "pcmk_reboot_timeout=20" """) 
test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_stonith_log_pattern("Total timeout set to 3 for peer's fencing of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") # fence_dummy sets "on" as an on_target action test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test = self.new_test("cpg_remap_automatic", "Verify remapped topology reboot skips automatic 'on'", 1) test.add_cmd("stonith_admin", - """-R true1 -a fence_dummy_automatic_unfence """ + """-R true1 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", - """-R true2 -a fence_dummy_automatic_unfence """ + """-R true2 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") - test.add_stonith_negative_log_pattern("perform op 'node_fake on' with") - test.add_stonith_negative_log_pattern("'on' failure") + 
test.add_stonith_neg_log_pattern("perform op 'node_fake on' with") + test.add_stonith_neg_log_pattern("'on' failure") test = self.new_test("cpg_remap_complex_1", - "Verify remapped topology reboot in second level works if non-remapped first level fails", 1) + "Verify remapped topology reboot in second level works if non-remapped first level fails", + 1) test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'false1'") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test = self.new_test("cpg_remap_complex_2", - "Verify remapped topology reboot failure in second level proceeds to third level", 1) + "Verify remapped topology reboot failure in second level proceeds to third level", + 1) test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o 
"mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true3 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", "-r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'false1'") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'false2'") test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'true2'") - test.add_stonith_negative_log_pattern("node_fake with true3") + test.add_stonith_neg_log_pattern("node_fake with true3") def setup_environment(self, use_corosync): + """ Prepare the host before executing any tests """ + if self.autogen_corosync_cfg and use_corosync: corosync_conf = (""" totem { version: 2 crypto_cipher: none crypto_hash: none nodeid: 101 secauth: off interface { ttl: 1 ringnumber: 0 mcastport: 6666 mcastaddr: 226.94.1.1 bindnetaddr: 127.0.0.1 } } logging { debug: off fileline: off to_syslog: no to_stderr: no syslog_facility: daemon timestamp: on to_logfile: yes logfile: /var/log/corosync.log logfile_priority: info } """) os.system("cat <<-END >>/etc/corosync/corosync.conf\n%s\nEND" % (corosync_conf)) if use_corosync: ### make sure we are in control ### - self.stop_corosync() + killall("corosync") self.start_corosync() os.system("cp %s /usr/sbin/fence_dummy" % FENCE_DUMMY) # modifies dummy agent to do 
require unfencing - os.system("sed 's/on_target=/automatic=/g' %s > /usr/sbin/fence_dummy_automatic_unfence" % FENCE_DUMMY); - os.system("chmod 711 /usr/sbin/fence_dummy_automatic_unfence") + os.system("sed 's/on_target=/automatic=/g' %s > /usr/sbin/fence_dummy_auto_unfence" % FENCE_DUMMY) + os.system("chmod 711 /usr/sbin/fence_dummy_auto_unfence") # modifies dummy agent to not advertise reboot - os.system("sed 's/^.*.*//g' %s > /usr/sbin/fence_dummy_no_reboot" % FENCE_DUMMY); + os.system("sed 's/^.*.*//g' %s > /usr/sbin/fence_dummy_no_reboot" % FENCE_DUMMY) os.system("chmod 711 /usr/sbin/fence_dummy_no_reboot") def cleanup_environment(self, use_corosync): + """ Clean up the host after executing desired tests """ + if use_corosync: - self.stop_corosync() + killall("corosync") if self.verbose and os.path.exists('/var/log/corosync.log'): print("Corosync output") - f = io.open('/var/log/corosync.log', 'rt') - for line in f.readlines(): + logfile = io.open('/var/log/corosync.log', 'rt') + for line in logfile.readlines(): print(line.strip()) os.remove('/var/log/corosync.log') if self.autogen_corosync_cfg: os.system("rm -f /etc/corosync/corosync.conf") os.system("rm -f /usr/sbin/fence_dummy") - os.system("rm -f /usr/sbin/fence_dummy_automatic_unfence") + os.system("rm -f /usr/sbin/fence_dummy_auto_unfence") os.system("rm -f /usr/sbin/fence_dummy_no_reboot") -class TestOptions: +class TestOptions(object): + """ Option handler """ + def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['cpg-only'] = 0 self.options['no-cpg'] = 0 self.options['show-usage'] = 0 def build_options(self, argv): + """ Set options based on command-line arguments """ + args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": 
self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-n" or args[i] == "--no-cpg": self.options['no-cpg'] = 1 elif args[i] == "-c" or args[i] == "--cpg-only": self.options['cpg-only'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): + """ Show command usage """ + print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--cpg-only | -c] Only run tests that require corosync.") print("\t [--no-cpg | -n] Only run tests that do not require corosync") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_top'") print("\t\t python ./regression.py --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t python ./regression.py --run-only-pattern systemd") def main(argv): - o = TestOptions() - o.build_options(argv) + """ Run fencing regression tests as specified by arguments """ + + opts = TestOptions() + opts.build_options(argv) use_corosync = 1 - tests = Tests(o.options['verbose']) + tests = Tests(opts.options['verbose']) tests.build_standalone_tests() tests.build_custom_timeout_tests() tests.build_api_sanity_tests() tests.build_fence_merge_tests() + tests.build_fence_no_merge_tests() tests.build_unfence_tests() + tests.build_unfence_on_target_tests() tests.build_nodeid_tests() tests.build_remap_tests() - if 
o.options['list-tests']: + if opts.options['list-tests']: tests.print_list() sys.exit(0) - elif o.options['show-usage']: - o.show_usage() + elif opts.options['show-usage']: + opts.show_usage() sys.exit(0) print("Starting ...") - if o.options['no-cpg']: + if opts.options['no-cpg']: use_corosync = 0 tests.setup_environment(use_corosync) - if o.options['run-only-pattern'] != "": - tests.run_tests_matching(o.options['run-only-pattern']) + if opts.options['run-only-pattern'] != "": + tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() - elif o.options['run-only'] != "": - tests.run_single(o.options['run-only']) + elif opts.options['run-only'] != "": + tests.run_single(opts.options['run-only']) tests.print_results() - elif o.options['no-cpg']: + elif opts.options['no-cpg']: tests.run_no_cpg() tests.print_results() - elif o.options['cpg-only']: + elif opts.options['cpg-only']: tests.run_cpg_only() tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment(use_corosync) tests.exit() -if __name__=="__main__": +if __name__ == "__main__": main(sys.argv) diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index e1eda352a0..c7f4079afb 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2693 +1,2693 @@ /* * Copyright (c) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* Add it for compiling on OSX */ #include #include #include #include #include #ifdef HAVE_STONITH_STONITH_H # include # define LHA_STONITH_LIBRARY "libstonith.so.1" static void *lha_agents_lib = NULL; #endif #include CRM_TRACE_INIT_DATA(stonith); struct stonith_action_s { /*! user defined data */ char *agent; char *action; char *victim; char *args; int timeout; int async; void *userdata; void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); /*! internal async track data */ int fd_stdout; int fd_stderr; int last_timeout_signo; /*! internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; guint timer_sigterm; guint timer_sigkill; int max_retries; /* device output data */ GPid pid; int rc; char *output; char *error; }; typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int 
(*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **,
                 xmlNode **);

#if HAVE_STONITH_STONITH_H
/* printf-style template used by stonith_api_device_metadata() for agents that
 * are not handled by the "redhat" provider.
 * NOTE(review): these literals appear to have lost their XML markup in this
 * copy of the file -- confirm against the upstream source. */
static const char META_TEMPLATE[] =
    "\n"
    "\n"
    "\n"
    " 1.0\n"
    " \n"
    "%s\n"
    " \n"
    " %s\n"
    "%s\n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " \n"
    " 2.0\n"
    " \n"
    "\n";
#endif

/* Functions defined elsewhere in this file but needed before their definition */
bool stonith_dispatch(stonith_t * st);
int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata);
void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc);
xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
                           int call_options);
int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data,
                         xmlNode ** output_data, int call_options, int timeout);

static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int internal_stonith_action_execute(stonith_action_t * action);
static void log_action(stonith_action_t *action, pid_t pid);

/*!
 * \internal
 * \brief Log the captured output of an agent run
 *
 * stdout is logged at trace level and stderr at warning level, each with an
 * "agent[pid] stream:" prefix.
 *
 * \param[in] action  Action whose output/error buffers should be logged
 * \param[in] pid     Process id used in the log prefix
 */
static void
log_action(stonith_action_t *action, pid_t pid)
{
    if (action->output) {
        /* Logging the whole string confuses syslog when the string is xml */
        char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);

        crm_log_output(LOG_TRACE, prefix, action->output);
        free(prefix);
    }

    if (action->error) {
        /* Logging the whole string confuses syslog when the string is xml */
        char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);

        crm_log_output(LOG_WARNING, prefix, action->error);
        free(prefix);
    }
}

/*!
 * \internal
 * \brief Mark a connection as disconnected and tell notification clients
 *
 * \param[in] user_data  The stonith_t whose connection went away
 */
static void
stonith_connection_destroy(gpointer user_data)
{
    stonith_t *stonith = user_data;
    stonith_private_t *native = NULL;
    struct notify_blob_s blob;

    crm_trace("Sending destroyed notification");
    blob.stonith = stonith;
    blob.xml = create_xml_node(NULL, "notify");

    /* Forget the IPC handles and record the new state before notifying */
    native = stonith->private;
    native->ipc = NULL;
    native->source = NULL;

    stonith->state = stonith_disconnected;
    crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT);

    /* Hand the synthetic disconnect message to every entry of notify_list */
    g_list_foreach(native->notify_list, stonith_send_notification, &blob);
    free_xml(blob.xml);
}

/*!
 * \brief Build the XML used to register a fencing device
 *
 * \param[in] id            Device id
 * \param[in] namespace     Agent namespace (re-derived via get_stonith_provider()
 *                          when Heartbeat support is compiled in)
 * \param[in] agent         Agent name; "heartbeat" agents are wrapped by
 *                          "fence_legacy" with the real name passed as "plugin"
 * \param[in] params        Linked list of key/value device parameters
 * \param[in] rsc_provides  Optional value for the "rsc_provides" attribute
 *
 * \return Newly created XML tree (caller frees with free_xml())
 */
xmlNode *
create_device_registration_xml(const char *id, const char *namespace, const char *agent,
                               stonith_key_value_t * params, const char *rsc_provides)
{
    xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE);
    xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);

#if HAVE_STONITH_STONITH_H
    namespace = get_stonith_provider(agent, namespace);
    if (safe_str_eq(namespace, "heartbeat")) {
        hash2field((gpointer) "plugin", (gpointer) agent, args);
        agent = "fence_legacy";
    }
#endif

    crm_xml_add(data, XML_ATTR_ID, id);
    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
    crm_xml_add(data, "agent", agent);
    crm_xml_add(data, "namespace", namespace);
    if (rsc_provides) {
        crm_xml_add(data, "rsc_provides", rsc_provides);
    }

    /* Copy every parameter into the attributes child node */
    for (; params; params = params->next) {
        hash2field((gpointer) params->key, (gpointer) params->value, args);
    }

    return data;
}

/*!
 * \internal
 * \brief API method: register a fencing device
 *
 * \return Result of stonith_send_command() for STONITH_OP_DEVICE_ADD
 */
static int
stonith_api_register_device(stonith_t * st, int call_options,
                            const char *id, const char *namespace, const char *agent,
                            stonith_key_value_t * params)
{
    int rc = 0;
    xmlNode *data = NULL;

    data = create_device_registration_xml(id, namespace, agent, params, NULL);

    rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0);
    free_xml(data);

    return rc;
}

/*!
 * \internal
 * \brief API method: remove the fencing device with the given id
 *
 * \return Result of stonith_send_command() for STONITH_OP_DEVICE_DEL
 */
static int
stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
{
    int rc = 0;
    xmlNode *data = NULL;

    data = create_xml_node(NULL, F_STONITH_DEVICE);
    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
    crm_xml_add(data, XML_ATTR_ID, name);
    rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0);
    free_xml(data);

    return rc;
}

/*!
 * \internal
 * \brief API method: remove a topology level selected by node name, node-name
 *        pattern, or node attribute/value (checked in that order)
 *
 * \return -EINVAL if no target was given, else the result of
 *         stonith_send_command() for STONITH_OP_LEVEL_DEL
 */
static int
stonith_api_remove_level_full(stonith_t *st, int options,
                              const char *node, const char *pattern,
                              const char *attr, const char *value, int level)
{
    int rc = 0;
    xmlNode *data = NULL;

    CRM_CHECK(node || pattern || (attr && value), return
-EINVAL);

    data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);

    /* Exactly one kind of target is recorded; node wins over pattern,
     * pattern over attribute/value */
    if (node) {
        crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);

    } else if (pattern) {
        crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern);

    } else {
        crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr);
        crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value);
    }

    crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
    rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0);
    free_xml(data);

    return rc;
}

/*!
 * \internal
 * \brief API method: remove a topology level targeted by node name
 *
 * Thin wrapper around stonith_api_remove_level_full() with no pattern or
 * attribute target.
 */
static int
stonith_api_remove_level(stonith_t * st, int options, const char *node, int level)
{
    return stonith_api_remove_level_full(st, options, node,
                                         NULL, NULL, NULL, level);
}

/*!
 * \internal
 * \brief Create XML for stonithd topology level registration request
 *
 * \param[in] node        If not NULL, target level by this node name
 * \param[in] pattern     If not NULL, target by node name using this regex
 * \param[in] attr        If not NULL, target by this node attribute
 * \param[in] value       If not NULL, target by this node attribute value
 * \param[in] level       Index number of level to register
 * \param[in] device_list List of devices in level
 *
 * \return Newly allocated XML tree on success, NULL otherwise
 *
 * \note The caller should set only one of node, pattern or attr/value.
 */
xmlNode *
create_level_registration_xml(const char *node, const char *pattern,
                              const char *attr, const char *value,
                              int level, stonith_key_value_t *device_list)
{
    int len = 0;                /* current length of list, excluding the EOS */
    char *list = NULL;          /* comma-separated device names */
    xmlNode *data;

    CRM_CHECK(node || pattern || (attr && value), return NULL);

    data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
    CRM_CHECK(data, return NULL);

    crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
    crm_xml_add_int(data, XML_ATTR_ID, level);
    crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);

    if (node) {
        crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);

    } else if (pattern) {
        crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern);

    } else {
        crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr);
        crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value);
    }

    /* Grow the list one device at a time */
    for (; device_list; device_list = device_list->next) {

        int adding = strlen(device_list->value);
        if(list) {
            adding++;                                      /* +1 for the ',' separator */
        }

        crm_trace("Adding %s (%dc) at offset %d", device_list->value, adding, len);
        list = realloc_safe(list, len + adding + 1);       /* +1 EOS */
        if (list == NULL) {
            crm_perror(LOG_CRIT, "Could not create device list");
            free_xml(data);
            return NULL;
        }
        /* The separator is only written once the list is non-empty */
        sprintf(list + len, "%s%s", len?",":"", device_list->value);
        len += adding;
    }

    crm_xml_add(data, XML_ATTR_STONITH_DEVICES, list);

    free(list);
    return data;
}

/*!
 * \internal
 * \brief API method: register a topology level for a node, pattern or
 *        attribute/value target
 *
 * \return -EINVAL if the request XML could not be built, else the result of
 *         stonith_send_command() for STONITH_OP_LEVEL_ADD
 */
static int
stonith_api_register_level_full(stonith_t * st, int options, const char *node,
                                const char *pattern,
                                const char *attr, const char *value,
                                int level, stonith_key_value_t *device_list)
{
    int rc = 0;
    xmlNode *data = create_level_registration_xml(node, pattern, attr, value,
                                                  level, device_list);
    CRM_CHECK(data != NULL, return -EINVAL);

    rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0);
    free_xml(data);

    return rc;
}

/*!
 * \internal
 * \brief API method: register a topology level targeted by node name
 *
 * Thin wrapper around stonith_api_register_level_full().
 */
static int
stonith_api_register_level(stonith_t * st, int options, const char *node, int level,
                           stonith_key_value_t * device_list)
{
    return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL,
                                           level, device_list);
}

/* Next definition: GHFunc-style callback that appends one "key=value\n" line
 * to the string pointed to by user_data */
static void
append_arg(gpointer key, gpointer value, gpointer user_data)
{
    int len = 3;                /* =, \n, \0 */
    int last = 0;               /* current length of *args */
    char **args = user_data;    /* in/out: growing argument string */

    CRM_CHECK(key != NULL, return);
    CRM_CHECK(value != NULL, return);

    /* Skip keys containing "pcmk_" or CRM_META anywhere, and the feature set:
     * none of them are forwarded to the agent */
    if (strstr(key, "pcmk_")) {
        return;
    } else if (strstr(key, CRM_META)) {
        return;
    } else if (safe_str_eq(key, "crm_feature_set")) {
        return;
    }

    len += strlen(key);
    len += strlen(value);
    if (*args != NULL) {
        last = strlen(*args);
    }

    /* Grow the buffer by exactly the size of the new "key=value\n" entry */
    *args = realloc_safe(*args, last + len);
    crm_trace("Appending: %s=%s", (char *)key, (char *)value);
    sprintf((*args) + last, "%s=%s\n", (char *)key, (char *)value);
}

/*!
 * \internal
 * \brief Append a key/value pair given as const strings
 *
 * append_arg() takes non-const gpointer arguments, so temporary copies are
 * made and released again. Does nothing (beyond the logged assertion) when
 * either argument is NULL.
 */
static void
append_const_arg(const char *key, const char *value, char **arg_list)
{
    CRM_LOG_ASSERT(key && value);
    if(key && value) {
        char *glib_sucks_key = strdup(key);
        char *glib_sucks_value = strdup(value);

        append_arg(glib_sucks_key, glib_sucks_value, arg_list);

        free(glib_sucks_value);
        free(glib_sucks_key);
    }
}

/*!
 * \internal
 * \brief Append arguments described by a legacy argument map
 *
 * \param[in]     victim    Target node name (the value used for "uname")
 * \param[in]     map       Argument map string, or NULL for "port=<victim>"
 * \param[in]     params    Table in which mapped attribute values are looked up
 * \param[in,out] arg_list  Argument string being built
 */
static void
append_host_specific_args(const char *victim, const char *map, GHashTable * params, char **arg_list)
{
    char *name = NULL;
    int last = 0, lpc = 0, max = 0;

    if (map == NULL) {
        /* The best default there is for now...
*/ crm_debug("Using default arg map: port=uname"); append_const_arg("port", victim, arg_list); return; } max = strlen(map); crm_debug("Processing arg map: %s", map); for (; lpc < max + 1; lpc++) { if (isalpha(map[lpc])) { /* keep going */ } else if (map[lpc] == '=' || map[lpc] == ':') { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, map + last, lpc - last); crm_debug("Got name: %s", name); last = lpc + 1; } else if (map[lpc] == 0 || map[lpc] == ',' || isspace(map[lpc])) { char *param = NULL; const char *value = NULL; param = calloc(1, 1 + lpc - last); memcpy(param, map + last, lpc - last); last = lpc + 1; crm_debug("Got key: %s", param); if (name == NULL) { crm_err("Misparsed '%s', found '%s' without a name", map, param); free(param); continue; } if (safe_str_eq(param, "uname")) { value = victim; } else { char *key = crm_meta_name(param); value = g_hash_table_lookup(params, key); free(key); } if (value) { crm_debug("Setting '%s'='%s' (%s) for %s", name, value, param, victim); append_const_arg(name, value, arg_list); } else { crm_err("No node attribute '%s' for '%s'", name, victim); } free(name); name = NULL; free(param); if (map[lpc] == 0) { break; } } else if (isspace(map[lpc])) { last = lpc; } } free(name); } static char * make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, GHashTable * port_map) { char buffer[512]; char *arg_list = NULL; const char *value = NULL; const char *_action = action; CRM_CHECK(action != NULL, return NULL); buffer[511] = 0; snprintf(buffer, 511, "pcmk_%s_action", action); if (device_args) { value = g_hash_table_lookup(device_args, buffer); } if (value == NULL && device_args) { /* Legacy support for early 1.1 releases - Remove for 1.4 */ snprintf(buffer, 511, "pcmk_%s_cmd", action); value = g_hash_table_lookup(device_args, buffer); } if (value == NULL && device_args && safe_str_eq(action, "off")) { /* Legacy support for late 1.1 releases - Remove for 1.4 */ 
value = g_hash_table_lookup(device_args, "pcmk_poweroff_action"); } if (value) { crm_info("Substituting action '%s' for requested operation '%s'", value, action); action = value; } append_const_arg(STONITH_ATTR_ACTION_OP, action, &arg_list); if (victim && device_args) { const char *alias = victim; const char *param = g_hash_table_lookup(device_args, STONITH_ATTR_HOSTARG); if (port_map && g_hash_table_lookup(port_map, victim)) { alias = g_hash_table_lookup(port_map, victim); } /* Always supply the node's name too: * https://fedorahosted.org/cluster/wiki/FenceAgentAPI */ append_const_arg("nodename", victim, &arg_list); if (victim_nodeid) { char nodeid_str[33] = { 0, }; if (snprintf(nodeid_str, 33, "%u", (unsigned int)victim_nodeid)) { - crm_info("For stonith action (%s) for victim %s, adding nodeid (%d) to parameters", + crm_info("For stonith action (%s) for victim %s, adding nodeid (%s) to parameters", action, victim, nodeid_str); append_const_arg("nodeid", nodeid_str, &arg_list); } } /* Check if we need to supply the victim in any other form */ if(safe_str_eq(agent, "fence_legacy")) { value = agent; } else if (param == NULL) { const char *map = g_hash_table_lookup(device_args, STONITH_ATTR_ARGMAP); if (map == NULL) { param = "port"; value = g_hash_table_lookup(device_args, param); } else { /* Legacy handling */ append_host_specific_args(alias, map, device_args, &arg_list); value = map; /* Nothing more to do */ } } else if (safe_str_eq(param, "none")) { value = param; /* Nothing more to do */ } else { value = g_hash_table_lookup(device_args, param); } /* Don't overwrite explictly set values for $param */ if (value == NULL || safe_str_eq(value, "dynamic")) { crm_debug("Performing %s action for node '%s' as '%s=%s'", action, victim, param, alias); append_const_arg(param, alias, &arg_list); } } if (device_args) { g_hash_table_foreach(device_args, append_arg, &arg_list); } if(device_args && g_hash_table_lookup(device_args, STONITH_ATTR_ACTION_OP)) { 
if(safe_str_eq(_action,"list") || safe_str_eq(_action,"status") || safe_str_eq(_action,"monitor") || safe_str_eq(_action,"metadata")) { /* Force use of the calculated command for support ops * We don't want list or monitor ops initiating fencing, regardless of what the admin configured */ append_const_arg(STONITH_ATTR_ACTION_OP, action, &arg_list); } } return arg_list; } static gboolean st_child_term(gpointer data) { int rc = 0; stonith_action_t *track = data; crm_info("Child %d timed out, sending SIGTERM", track->pid); track->timer_sigterm = 0; track->last_timeout_signo = SIGTERM; rc = kill(-track->pid, SIGTERM); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid); } return FALSE; } static gboolean st_child_kill(gpointer data) { int rc = 0; stonith_action_t *track = data; crm_info("Child %d timed out, sending SIGKILL", track->pid); track->timer_sigkill = 0; track->last_timeout_signo = SIGKILL; rc = kill(-track->pid, SIGKILL); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid); } return FALSE; } static void stonith_action_clear_tracking_data(stonith_action_t * action) { if (action->timer_sigterm > 0) { g_source_remove(action->timer_sigterm); action->timer_sigterm = 0; } if (action->timer_sigkill > 0) { g_source_remove(action->timer_sigkill); action->timer_sigkill = 0; } if (action->fd_stdout) { close(action->fd_stdout); action->fd_stdout = 0; } if (action->fd_stderr) { close(action->fd_stderr); action->fd_stderr = 0; } free(action->output); action->output = NULL; free(action->error); action->error = NULL; action->rc = 0; action->pid = 0; action->last_timeout_signo = 0; } static void stonith_action_destroy(stonith_action_t * action) { stonith_action_clear_tracking_data(action); free(action->agent); free(action->args); free(action->action); free(action->victim); free(action); } #define FAILURE_MAX_RETRIES 2 stonith_action_t * stonith_action_create(const char *agent, const char *_action, const char *victim, uint32_t 
victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map)
{
    stonith_action_t *action;

    action = calloc(1, sizeof(stonith_action_t));
    crm_debug("Initiating action %s for agent %s (target=%s)", _action, agent, victim);
    /* The action owns private copies of everything it is given */
    action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map);
    action->agent = strdup(agent);
    action->action = strdup(_action);
    if (victim) {
        action->victim = strdup(victim);
    }
    action->timeout = action->remaining_timeout = timeout;
    action->max_retries = FAILURE_MAX_RETRIES;

    /* Allow a per-action "pcmk_<action>_retries" parameter to change the limit */
    if (device_args) {
        char buffer[512];
        const char *value = NULL;

        snprintf(buffer, 511, "pcmk_%s_retries", _action);
        value = g_hash_table_lookup(device_args, buffer);

        if (value) {
            action->max_retries = atoi(value);
        }
    }

    return action;
}

/* Size of the stack buffer used by read_output(); one byte is kept for the NUL */
#define READ_MAX 500

/*!
 * \internal
 * \brief Read everything currently available from a descriptor
 *
 * Keeps reading while full chunks arrive or read() is interrupted by a signal.
 *
 * \param[in] fd  Descriptor to read from; 0 is treated as "no descriptor"
 *
 * \return Newly allocated NUL-terminated string, or NULL if nothing was read
 */
static char *
read_output(int fd)
{
    char buffer[READ_MAX];
    char *output = NULL;
    int len = 0;                /* bytes accumulated in output so far */
    int more = 0;               /* result of the most recent read() */

    if (!fd) {
        return NULL;
    }

    do {
        errno = 0;
        memset(&buffer, 0, READ_MAX);
        more = read(fd, buffer, READ_MAX - 1);

        if (more > 0) {
            buffer[more] = 0; /* Make sure it's nul-terminated for logging
                              * 'more' is always less than our buffer size
                              */
            output = realloc_safe(output, len + more + 1);
            snprintf(output + len, more + 1, "%s", buffer);
            len += more;
        }

    } while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR));

    return output;
}

/*!
 * \internal
 * \brief Recompute how much time is left for another attempt
 *
 * remaining_timeout becomes 0 when the attempt limit is reached, when the
 * last attempt ended with -ETIME, or when 70% or more of the original timeout
 * has elapsed since the first attempt.
 *
 * \return TRUE if another attempt may be made, FALSE otherwise
 */
static gboolean
update_remaining_timeout(stonith_action_t * action)
{
    int diff = time(NULL) - action->initial_start_time;

    if (action->tries >= action->max_retries) {
        crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
                 action->agent, action->action, action->max_retries);
        action->remaining_timeout = 0;
    } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) {
        /* only set remaining timeout period if there is 30%
         * or greater of the original timeout period left */
        action->remaining_timeout = action->timeout - diff;
    } else {
        action->remaining_timeout = 0;
    }
    return action->remaining_timeout ?
TRUE : FALSE;
}

/*!
 * \internal
 * \brief Mainloop child callback run when an asynchronous agent exits
 *
 * Cancels the timeout timers, collects the child's stdout/stderr and converts
 * the way the child ended into action->rc.
 *
 * \param[in] p         Mainloop child entry; its user data is the action
 * \param[in] pid       Process id of the child
 * \param[in] core      Not used in the visible part of this function
 * \param[in] signo     Signal that ended the child, 0 if none
 * \param[in] exitcode  Exit status of the child
 */
static void
stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
    stonith_action_t *action = mainloop_child_userdata(p);

    if (action->timer_sigterm > 0) {
        g_source_remove(action->timer_sigterm);
        action->timer_sigterm = 0;
    }
    if (action->timer_sigkill > 0) {
        g_source_remove(action->timer_sigkill);
        action->timer_sigkill = 0;
    }

    action->output = read_output(action->fd_stdout);
    action->error = read_output(action->fd_stderr);

    if (action->last_timeout_signo) {
        /* One of our own timeout handlers signalled the child */
        action->rc = -ETIME;
        crm_notice("Child process %d performing action '%s' timed out with signal %d",
                   pid, action->action, action->last_timeout_signo);

    } else if (signo) {
        /* NOTE(review): the child was ended by a signal we did not send, yet
         * the message still says "timed out" -- confirm the wording is intended */
        action->rc = -ECONNABORTED;
        crm_notice("Child process %d performing action '%s' timed out with signal %d",
                   pid, action->action, signo);

    } else {
        crm_debug("Child process %d performing action '%s' exited with rc %d",
                  pid, action->action, exitcode);
        if (exitcode > 0) {
            /* Try to provide a useful error code based on the fence agent's
             * error output.
*/
            if (action->error == NULL) {
                exitcode = -ENODATA;

            } else if (strstr(action->error, "imed out")) {
                /* Some agents have their own internal timeouts */
                exitcode = -ETIMEDOUT;

            } else if (strstr(action->error, "Unrecognised action")) {
                exitcode = -EOPNOTSUPP;

            } else {
                exitcode = -pcmk_err_generic;
            }
        }
        action->rc = exitcode;
    }

    log_action(action, pid);

    /* On failure, retry while attempts and time remain; a successfully started
     * retry reports through this callback again, so stop here */
    if (action->rc != pcmk_ok && update_remaining_timeout(action)) {
        int rc = internal_stonith_action_execute(action);
        if (rc == pcmk_ok) {
            return;
        }
    }

    if (action->done_cb) {
        action->done_cb(pid, action->rc, action->output, action->userdata);
    }

    stonith_action_destroy(action);
}

/*!
 * \internal
 * \brief Fork the agent, feed it its arguments and (in sync mode) wait for it
 *
 * Three pipes connect the parent to the child's stdin, stdout and stderr.
 * In async mode the function returns right after registering the child with
 * the mainloop; in sync mode it polls waitpid() once per second.
 *
 * \param[in,out] action  Action to run; tracking fields are reset first
 *
 * \return 0 if the agent was started (async) or exited normally (sync),
 *         a negative errno-style value otherwise
 */
static int
internal_stonith_action_execute(stonith_action_t * action)
{
    int pid, status = 0, len, rc = -EPROTO;
    int ret;
    int total = 0;              /* bytes of action->args written so far */
    int p_read_fd, p_write_fd;  /* parent read/write file descriptors */
    int c_read_fd, c_write_fd;  /* child read/write file descriptors */
    int c_stderr_fd, p_stderr_fd; /* parent/child side file descriptors for stderr */
    int fd1[2];
    int fd2[2];
    int fd3[2];
    int is_retry = 0;

    /* clear any previous tracking data */
    stonith_action_clear_tracking_data(action);

    if (!action->tries) {
        action->initial_start_time = time(NULL);
    }
    action->tries++;

    if (action->tries > 1) {
        crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
                 action->tries, action->agent, action->action, action->remaining_timeout);
        is_retry = 1;
    }

    /* -1 marks "not open" so the cleanup at fail: only closes real descriptors */
    c_read_fd = c_write_fd = p_read_fd = p_write_fd = c_stderr_fd = p_stderr_fd = -1;

    if (action->args == NULL || action->agent == NULL)
        goto fail;
    len = strlen(action->args);

    /* fd1: child stdout -> parent */
    if (pipe(fd1))
        goto fail;
    p_read_fd = fd1[0];
    c_write_fd = fd1[1];

    /* fd2: parent -> child stdin */
    if (pipe(fd2))
        goto fail;
    c_read_fd = fd2[0];
    p_write_fd = fd2[1];

    /* fd3: child stderr -> parent */
    if (pipe(fd3))
        goto fail;
    p_stderr_fd = fd3[0];
    c_stderr_fd = fd3[1];

    crm_debug("forking");
    pid = fork();
    if (pid < 0) {
        rc = -ECHILD;
        goto fail;
    }

    if (!pid) {
        /* child */
        /* Own process group, so kill(-pid, ...) reaches the agent's children too */
        setpgid(0, 0);

        /* NOTE(review): a failed dup() below jumps to fail: and returns from
         * this function inside the child process -- confirm that is intended */
        close(1);
        /* coverity[leaked_handle] False positive */
        if (dup(c_write_fd) < 0)
            goto fail;
        close(2);
        /* coverity[leaked_handle] False positive */
        if (dup(c_stderr_fd) < 0)
            goto fail;
        close(0);
        /* coverity[leaked_handle] False positive */
        if (dup(c_read_fd) < 0)
            goto fail;

        /* keep c_stderr_fd open so parent can report all errors. */
        /* keep c_write_fd open so hostlist can be sent to parent. */
        close(c_read_fd);
        close(p_read_fd);
        close(p_write_fd);
        close(p_stderr_fd);

        /* keep retries from executing out of control */
        if (is_retry) {
            sleep(1);
        }
        execlp(action->agent, action->agent, NULL);
        exit(EXIT_FAILURE);
    }

    /* parent */
    action->pid = pid;
    ret = fcntl(p_read_fd, F_SETFL, fcntl(p_read_fd, F_GETFL, 0) | O_NONBLOCK);
    if (ret < 0) {
        crm_perror(LOG_NOTICE, "Could not change the output of %s to be non-blocking",
                   action->agent);
    }
    ret = fcntl(p_stderr_fd, F_SETFL, fcntl(p_stderr_fd, F_GETFL, 0) | O_NONBLOCK);
    if (ret < 0) {
        crm_perror(LOG_NOTICE, "Could not change the stderr of %s to be non-blocking",
                   action->agent);
    }

    /* Send the argument string to the agent's stdin.
     * NOTE(review): errno is not cleared before write(), so the loop condition
     * may observe a stale EINTR -- confirm */
    do {
        crm_debug("sending args");
        ret = write(p_write_fd, action->args + total, len - total);
        if (ret > 0) {
            total += ret;
        }

    } while (errno == EINTR && total < len);

    if (total != len) {
        crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len);
        if (ret >= 0) {
            rc = -ECOMM;
        }
        goto fail;
    }

    /* Closing our write end gives the agent EOF on stdin */
    close(p_write_fd); p_write_fd = -1;

    /* async */
    if (action->async) {
        /* The read ends stay open; the completion callback drains them */
        action->fd_stdout = p_read_fd;
        action->fd_stderr = p_stderr_fd;
        mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done);
        crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid,
                  action->remaining_timeout);
        action->last_timeout_signo = 0;
        if (action->remaining_timeout) {
            /* SIGTERM at the deadline, SIGKILL five seconds later */
            action->timer_sigterm =
                g_timeout_add(1000 * action->remaining_timeout, st_child_term, action);
            action->timer_sigkill =
                g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action);
        } else {
            crm_err("No timeout set for stonith operation %s with device %s",
                    action->action, action->agent);
        }

        close(c_write_fd);
        close(c_read_fd);
        close(c_stderr_fd);
        return 0;

    } else {
        /* sync */
        int timeout = action->remaining_timeout + 1;
        pid_t p = 0;

        /* Poll once per second; the first clause keeps the loop running for a
         * negative remaining_timeout */
        while (action->remaining_timeout < 0 || timeout > 0) {
            p = waitpid(pid, &status, WNOHANG);
            if (p > 0) {
                break;
            }
            sleep(1);
            timeout--;
        }

        if (timeout == 0) {
            int killrc = kill(-pid, SIGKILL);

            if (killrc && errno != ESRCH) {
                crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno);
            }
            /*
             * From sigprocmask(2):
             * It is not possible to block SIGKILL or SIGSTOP.  Attempts to do so are silently ignored.
             *
             * This makes it safe to skip WNOHANG here
             */
            p = waitpid(pid, &status, 0);
        }

        if (p <= 0) {
            crm_perror(LOG_ERR, "waitpid(%d)", pid);

        } else if (p != pid) {
            crm_err("Waited for %d, got %d", pid, p);
        }

        action->output = read_output(p_read_fd);
        action->error = read_output(p_stderr_fd);

        /* Default result; only a normal exit below turns rc into success */
        action->rc = -ECONNABORTED;

        log_action(action, pid);

        rc = action->rc;
        if (timeout == 0) {
            action->rc = -ETIME;
        } else if (WIFEXITED(status)) {
            crm_debug("result = %d", WEXITSTATUS(status));
            action->rc = -WEXITSTATUS(status);
            rc = 0;

        } else if (WIFSIGNALED(status)) {
            crm_err("call %s for %s exited due to signal %d", action->action, action->agent,
                    WTERMSIG(status));

        } else {
            crm_err("call %s for %s returned unexpected status %#x",
                    action->action, action->agent, status);
        }
    }

  fail:

    /* Close whatever is still open (also reached on the sync success path) */
    if (p_read_fd >= 0) {
        close(p_read_fd);
    }
    if (p_write_fd >= 0) {
        close(p_write_fd);
    }
    if (p_stderr_fd >= 0) {
        close(p_stderr_fd);
    }

    if (c_read_fd >= 0) {
        close(c_read_fd);
    }
    if (c_write_fd >= 0) {
        close(c_write_fd);
    }
    if (c_stderr_fd >= 0) {
        close(c_stderr_fd);
    }

    return rc;
}

/*!
 * \brief Start an action asynchronously
 *
 * \param[in] action    Action to run
 * \param[in] userdata  Passed back to \p done
 * \param[in] done      Invoked from the mainloop when the action finishes
 *
 * \return pid of the agent process on success, a negative value otherwise
 *         (-1 if \p action is NULL)
 */
GPid
stonith_action_execute_async(stonith_action_t * action,
                             void *userdata,
                             void (*done) (GPid pid, int rc, const char *output,
                                           gpointer user_data))
{
    int rc = 0;

    if (!action) {
        return -1;
    }

    action->userdata = userdata;
    action->done_cb = done;
    action->async = 1;

    rc = internal_stonith_action_execute(action);

    return rc < 0 ? rc : action->pid;
}

/*!
 * \brief Run an action synchronously, retrying while time remains
 *
 * \param[in]  action        Action to run
 * \param[out] agent_result  If not NULL, receives the agent's result code
 * \param[out] output        If not NULL, receives ownership of the agent's stdout
 *
 * \return 0 if the agent was executed, a negative value otherwise
 *         (-1 if \p action is NULL)
 */
int
stonith_action_execute(stonith_action_t * action, int *agent_result, char **output)
{
    int rc = 0;

    if (!action) {
        return -1;
    }

    do {
        rc = internal_stonith_action_execute(action);
        if (rc == pcmk_ok) {
            /* success!
*/
            break;
        }
        /* keep retrying while we have time left */
    } while (update_remaining_timeout(action));

    /* NOTE(review): the action is not destroyed on this error path -- confirm
     * whether the caller is expected to release it */
    if (rc) {
        /* error */
        return rc;
    }

    if (agent_result) {
        *agent_result = action->rc;
    }
    if (output) {
        *output = action->output;
        action->output = NULL;  /* handed it off, do not free */
    }

    stonith_action_destroy(action);
    return rc;
}

/*!
 * \internal
 * \brief API method: list the fence agents installed on this host
 *
 * \param[in]     stonith       Not used in the visible part of this function
 * \param[in]     call_options  Not used in the visible part of this function
 * \param[in]     namespace     "heartbeat", "redhat", or NULL for both
 * \param[in,out] devices       List that agent names are appended to
 * \param[in]     timeout       Not used in the visible part of this function
 *
 * \return Number of agents added, -EFAULT if \p devices is NULL, or -EINVAL
 *         if Heartbeat agents were requested without support compiled in
 */
static int
stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace,
                        stonith_key_value_t ** devices, int timeout)
{
    int count = 0;

    if (devices == NULL) {
        crm_err("Parameter error: stonith_api_device_list");
        return -EFAULT;
    }

    /* Include Heartbeat agents */
    if (namespace == NULL || safe_str_eq("heartbeat", namespace)) {
#if HAVE_STONITH_STONITH_H
        static gboolean need_init = TRUE;

        char **entry = NULL;
        char **type_list = NULL;
        static char **(*type_list_fn) (void) = NULL;
        static void (*type_free_fn) (char **) = NULL;

        /* Resolve the libstonith entry points only once per process */
        if (need_init) {
            need_init = FALSE;
            type_list_fn =
                find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE);
            type_free_fn =
                find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist",
                                      FALSE);
        }

        if (type_list_fn) {
            type_list = (*type_list_fn) ();
        }

        for (entry = type_list; entry != NULL && *entry; ++entry) {
            crm_trace("Added: %s", *entry);
            *devices = stonith_key_value_add(*devices, NULL, *entry);
            count++;
        }
        if (type_list && type_free_fn) {
            (*type_free_fn) (type_list);
        }
#else
        if (namespace != NULL) {
            return -EINVAL;     /* Heartbeat agents not supported */
        }
#endif
    }

    /* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */
    if (namespace == NULL || safe_str_eq("redhat", namespace)) {
        struct dirent **namelist;
        int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort);

        if (file_num > 0) {
            struct stat prop;
            char buffer[FILENAME_MAX + 1];

            /* Walk the entries from last to first, freeing each as we go */
            while (file_num--) {
                if ('.'
== namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (0 != strncmp(RH_STONITH_PREFIX, namelist[file_num]->d_name, strlen(RH_STONITH_PREFIX))) { free(namelist[file_num]); continue; } snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { *devices = stonith_key_value_add(*devices, NULL, namelist[file_num]->d_name); count++; } free(namelist[file_num]); } free(namelist); } } return count; } #if HAVE_STONITH_STONITH_H static inline char * strdup_null(const char *val) { if (val) { return strdup(val); } return NULL; } static void stonith_plugin(int priority, const char *fmt, ...) __attribute__((__format__ (__printf__, 2, 3))); static void stonith_plugin(int priority, const char *format, ...) { int err = errno; va_list ap; int len = 0; char *string = NULL; va_start(ap, format); len = vasprintf (&string, format, ap); CRM_ASSERT(len > 0); do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", string); free(string); errno = err; } #endif static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { int rc = 0; char *buffer = NULL; const char *provider = get_stonith_provider(agent, namespace); crm_trace("looking up %s/%s metadata", agent, provider); /* By having this in a library, we can access it from stonith_admin * when neither lrmd or stonith-ng are running * Important for the crm shell's validations... 
*/ if (safe_str_eq(provider, "redhat")) { stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL); int exec_rc = stonith_action_execute(action, &rc, &buffer); xmlNode *xml = NULL; xmlNode *actions = NULL; xmlXPathObject *xpathObj = NULL; if (exec_rc < 0 || rc != 0 || buffer == NULL) { crm_warn("Could not obtain metadata for %s", agent); crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer)); free(buffer); /* Just in case */ return -EINVAL; } xml = string2xml(buffer); if(xml == NULL) { crm_warn("Metadata for %s is invalid", agent); free(buffer); return -EINVAL; } xpathObj = xpath_search(xml, "//actions"); if (numXpathResults(xpathObj) > 0) { actions = getXpathResult(xpathObj, 0); } freeXpathObject(xpathObj); /* Now fudge the metadata so that the start/stop actions appear */ xpathObj = xpath_search(xml, "//action[@name='stop']"); if (numXpathResults(xpathObj) <= 0) { xmlNode *tmp = NULL; tmp = create_xml_node(actions, "action"); crm_xml_add(tmp, "name", "stop"); crm_xml_add(tmp, "timeout", "20s"); tmp = create_xml_node(actions, "action"); crm_xml_add(tmp, "name", "start"); crm_xml_add(tmp, "timeout", "20s"); } freeXpathObject(xpathObj); /* Now fudge the metadata so that the port isn't required in the configuration */ xpathObj = xpath_search(xml, "//parameter[@name='port']"); if (numXpathResults(xpathObj) > 0) { /* We'll fill this in */ xmlNode *tmp = getXpathResult(xpathObj, 0); crm_xml_add(tmp, "required", "0"); } freeXpathObject(xpathObj); free(buffer); buffer = dump_xml_formatted_with_text(xml); free_xml(xml); if (!buffer) { return -EINVAL; } } else { #if !HAVE_STONITH_STONITH_H return -EINVAL; /* Heartbeat agents not supported */ #else int bufferlen = 0; static const char *no_parameter_info = ""; Stonith *stonith_obj = NULL; static gboolean need_init = TRUE; static Stonith *(*st_new_fn) (const char *) = NULL; static const char *(*st_info_fn) (Stonith *, int) = NULL; static void (*st_del_fn) (Stonith *) = NULL; 
static void (*st_log_fn) (Stonith *, PILLogFun) = NULL; if (need_init) { need_init = FALSE; st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", FALSE); st_log_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log", FALSE); st_info_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info", FALSE); } if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) { char *xml_meta_longdesc = NULL; char *xml_meta_shortdesc = NULL; char *meta_param = NULL; char *meta_longdesc = NULL; char *meta_shortdesc = NULL; stonith_obj = (*st_new_fn) (agent); if (stonith_obj) { (*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin); meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR)); if (meta_longdesc == NULL) { crm_warn("no long description in %s's metadata.", agent); meta_longdesc = strdup(no_parameter_info); } meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID)); if (meta_shortdesc == NULL) { crm_warn("no short description in %s's metadata.", agent); meta_shortdesc = strdup(no_parameter_info); } meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML)); if (meta_param == NULL) { crm_warn("no list of parameters in %s's metadata.", agent); meta_param = strdup(no_parameter_info); } (*st_del_fn) (stonith_obj); } else { return -EINVAL; /* Heartbeat agents not supported */ } xml_meta_longdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc); xml_meta_shortdesc = (char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc); bufferlen = strlen(META_TEMPLATE) + strlen(agent) + strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc) + strlen(meta_param) + 1; buffer = calloc(1, bufferlen); snprintf(buffer, bufferlen - 1, META_TEMPLATE, agent, xml_meta_longdesc, xml_meta_shortdesc, meta_param); 
xmlFree(xml_meta_longdesc); xmlFree(xml_meta_shortdesc); free(meta_shortdesc); free(meta_longdesc); free(meta_param); } #endif } if (output) { *output = buffer; } else { free(buffer); } return rc; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObjectPtr xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, target); crm_xml_add(data, F_STONITH_ACTION, "off"); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = xpath_search(output, "//@agent"); if (xpathObj) { max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { xmlChar *match_path = xmlGetNodePath(match); crm_info("%s[%d] = %s", "//@agent", lpc, match_path); free(match_path); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); } } freeXpathObject(xpathObj); } free_xml(output); free_xml(data); return max; } static int stonith_api_call(stonith_t * stonith, int call_options, const char *id, const char *action, const char *victim, int timeout, xmlNode ** output) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add(data, F_STONITH_DEVICE, id); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add(data, F_STONITH_TARGET, victim); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout); free_xml(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; 
rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, "st_output"); if (list_str) { *list_info = strdup(list_str); } } if (output) { free_xml(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL); } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { return stonith_api_fence(stonith, call_options | st_opt_manual_ack, target, "off", 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = create_xml_node(NULL, __FUNCTION__); crm_xml_add(data, F_STONITH_TARGET, node); } rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options | st_opt_sync_call, timeout); free_xml(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_ERR); for (op = 
__xml_first_child(reply); op != NULL; op = __xml_next(op)) { stonith_history_t *kvp; kvp = calloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, F_STONITH_TARGET); kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); crm_element_value_int(op, F_STONITH_DATE, &kvp->completed); crm_element_value_int(op, F_STONITH_STATE, &kvp->state); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } return rc; } gboolean is_redhat_agent(const char *agent) { int rc = 0; struct stat prop; char buffer[FILENAME_MAX + 1]; snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, agent); rc = stat(buffer, &prop); if (rc >= 0 && S_ISREG(prop.st_mode)) { return TRUE; } return FALSE; } const char * get_stonith_provider(const char *agent, const char *provider) { /* This function sucks */ if (is_redhat_agent(agent)) { return "redhat"; #if HAVE_STONITH_STONITH_H } else { Stonith *stonith_obj = NULL; static gboolean need_init = TRUE; static Stonith *(*st_new_fn) (const char *) = NULL; static void (*st_del_fn) (Stonith *) = NULL; if (need_init) { need_init = FALSE; st_new_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE); st_del_fn = find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete", FALSE); } if (lha_agents_lib && st_new_fn && st_del_fn) { stonith_obj = (*st_new_fn) (agent); if (stonith_obj) { (*st_del_fn) (stonith_obj); return "heartbeat"; } } #endif } if (safe_str_eq(provider, "internal")) { return provider; } else { crm_err("No such device: %s", agent); return NULL; } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; CRM_CHECK(a_client->event != NULL && b_client->event 
!= NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_STONITH_OPERATION, op); crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_STONITH_CALLDATA, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->private; crm_debug("Signing out of the STONITH Service"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; 
return pcmk_ok; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = stonith->private; static struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; crm_trace("Connecting command channel"); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *stonith_fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_perror(LOG_ERR, "Connection to STONITH manager failed"); rc = -ENOTCONN; } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the Stonith API"); rc = -ENOTCONN; } if (rc == pcmk_ok) { xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "stonith_command"); crm_xml_add(hello, F_TYPE, T_STONITH_NG); crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_STONITH_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID); if (safe_str_neq(msg_type, CRM_OP_REGISTER)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = 
strdup(tmp_ticket); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); } if (rc == pcmk_ok) { #if HAVE_MSGFROMIPC_TIMEOUT stonith->call_timeout = MAX_IPC_DELAY; #endif crm_debug("Connection to STONITH successful"); return pcmk_ok; } crm_debug("Connection to STONITH failed: %s", pcmk_strerror(rc)); stonith->cmds->disconnect(stonith); return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__); stonith_private_t *native = stonith->private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); if (enabled) { crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } free_xml(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static int 
stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; crm_debug("Removing callback for %s events", event); private = stonith->private; new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = calloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->private; callback 
= g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static void invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = rc; data.userdata = userdata; callback(st, &data); } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->private != NULL, return -EINVAL); private = stonith->private; if (call_id == 0) { private->op_callback = callback; } else if (call_id < 0) { if (!(options & st_opt_report_only_success)) { crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); invoke_callback(stonith, call_id, call_id, user_data, callback); } else { crm_warn("STONITH call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = calloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? 
TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } g_hash_table_insert(private->stonith_op_callback_table, GINT_TO_POINTER(call_id), blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { g_hash_table_remove(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); } return pcmk_ok; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc) { stonith_private_t *private = NULL; stonith_callback_client_t *blob = NULL; stonith_callback_client_t local_blob; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->private != NULL, return); private = stonith->private; local_blob.id = NULL; local_blob.callback = NULL; local_blob.user_data = NULL; local_blob.only_success = FALSE; if (msg != NULL) { crm_element_value_int(msg, F_STONITH_RC, &rc); crm_element_value_int(msg, F_STONITH_CALLID, &call_id); } CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result")); blob = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id)); if 
(blob != NULL) { local_blob = *blob; blob = NULL; stonith_api_del_callback(stonith, call_id, FALSE); } else { crm_trace("No callback found for call %d", call_id); local_blob.callback = NULL; } if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback); } else if (private->op_callback == NULL && rc != pcmk_ok) { crm_warn("STONITH command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed STONITH Update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_callback(stonith, call_id, rc, NULL, private->op_callback); } crm_trace("OP callback activated."); } /* */ static stonith_event_t * xml_to_event(xmlNode * msg) { stonith_event_t *event = calloc(1, sizeof(stonith_event_t)); const char *ntype = crm_element_value(msg, F_SUBTYPE); char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG); crm_log_xml_trace(msg, "stonith_notify"); crm_element_value_int(msg, F_STONITH_RC, &(event->result)); if (safe_str_eq(ntype, T_STONITH_NOTIFY_FENCE)) { event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION); if (data) { event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN); event->action = crm_element_value_copy(data, F_STONITH_ACTION); event->target = crm_element_value_copy(data, F_STONITH_TARGET); event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE); event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID); event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME); event->device = crm_element_value_copy(data, F_STONITH_DEVICE); } else { crm_err("No data for %s event", ntype); crm_log_xml_notice(msg, "BadEvent"); } } free(data_addr); return event; } static void event_free(stonith_event_t * event) { free(event->id); 
free(event->type); free(event->message); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (safe_str_neq(entry->event, event)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = stonith->private; if (stonith->state == stonith_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } if (call_options & st_opt_sync_call) { ipc_flags |= crm_ipc_client_response; } stonith->call_id++; /* prevent call_id from being negative (or zero) and conflicting * with the stonith_errors enum * use 2 because we use it as (stonith->call_id - 1) below */ if (stonith->call_id < 1) { stonith->call_id = 1; } CRM_CHECK(native->token != NULL,; ); op_msg = stonith_create_op(stonith->call_id, native->token, op, data, 
call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to STONITH service, Timeout: %ds", op, timeout); rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); free_xml(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); CRM_CHECK(stonith->call_id != 0, return -EPROTO); free_xml(op_reply); return stonith->call_id; } rc = pcmk_ok; crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); if (reply_id == stonith->call_id) { crm_trace("Synchronous reply %d received", reply_id); if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) { rc = -ENOMSG; } if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); free_xml(op_reply); rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); free_xml(op_reply); rc = -ENOMSG; } done: if (crm_ipc_connected(native->ipc) == FALSE) { crm_err("STONITH disconnected"); stonith->state = stonith_disconnected; } free_xml(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (crm_ipc_connected(private->ipc) == FALSE) { crm_err("Connection closed"); stay_connected = 
FALSE; } } return stay_connected; } int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->private; blob.stonith = st; blob.xml = string2xml(buffer); if (blob.xml == NULL) { crm_warn("Received a NULL msg from STONITH service: %s.", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, F_TYPE); crm_trace("Activating %s callbacks...", type); if (safe_str_eq(type, T_STONITH_NG)) { stonith_perform_callback(st, blob.xml, 0, 0); } else if (safe_str_eq(type, T_STONITH_NOTIFY)) { g_list_foreach(private->notify_list, stonith_send_notification, &blob); } else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout); crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } free_xml(blob.xml); return 1; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Disconnecting %p first", stonith); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { crm_trace("Destroying %p", stonith); 
if(stonith) { stonith->cmds->free(stonith); } } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); private = calloc(1, sizeof(stonith_private_t)); new_stonith->private = private; private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, stonith_destroy_op_callback); private->notify_list = NULL; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; /* *INDENT-ON* */ return new_stonith; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { 
stonith_key_value_t *p, *end; p = calloc(1, sizeof(stonith_key_value_t)); if (key) { p->key = strdup(key); } if (value) { p->value = strdup(value); } end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __FUNCTION__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { char *name = NULL; const char *action = "reboot"; int rc = -EPROTO; stonith_t *st = NULL; enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide; api_log_open(); st = stonith_api_new(); if (st) { rc = st->cmds->connect(st, "stonith-api", NULL); if(rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } } if (uname != NULL) { name = strdup(uname); } else if (nodeid > 0) { opts |= st_opt_cs_nodeid; name = crm_itoa(nodeid); } if (off) { action = "off"; } if (rc == pcmk_ok) { rc = st->cmds->fence(st, opts, name, action, timeout, 0); if(rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s ", nodeid, uname, action); } } if (st) { st->cmds->disconnect(st); stonith_api_delete(st); } free(name); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = 0; char *name = NULL; time_t when = 0; stonith_t *st = NULL; stonith_history_t *history, *hp = NULL; enum stonith_call_options opts = st_opt_sync_call; st = stonith_api_new(); if (st) { rc = st->cmds->connect(st, 
"stonith-api", NULL); if(rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } } if (uname != NULL) { name = strdup(uname); } else if (nodeid > 0) { opts |= st_opt_cs_nodeid; name = crm_itoa(nodeid); } if (st && rc == pcmk_ok) { int entries = 0; int progress = 0; int completed = 0; rc = st->cmds->history(st, opts, name, &history, 120); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } if (st) { st->cmds->disconnect(st); stonith_api_delete(st); } if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } free(name); return when; } #if HAVE_STONITH_STONITH_H # include const char *i_hate_pils(int rc); const char * i_hate_pils(int rc) { return PIL_strerror(rc); } #endif diff --git a/lrmd/pacemaker_remote.in b/lrmd/pacemaker_remote.in index a356510c1a..92fe125b42 100644 --- a/lrmd/pacemaker_remote.in +++ b/lrmd/pacemaker_remote.in @@ -1,175 +1,177 @@ #!/bin/bash # Authors: # Andrew Beekhof # # License: Revised BSD # chkconfig: - 99 01 # description: Pacemaker Cluster Manager # processname: pacemaker_remoted # ### BEGIN INIT INFO # Provides: pacemaker_remote # Required-Start: $network $remote_fs # Should-Start: $syslog # Required-Stop: $network $remote_fs # Default-Start: # Default-Stop: # Short-Description: Starts and stops the Pacemaker remote agent for non-cluster nodes # Description: Starts and stops the Pacemaker remote agent for non-cluster nodes ### END INIT INFO desc="Pacemaker Remote Agent" prog="pacemaker_remoted" 
cman=0 # set secure PATH PATH="/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@" checkrc() { if [ $? = 0 ]; then success else failure fi } success() { echo -ne "[ OK ]\r" } failure() { echo -ne "[FAILED]\r" } status() { pid=$(pidof $1 2>/dev/null) local rtrn=$? if [ $rtrn -ne 0 ]; then echo "$1 is stopped" if [ -f "@localstatedir@/run/$prog.pid" ]; then rtrn=1 else rtrn=3 fi else echo "$1 (pid $pid) is running..." fi return $rtrn } if [ -d @CONFIGDIR@ ]; then [ -f @INITDIR@/functions ] && . @INITDIR@/functions set -a [ -f @CONFIGDIR@/pacemaker ] && . @CONFIGDIR@/pacemaker [ -f @CONFIGDIR@/sbd ] && . @CONFIGDIR@/sbd set +a fi LOCK_DIR="." if [ -d "@localstatedir@/lock/subsys" ]; then LOCK_DIR="@localstatedir@/lock/subsys" elif [ -d "@localstatedir@/lock" ]; then LOCK_DIR="@localstatedir@/lock" fi [ -z "$LOCK_FILE" ] && LOCK_FILE="$LOCK_DIR/pacemaker_remote" # Check if there is a valid watchdog-device configured in sbd config if [ x != "x$SBD_WATCHDOG_DEV" -a "/dev/null" != "$SBD_WATCHDOG_DEV" -a -c "$SBD_WATCHDOG_DEV" ]; then # enhance for unavailable chkconfig - don't touch sbd for now if chkconfig --list sbd_remote_helper 2>/dev/null | grep -q ":on"; then SBD_SERVICE=sbd_remote_helper fi fi start() { echo -n "Starting $desc: " # most recent distributions use tmpfs for $@localstatedir@/run # to avoid to clean it up on every boot. 
# they also assume that init scripts will create # required subdirectories for proper operations mkdir -p "@localstatedir@/run" if status $prog > /dev/null 2>&1; then success else $prog > /dev/null 2>&1 & # Time to connect to corosync and fail sleep 5 if status $prog > /dev/null 2>&1; then touch "$LOCK_FILE" pidof $prog > "@localstatedir@/run/$prog.pid" success else failure rtrn=1 fi fi echo - [ "x$SBD_SERVICE" != x ] && service $SBD_SERVICE start + [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE start } stop() { if status $prog > /dev/null 2>&1; then echo -n "Signaling $desc to terminate: " kill -TERM $(pidof $prog) > /dev/null 2>&1 success echo echo -n "Waiting for $desc to unload:" while status $prog > /dev/null 2>&1; do sleep 1 echo -n "." done + else + echo -n "$desc is already stopped" fi rm -f "$LOCK_FILE" rm -f "@localstatedir@/run/$prog.pid" success echo - [ "x$SBD_SERVICE" != x ] && service $SBD_SERVICE stop + [ "x$SBD_SERVICE" = "x" ] || service $SBD_SERVICE stop } rtrn=0 case "$1" in start) start ;; restart|reload|force-reload) stop start ;; condrestart|try-restart) if status $prog > /dev/null 2>&1; then stop start rtrn=$? fi ;; status) status $prog rtrn=$? ;; stop) stop rtrn=$? 
;; *) echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}" rtrn=2 ;; esac exit $rtrn diff --git a/lrmd/regression.py.in b/lrmd/regression.py.in index ad12ae97ae..eef76918be 100755 --- a/lrmd/regression.py.in +++ b/lrmd/regression.py.in @@ -1,1187 +1,1290 @@ #!/usr/bin/python """ Regression tests for Pacemaker's lrmd """ # Pacemaker targets compatibility with Python 2.6+ and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright (C) 2012-2016 Andrew Beekhof " __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import sys import subprocess import shlex import time # Where to find test binaries # Prefer the source tree if available -build_dir="@abs_top_builddir@" -test_dir=sys.path[0] +BUILD_DIR = "@abs_top_builddir@" +TEST_DIR = sys.path[0] -new_path=os.environ['PATH'] -if os.path.exists("%s/regression.py.in" % test_dir): - print("Running tests from the source tree: %s (%s)" % (build_dir, test_dir)) - new_path = "%s/lrmd:%s" % (build_dir, new_path) # For lrmd, lrmd_test and pacemaker_remoted - new_path = "%s/tools:%s" % (build_dir, new_path) # For crm_resource - new_path = "%s/fencing:%s" % (build_dir, new_path) # For stonithd +def update_path(): + """ Set the PATH environment variable appropriately for the tests """ -else: - print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % test_dir) - new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) # For stonithd, lrmd, lrmd_test and pacemaker_remoted + new_path = os.environ['PATH'] + if os.path.exists("%s/regression.py.in" % TEST_DIR): + print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) + new_path = "%s/lrmd:%s" % (BUILD_DIR, new_path) # For lrmd, lrmd_test and pacemaker_remoted + new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For crm_resource + new_path = "%s/fencing:%s" % (BUILD_DIR, new_path) # For stonithd -print(new_path) 
-os.environ['PATH']=new_path + else: + print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) + new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) # For stonithd, lrmd, lrmd_test and pacemaker_remoted + + print(new_path) + os.environ['PATH'] = new_path def shlex_split(command): """ Wrapper for shlex.split() that works around Python 2.6 bug """ - if sys.version_info < (2,7,): + if sys.version_info < (2, 7,): return shlex.split(command.encode('ascii')) else: return shlex.split(command) def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): + """ Run a command, and return its standard output. 
""" + test = subprocess.Popen(shlex_split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") -class Test: - def __init__(self, name, description, verbose = 0, tls = 0): +class Test(object): + """ Executor for a single lrmd regression test """ + + def __init__(self, name, description, verbose=0, tls=0): self.name = name self.description = description self.cmds = [] if tls: self.daemon_location = "pacemaker_remoted" else: self.daemon_location = "lrmd" self.test_tool_location = "lrmd_test" self.verbose = verbose self.tls = tls self.result_txt = "" self.cmd_tool_output = "" - self.result_exitcode = 0; + self.result_exitcode = 0 self.lrmd_process = None self.stonith_process = None self.executed = 0 - def __new_cmd(self, cmd, args, exitcode, stdout_match = "", no_wait = 0, stdout_negative_match = "", kill=None): + def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): + """ Add a command to be executed as part of this test """ + if self.verbose and cmd == self.test_tool_location: args = args + " -V " if (cmd == self.test_tool_location) and self.tls: args = args + " -S " self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, "cmd_output" : "", } ) def start_environment(self): + """ Prepare the host for running a test """ + ### make sure we are in full control here ### cmd = shlex_split("killall -q -9 stonithd lt-stonithd lrmd lt-lrmd lrmd_test lt-lrmd_test pacemaker_remoted") test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() additional_args = "" if self.tls == 0: self.stonith_process = subprocess.Popen(shlex_split("stonithd -s")) if self.verbose: additional_args = additional_args + " -V" - self.lrmd_process = subprocess.Popen(shlex_split("%s %s -l /tmp/lrmd-regression.log" % (self.daemon_location, additional_args))) + self.lrmd_process 
= subprocess.Popen(shlex_split("%s %s -l /tmp/lrmd-regression.log" + % (self.daemon_location, additional_args))) time.sleep(1) def clean_environment(self): + """ Clean up the host after running a test """ + if self.lrmd_process: self.lrmd_process.terminate() self.lrmd_process.wait() if self.verbose: print("Daemon output") - f = io.open('/tmp/lrmd-regression.log', 'rt') - for line in f.readlines(): + logfile = io.open('/tmp/lrmd-regression.log', 'rt') + for line in logfile.readlines(): print(line.strip()) os.remove('/tmp/lrmd-regression.log') if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.lrmd_process = None self.stonith_process = None def add_sys_cmd(self, cmd, args): + """ Add a simple command to be executed as part of this test """ + self.__new_cmd(cmd, args, 0, "") def add_sys_cmd_no_wait(self, cmd, args): + """ Add a simple command to be executed (without waiting) as part of this test """ + self.__new_cmd(cmd, args, 0, "", 1) def add_expected_fail_sys_cmd(self, cmd, args, exitcode): + """ Add a command to be executed as part of this test and expected to fail """ + self.__new_cmd(cmd, args, exitcode) - def add_cmd_check_stdout(self, args, match, no_match = ""): + def add_cmd_check_stdout(self, args, match, no_match=""): + """ Add a command with expected output to be executed as part of this test """ + self.__new_cmd(self.test_tool_location, args, 0, match, 0, no_match) def add_cmd(self, args): + """ Add an lrmd_test command to be executed as part of this test """ + self.__new_cmd(self.test_tool_location, args, 0, "") - def add_cmd_and_kill(self, killProc, args): - self.__new_cmd(self.test_tool_location, args, 0, "", kill=killProc) + def add_cmd_and_kill(self, kill_proc, args): + """ Add an lrmd_test command and system command to be executed as part of this test """ + + self.__new_cmd(self.test_tool_location, args, 0, "", kill=kill_proc) def add_expected_fail_cmd(self, args): + """ Add an lrmd_test command to be 
executed as part of this test and expected to fail """ + self.__new_cmd(self.test_tool_location, args, 1, "") def get_exitcode(self): + """ Return the exit status of the last test execution """ + return self.result_exitcode def print_result(self, filler): + """ Print the result of the last test execution """ + print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): + """ Execute a command as part of this test """ + cmd = shlex_split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) ### Typically, the kill argument is used to detect some sort of ### failure. Without yielding for a few seconds here, the process ### launched earlier that is listening for the failure may not have ### time to connect to the lrmd. time.sleep(2) subprocess.Popen(shlex_split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return 0 output = pipe_output(test) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: test.returncode = -2 print("STDOUT string '%s' was not found in cmd output" % (args['stdout_match'])) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: test.returncode = -2 print("STDOUT string '%s' was found in cmd output" % (args['stdout_negative_match'])) args['cmd_output'] = output - return test.returncode; + return test.returncode def run(self): + """ Execute this test. 
""" + res = 0 i = 1 if self.tls and self.name.count("stonith") != 0: self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name) print(self.result_txt) return res self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = 0 for cmd in self.cmds: res = self.run_cmd(cmd) if res != cmd['expected_exitcode']: print(cmd['cmd_output']) print("Step %d FAILED - command returned %d, expected %d" % (i, res, cmd['expected_exitcode'])) - self.result_txt = "FAILURE - '%s' failed at step %d. Command: lrmd_test %s" % (self.name, i, cmd['args']) + msg = "FAILURE - '%s' failed at step %d. Command: lrmd_test %s" + self.result_txt = msg % (self.name, i, cmd['args']) self.result_exitcode = -1 break else: if self.verbose: print(cmd['cmd_output'].strip()) print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res -class Tests: - def __init__(self, verbose = 0, tls = 0): +class Tests(object): + """ Collection of all lrmd regression tests """ + + def __init__(self, verbose=0, tls=0): self.tests = [] self.verbose = verbose - self.tls = tls; + self.tls = tls self.rsc_classes = output_from_command("crm_resource --list-standards") self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line self.need_authkey = 0 self.action_timeout = " -t 5000 " if self.tls: self.rsc_classes.remove("stonith") if "systemd" in self.rsc_classes: - # the lrmd_dummy_daemon requires this, we are importing it - # here just to guarantee it is installed before allowing this - # script to run. Otherwise, running without this import being - # available will make all the systemd tests look like they fail, - # which is really scary looking. I'd rather see the import fail. 
- import systemd.daemon + try: + # This code doesn't need this import, but lrmd_dummy_daemon does, + # so ensure the dependency is available rather than cause all + # systemd tests to fail. + import systemd.daemon + except ImportError: + print("Fatal error: python systemd bindings not found. Is package installed?", + file=sys.stderr) + sys.exit(1) print("Testing "+repr(self.rsc_classes)) self.common_cmds = { "ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy", "ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout, "ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"", "ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout, "ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ", "ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout, "ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ", "ocf_monitor_line" : "-c exec -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "ocf_cancel_line" : "-c cancel -r \"ocf_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "systemd_reg_line" : "-c register_rsc -r systemd_test_rsc "+self.action_timeout+" -C systemd -T lrmd_dummy_daemon", "systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_unreg_line" : "-c unregister_rsc -r 
\"systemd_test_rsc\" "+self.action_timeout, "systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"", "systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout, "systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ", "systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout, "systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ", "systemd_monitor_line" : "-c exec -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "systemd_cancel_line" : "-c cancel -r \"systemd_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T lrmd_dummy_daemon", "upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout, "upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"", "upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout, "upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ", "upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout, "upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok 
op_status:complete\" ", "upstart_monitor_line" : "-c exec -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "upstart_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "upstart_cancel_line" : "-c cancel -r \"upstart_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy", "service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout, "service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"", "service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout, "service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ", "service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout, "service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ", "service_monitor_line" : "-c exec -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "service_cancel_line" : "-c cancel -r \"service_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ", "lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T 
LSBDummy", "lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ", "lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout, "lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"", "lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout, "lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ", "lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout, "lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ", "lsb_monitor_line" : "-c exec -r \"lsb_test_rsc\" -a status -i \"2000\" "+self.action_timeout, "lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "lsb_cancel_line" : "-c cancel -r \"lsb_test_rsc\" -a \"status\" -i \"2000\" -t \"6000\" ", "lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ", "heartbeat_reg_line" : "-c register_rsc -r hb_test_rsc "+self.action_timeout+" -C heartbeat -T HBDummy", "heartbeat_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:hb_test_rsc action:none rc:ok op_status:complete\" ", "heartbeat_unreg_line" : "-c unregister_rsc -r \"hb_test_rsc\" "+self.action_timeout, "heartbeat_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:hb_test_rsc action:none rc:ok op_status:complete\"", "heartbeat_start_line" : "-c exec -r \"hb_test_rsc\" -a \"start\" -k 1 -v a -k 2 -v b "+self.action_timeout, "heartbeat_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:start rc:ok op_status:complete\" ", "heartbeat_stop_line" : "-c exec -r \"hb_test_rsc\" -a \"stop\" -k 1 -v a -k 2 -v b "+self.action_timeout, 
"heartbeat_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:stop rc:ok op_status:complete\" ", "heartbeat_monitor_line" : "-c exec -r \"hb_test_rsc\" -a status -k 1 -v a -k 2 -v b -i \"2000\" "+self.action_timeout, "heartbeat_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout, "heartbeat_cancel_line" : "-c cancel -r \"hb_test_rsc\" -a \"status\" -k 1 -v a -k 2 -v b -i \"2000\" -t \"6000\" ", "heartbeat_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:hb_test_rsc action:status rc:ok op_status:Cancelled\" ", "stonith_reg_line" : "-c register_rsc -r stonith_test_rsc "+self.action_timeout+" -C stonith -P pacemaker -T fence_dummy_monitor", "stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ", "stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout, "stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"", "stonith_start_line" : "-c exec -r \"stonith_test_rsc\" -a \"start\" -t 8000 ", "stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ", "stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout, "stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ", "stonith_monitor_line" : "-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" "+self.action_timeout, "stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, "stonith_cancel_line" : "-c cancel -r \"stonith_test_rsc\" -a \"monitor\" -i \"2000\" -t \"6000\" ", "stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete 
rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ", } def new_test(self, name, description): + """ Create a named test """ + test = Test(name, description, self.verbose, self.tls) self.tests.append(test) return test def setup_test_environment(self): + """ Prepare the host before executing any tests """ + os.system("service pacemaker_remote stop") self.cleanup_test_environment() if self.tls and not os.path.isfile("/etc/pacemaker/authkey"): self.need_authkey = 1 os.system("mkdir -p /etc/pacemaker") os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1") ### Make fake systemd daemon and unit file ### dummy_daemon = """#!/usr/bin/python import time, systemd.daemon time.sleep(3) systemd.daemon.notify("READY=1") while True: time.sleep(5) """ dummy_service_file = """ [Unit] Description=Dummy resource that takes a while to start [Service] Type=notify ExecStart=/usr/sbin/lrmd_dummy_daemon """ dummy_upstart_job = (""" description "Dummy service for regression tests" exec dd if=/dev/random of=/dev/null """) dummy_fence_sleep_agent = ("""#!/usr/bin/python import sys import time def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: time.sleep(30000) sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) dummy_fence_agent = ("""#!/usr/bin/python from __future__ import print_function, unicode_literals, absolute_import, division import sys def main(): for line in sys.stdin.readlines(): if line.count("monitor") > 0: sys.exit(0) if line.count("metadata") > 0: print('') print(' dummy description.') print(' http://www.example.com') print(' ') print(' ') print(' ') print(' ') print(' Fencing Action') print(' ') print(' ') print(' ') print(' ') print(' Physical plug number or name of virtual machine') print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print('') sys.exit(0) sys.exit(-1) if __name__ == "__main__": main() """) os.system("cat <<-END 
>>/etc/init/lrmd_dummy_daemon.conf\n%s\nEND" % (dummy_upstart_job)) os.system("cat <<-END >>/usr/sbin/lrmd_dummy_daemon\n%s\nEND" % (dummy_daemon)) os.system("cat <<-END >>/lib/systemd/system/lrmd_dummy_daemon.service\n%s\nEND" % (dummy_service_file)) os.system("chmod a+x /usr/sbin/lrmd_dummy_daemon") os.system("cat <<-END >>/usr/sbin/fence_dummy_sleep\n%s\nEND" % (dummy_fence_sleep_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_sleep") os.system("cat <<-END >>/usr/sbin/fence_dummy_monitor\n%s\nEND" % (dummy_fence_agent)) os.system("chmod 711 /usr/sbin/fence_dummy_monitor") - if os.path.exists("%s/cts/LSBDummy" % build_dir): - print("Using %s/cts/LSBDummy" % build_dir) - os.system("cp %s/cts/LSBDummy /etc/init.d/LSBDummy" % build_dir) + if os.path.exists("%s/cts/LSBDummy" % BUILD_DIR): + print("Using %s/cts/LSBDummy" % BUILD_DIR) + os.system("cp %s/cts/LSBDummy /etc/init.d/LSBDummy" % BUILD_DIR) if not os.path.exists("@OCF_RA_DIR@/pacemaker"): os.system("mkdir -p @OCF_RA_DIR@/pacemaker/") # Install helper OCF agents - for ra in [ "Dummy", "Stateful", "ping" ]: - os.system("cp %s/extra/resources/%s @OCF_RA_DIR@/pacemaker/%s" % (build_dir, ra, ra)) - os.system("chmod a+x @OCF_RA_DIR@/pacemaker/%s" % (ra)) + for agent in ["Dummy", "Stateful", "ping"]: + os.system("cp %s/extra/resources/%s @OCF_RA_DIR@/pacemaker/%s" % (BUILD_DIR, agent, agent)) + os.system("chmod a+x @OCF_RA_DIR@/pacemaker/%s" % (agent)) else: # Assume it's installed print("Using @datadir@/@PACKAGE@/tests/cts/LSBDummy") os.system("cp @datadir@/@PACKAGE@/tests/cts/LSBDummy /etc/init.d/LSBDummy") os.system("chmod a+x /etc/init.d/LSBDummy") os.system("ls -al /etc/init.d/LSBDummy") os.system("mkdir -p @CRM_CORE_DIR@/root") os.system("mkdir -p /etc/ha.d/resource.d") - if os.path.exists("%s/cts/HBDummy" % build_dir): - print("Using %s/cts/HBDummy" % build_dir) - os.system("cp %s/cts/HBDummy /etc/ha.d/resource.d/HBDummy" % build_dir) + if os.path.exists("%s/cts/HBDummy" % BUILD_DIR): + print("Using 
%s/cts/HBDummy" % BUILD_DIR) + os.system("cp %s/cts/HBDummy /etc/ha.d/resource.d/HBDummy" % BUILD_DIR) else: # Assume it's installed print("Using @datadir@/@PACKAGE@/tests/cts/HBDummy") os.system("cp @datadir@/@PACKAGE@/tests/cts/HBDummy /etc/ha.d/resource.d/HBDummy") os.system("chmod a+x /etc/ha.d/resource.d/HBDummy") os.system("ls -al /etc/ha.d/resource.d/HBDummy") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") def cleanup_test_environment(self): + """ Clean up the host after executing desired tests """ + if self.need_authkey: os.system("rm -f /etc/pacemaker/authkey") os.system("rm -f /etc/init.d/LSBDummy") os.system("rm -f /etc/ha.d/resource.d/HBDummy") os.system("rm -f /lib/systemd/system/lrmd_dummy_daemon.service") os.system("rm -f /usr/sbin/lrmd_dummy_daemon") os.system("rm -f /usr/sbin/fence_dummy_monitor") os.system("rm -f /usr/sbin/fence_dummy_sleep") if os.path.exists("/bin/systemctl"): os.system("systemctl daemon-reload") - ### These are tests that should apply to all resource classes ### def build_generic_tests(self): + """ Register tests that apply to all resource classes """ + common_cmds = self.common_cmds ### register/unregister tests ### for rsc in self.rsc_classes: - test = self.new_test("generic_registration_%s" % (rsc), "Simple resource registration test for %s standard" % (rsc)) + test = self.new_test("generic_registration_%s" % (rsc), + "Simple resource registration test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### start/stop tests ### for rsc in self.rsc_classes: test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + 
common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor cancel test ### for rsc in self.rsc_classes: - test = self.new_test("generic_monitor_cancel_%s" % (rsc), "Simple monitor cancel test for %s standard" % (rsc)) + test = self.new_test("generic_monitor_cancel_%s" % (rsc), + "Simple monitor cancel test for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) - test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### - test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### + ### If this fails, that means the monitor may not be getting rescheduled #### + test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) + ### If this fails, that means the monitor may not be getting rescheduled #### + test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) - test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### - test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### + ### If this happens the monitor did not actually cancel correctly. ### + test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) + ### If this happens the monitor did not actually cancel correctly. 
### + test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### monitor duplicate test ### for rsc in self.rsc_classes: - test = self.new_test("generic_monitor_duplicate_%s" % (rsc), "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) + test = self.new_test("generic_monitor_duplicate_%s" % (rsc), + "Test creation and canceling of duplicate monitors for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) - test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### - test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### + ### If this fails, that means the monitor may not be getting rescheduled #### + test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) + ### If this fails, that means the monitor may not be getting rescheduled #### + test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # Add the duplicate monitors test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) # verify we still get update events - test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the 
monitor may not be getting rescheduled #### + ### If this fails, that means the monitor may not be getting rescheduled #### + test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) # cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) - test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### - test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### + ### If this happens the monitor did not actually cancel correctly. ### + test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) + ### If this happens the monitor did not actually cancel correctly. ### + test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) ### stop implies cancel test ### for rsc in self.rsc_classes: - test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) + test = self.new_test("generic_stop_implies_cancel_%s" % (rsc), + "Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc)) test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) - test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor may not be getting rescheduled #### - test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If 
this fails, that means the monitor may not be getting rescheduled #### + ### If this fails, that means the monitor may not be getting rescheduled #### + test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) + ### If this fails, that means the monitor may not be getting rescheduled #### + test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) - test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### - test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this happens the monitor did not actually cancel correctly. ### + ### If this happens the monitor did not actually cancel correctly. ### + test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) + ### If this happens the monitor did not actually cancel correctly. ### + test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)]) test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) - ### These are complex tests that involve managing multiple resouces of different types ### def build_multi_rsc_tests(self): + """ Register complex tests that involve managing multiple resouces of different types """ + common_cmds = self.common_cmds # do not use service and systemd at the same time, it is the same resource. ### register start monitor stop unregister resources of each type at the same time. 
### - test = self.new_test("multi_rsc_start_stop_all", "Start, monitor, and stop resources of multiple types and classes") + test = self.new_test("multi_rsc_start_stop_all", + "Start, monitor, and stop resources of multiple types and classes") for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: - test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) ### If this fails, that means the monitor is not being rescheduled #### + ### If this fails, that means the monitor is not being rescheduled #### + test.add_cmd(common_cmds["%s_monitor_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)]) for rsc in self.rsc_classes: test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)]) - ### These are tests related to how the lrmd handles failures. 
### def build_negative_tests(self): + """ Register tests related to how the lrmd handles failures """ ### ocf start timeout test ### test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.") - test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") - test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") # -t must be less than self.action_timeout - test.add_cmd("-l " - "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out\" "+self.action_timeout) - test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") - test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") - + test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + + self.action_timeout + + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + # -t must be less than self.action_timeout + test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w") + test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + + self.action_timeout) + test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") + test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith start timeout test ### test = self.new_test("stonith_start_timeout", "Force start 
timeout to occur, verify start failure.") - test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_sleep\" " + + self.action_timeout + + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -t 1000 -w") # -t must be less than self.action_timeout - test.add_cmd("-l " - "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out\" "+self.action_timeout) - test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") - test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" ' + + self.action_timeout) + test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") + test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### stonith component fail ### common_cmds = self.common_cmds test = self.new_test("stonith_component_fail", "Kill stonith component after lrmd connects") test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"]) test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"]) - test.add_cmd("-c exec -r \"stonith_test_rsc\" -a \"monitor\" -i \"600000\" " - "-l 
\"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) + test.add_cmd('-c exec -r "stonith_test_rsc" -a "monitor" -i "600000" -l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" ' + + self.action_timeout) - test.add_cmd_and_kill("killall -9 -q stonithd lt-stonithd" ,"-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error\" -t 15000") + test.add_cmd_and_kill("killall -9 -q stonithd lt-stonithd", + '-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error" -t 15000') test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"]) ### monitor fail for ocf resources ### test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.") - test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") - test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") - test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") - test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") - test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) - test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) - test.add_cmd_and_kill("rm -f 
@localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") + test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" " + + self.action_timeout + + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout + + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + test.add_cmd('-c exec -r "test_rsc" -a "monitor" -i "100" ' + + self.action_timeout + + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"') + test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + + self.action_timeout) + test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' + + self.action_timeout) + test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", + '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" -t 6000') test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) - test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) - test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT 
event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") + test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" " + + self.action_timeout) + test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" " + + self.action_timeout) + test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + + self.action_timeout + + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### verify notify changes only for monitor operation. ### test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") - test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+" -o " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + test.add_cmd('-c exec -r "test_rsc" -a "monitor" -i "100" ' + + self.action_timeout + + ' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ') test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) 
test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 6000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) - test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout + + '-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"') ### monitor fail for systemd resource ### if "systemd" in self.rsc_classes: test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T lrmd_dummy_daemon "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT 
event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q lrmd_dummy_daemon", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### monitor fail for upstart resource ### if "upstart" in self.rsc_classes: test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..") 
test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T lrmd_dummy_daemon "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd_and_kill("killall -9 -q dd", "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" -t 8000") test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running 
op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Cancel non-existent operation on a resource ### test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_expected_fail_cmd("-c cancel -r test_rsc -a \"monitor\" -i 1234 -t 
\"6000\" " ### interval is wrong, should fail - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a stop -i 100 -t \"6000\" " ### action name is wrong, should fail - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") - test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ") + test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout + + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Attempt to invoke non-existent rsc id ### test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r test_rsc -a monitor -i 6000 "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok 
op_status:complete\" ") test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, systemd ### if "systemd" in self.rsc_classes: test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") if "upstart" in self.rsc_classes: test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure") test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc 
action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register and start a resource that doesn't exist, ocf ### test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with non-existent provider ### test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf 
-P pancakes -T Dummy "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register ocf with empty provider field ### test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.") test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") - ### stress tests ### def build_stress_tests(self): + """ Register stress tests """ + timeout = "-t 20000" iterations = 25 test = self.new_test("ocf_stress", "Verify OCF agent 
handling works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) if "systemd" in self.rsc_classes: test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load") for i in range(iterations): test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T lrmd_dummy_daemon -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c exec -r rsc_%s -a monitor %s -i 1000 -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete\"" % (i, timeout, i)) for i in range(iterations): test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i)) test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i)) iterations = 9 timeout = "-t 30000" ### Verify recurring op in-flight collision is handled in 
series properly test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout)) for i in range(iterations): test.add_cmd("-c exec -r test_rsc -a monitor %s -i 100%d -k op_sleep -v 2 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\"" % (timeout, i)) # test.add_sys_cmd("sleep", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout)) test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout)) - ### These are tests that target specific cases ### def build_custom_tests(self): - + """ Register tests that target specific cases """ ### verify resource temporary folder is created and used by heartbeat agents. 
### test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -a start -t 4000") test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@") test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state") test.add_cmd("-c exec -r test_rsc -a stop -t 4000") test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay then stop test ### test = self.new_test("start_delay", "Verify start delay works as expected.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000") - test.add_expected_fail_cmd("-l " - "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000") - test.add_cmd("-l " - "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") - test.add_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") - test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok 
op_status:complete\" ") + test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000") + test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000") + test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout + + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ") + test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### start delay, but cancel before it gets a chance to start. ### test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.") test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy " - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000") - test.add_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") - test.add_expected_fail_cmd("-l " - "\"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000") - test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout + + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ") + test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000") + test.add_cmd("-c 
unregister_rsc -r test_rsc " + self.action_timeout + + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Register a bunch of resources, verify we can get info on them ### test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.") if "systemd" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C systemd -T lrmd_dummy_daemon "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") if "upstart" in self.rsc_classes: test.add_cmd("-c register_rsc -r rsc1 -C upstart -T lrmd_dummy_daemon "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd("-c get_rsc_info -r rsc2 ") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### Register duplicate, verify only one entry exists and can still be removed. 
test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy") test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout) test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful") test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout) test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ") ### verify the option to only send notification to the original client. ### test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.") test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ") test.add_cmd("-c exec -r \"test_rsc\" -a \"monitor\" -i \"100\" "+self.action_timeout+" -n " - "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ") # this will fail because the monitor notifications should only go to 
the original caller, which no longer exists. test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout) test.add_cmd("-c cancel -r \"test_rsc\" -a \"monitor\" -i \"100\" -t \"6000\" ") test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+ - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### Verify that versioned resource parameters are chosen properly # We'll default to one state file, then check the version to use a different one state_file_default = "@CRM_RSCTMP_DIR@/versioned_params_default.state" state_file_expected = "@CRM_RSCTMP_DIR@/versioned_params_expected.state" # Versioned attributes are passed as a single key-value pair. # Here, we define the value to choose the state file; # ocf:heartbeat:Dummy should always be at least version 0.9. versioned_key = "#versioned_attributes" versioned_attrs = """ """ % (state_file_expected, state_file_default) cmd_params = "-k '%s' -v '%s' " % (versioned_key, versioned_attrs) test = self.new_test("versioned_params", "Verify use of versioned resource parameters") # First, remove the possible state files, so we can reliably tell what gets created. test.add_sys_cmd("rm", "-f %s %s" % (state_file_expected, state_file_default)) # Register and start the resource. test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy " - "-l 'NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete' " - "%s" % (self.action_timeout)) + "-l 'NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete' " + "%s" % (self.action_timeout)) test.add_cmd("-c exec -r test_rsc -a start -t 6000 %s" % cmd_params) # Check the created state file. 
test.add_expected_fail_sys_cmd("ls", state_file_default, 2) test.add_sys_cmd("ls", state_file_expected) # Stop and unregister the resource. test.add_cmd("-c exec -r test_rsc -a stop -t 4000 %s" % cmd_params) test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout + - "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") + "-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ") ### get metadata ### test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource") - test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"" - ,"resource-agent name=\"Dummy\"") + test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"", + "resource-agent name=\"Dummy\"") test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"") test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"") ### get metadata ### test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource") - test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"" - ,"resource-agent name='LSBDummy'") + test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"", + "resource-agent name='LSBDummy'") ### get stonith metadata ### test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource") test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy_monitor\"", - "resource-agent name=\"fence_dummy_monitor\"") + "resource-agent name=\"fence_dummy_monitor\"") ### get metadata ### if "systemd" in self.rsc_classes: test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource") - test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"lrmd_dummy_daemon\"" - ,"resource-agent name=\"lrmd_dummy_daemon\"") + test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T 
\"lrmd_dummy_daemon\"", + "resource-agent name=\"lrmd_dummy_daemon\"") ### get metadata ### if "upstart" in self.rsc_classes: test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource") - test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"lrmd_dummy_daemon\"" - ,"resource-agent name=\"lrmd_dummy_daemon\"") + test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"lrmd_dummy_daemon\"", + "resource-agent name=\"lrmd_dummy_daemon\"") if "heartbeat" in self.rsc_classes: test = self.new_test("get_heartbeat_metadata", "Retrieve metadata for a resource") - test.add_cmd_check_stdout("-c metadata -C \"heartbeat\" -T \"HBDummy\"" - ,"resource-agent name='HBDummy'") + test.add_cmd_check_stdout("-c metadata -C \"heartbeat\" -T \"HBDummy\"", + "resource-agent name='HBDummy'") ### get ocf providers ### - test = self.new_test("list_ocf_providers", "Retrieve list of available resource providers, verifies pacemaker is a provider.") + test = self.new_test("list_ocf_providers", + "Retrieve list of available resource providers, verifies pacemaker is a provider.") test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker") test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker") ### Verify agents only exist in their lists ### test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.") test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ### test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful") test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ### test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist 
test.add_cmd_check_stdout("-c list_agents -C ocf", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy_monitor") ### should not exist test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy_monitor") ### should not exist if "systemd" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### systemd ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C systemd", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy_monitor") ### should not exist if "upstart" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents ", "lrmd_dummy_daemon") ### upstart ### test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents -C upstart", "lrmd_dummy_daemon") test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy_monitor") ### should not exist if "stonith" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy_monitor") ### stonith ### test.add_cmd_check_stdout("-c list_agents -C stonith", "", "lrmd_dummy_daemon") ### should not exist test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist test.add_cmd_check_stdout("-c list_agents ", "fence_dummy_monitor") if "heartbeat" in self.rsc_classes: test.add_cmd_check_stdout("-c list_agents -C heartbeat", "HBDummy") test.add_cmd_check_stdout("-c list_agents -C heartbeat", "", "LSBDummy") ### should not exist test.add_cmd_check_stdout("-c list_agents -C service", "", "HBDummy") ### should not exist def 
print_list(self): + """ List all registered tests """ + print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def run_single(self, name): + """ Run a single named test """ + for test in self.tests: if test.name == name: test.run() - break; + break def run_tests_matching(self, pattern): + """ Run all tests whose name matches a pattern """ + for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_tests(self): + """ Run all tests """ + for test in self.tests: test.run() def exit(self): + """ Exit (with error status code if any test failed) """ + for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: sys.exit(-1) - sys.exit(0); + sys.exit(0) def print_results(self): - failures = 0; - success = 0; + """ Print summary of results of executed tests """ + + failures = 0 + success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != 0: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) -class TestOptions: + +class TestOptions(object): + """ Option handler """ + def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['show-usage'] = 0 self.options['pacemaker-remote'] = 0 def build_options(self, argv): + """ Set options based on command-line arguments """ + args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": 
self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-R" or args[i] == "--pacemaker-remote": self.options['pacemaker-remote'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): + """ Show command usage """ + print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--pacemaker-remote | -R Test pacemaker-remote binary instead of lrmd.") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_top'") print("\t\t python ./regression.py --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t python ./regression.py --run-only-pattern systemd") def main(argv): - o = TestOptions() - o.build_options(argv) + """ Run lrmd regression tests as specified by arguments """ - tests = Tests(o.options['verbose'], o.options['pacemaker-remote']) + update_path() + + opts = TestOptions() + opts.build_options(argv) + + tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote']) tests.build_generic_tests() tests.build_multi_rsc_tests() tests.build_negative_tests() tests.build_custom_tests() tests.build_stress_tests() tests.setup_test_environment() print("Starting ...") - if o.options['list-tests']: + if opts.options['list-tests']: tests.print_list() - elif o.options['show-usage']: - o.show_usage() - elif o.options['run-only-pattern'] != 
"": - tests.run_tests_matching(o.options['run-only-pattern']) + elif opts.options['show-usage']: + opts.show_usage() + elif opts.options['run-only-pattern'] != "": + tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() - elif o.options['run-only'] != "": - tests.run_single(o.options['run-only']) + elif opts.options['run-only'] != "": + tests.run_single(opts.options['run-only']) tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_test_environment() tests.exit() + if __name__ == "__main__": main(sys.argv)