diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in index aa59f35685..af5915ad1a 100644 --- a/cts/cts-fencing.in +++ b/cts/cts-fencing.in @@ -1,1441 +1,1441 @@ #!@PYTHON@ """ Regression tests for Pacemaker's fencer """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division __copyright__ = "Copyright 2012-2018 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import sys import subprocess import shlex import time import tempfile # Where to find test binaries # Prefer the source tree if available BUILD_DIR = "@abs_top_builddir@" TEST_DIR = sys.path[0] AUTOGEN_COROSYNC_TEMPLATE = """ totem { version: 2 cluster_name: cts-fencing crypto_cipher: none crypto_hash: none transport: udp } nodelist { node { nodeid: 1 name: %s ring0_addr: 127.0.0.1 } } logging { debug: off to_syslog: no to_stderr: no to_logfile: yes logfile: %s } """ # These values must be kept in sync with include/crm/crm.h class CrmExit(object): OK = 0 ERROR = 1 INVALID_PARAM = 2 UNIMPLEMENT_FEATURE = 3 INSUFFICIENT_PRIV = 4 NOT_INSTALLED = 5 NOT_CONFIGURED = 6 NOT_RUNNING = 7 USAGE = 64 DATAERR = 65 NOINPUT = 66 NOUSER = 67 NOHOST = 68 UNAVAILABLE = 69 SOFTWARE = 70 OSERR = 71 OSFILE = 72 CANTCREAT = 73 IOERR = 74 TEMPFAIL = 75 PROTOCOL = 76 NOPERM = 77 CONFIG = 78 FATAL = 100 PANIC = 101 DISCONNECT = 102 SOLO = 103 DIGEST = 104 NOSUCH = 105 QUORUM = 106 UNSAFE = 107 EXISTS = 108 MULTIPLE = 109 OLD = 110 TIMEOUT = 124 MAX = 255 def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-fencing.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR)) # For pacemaker-fenced and cts-fence-helper new_path = "%s/daemons/fenced:%s" % (BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For stonith_admin new_path = "%s/cts:%s" % (BUILD_DIR, new_path) # For cts-support else: print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR) # For pacemaker-fenced, cts-fence-helper, and cts-support new_path = "@CRM_DAEMON_DIR@:%s" % (new_path) print('Using PATH="{}"'.format(new_path)) os.environ['PATH'] = new_path def pipe_output(pipes, stdout=True, stderr=False): """ Wrapper to get text output from pipes regardless of Python version """ output = "" pipe_outputs = pipes.communicate() if sys.version_info < (3,): if stdout: output = output + pipe_outputs[0] if stderr: output = output + pipe_outputs[1] else: if stdout: output = output + pipe_outputs[0].decode(sys.stdout.encoding) if stderr: output = output + pipe_outputs[1].decode(sys.stderr.encoding) return output def output_from_command(command): """ Execute command and return its standard output """ test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) test.wait() return pipe_output(test).split("\n") def localname(): """ Return the uname of the local host """ our_uname = output_from_command("uname -n") if our_uname: our_uname = our_uname[0] else: our_uname = "localhost" return our_uname def killall(process): """ Kill all instances of a process """ cmd = shlex.split("killall -9 -q %s" % process) test = subprocess.Popen(cmd, stdout=subprocess.PIPE) test.wait() class TestError(Exception): """ Base class for exceptions in this module """ pass class ExitCodeError(TestError): """ Exception raised when command exit status is unexpected """ def __init__(self, exit_code): self.exit_code = exit_code def __str__(self): return repr(self.exit_code) class OutputNotFoundError(TestError): """ Exception raised when command output does not contain wanted string """ def __init__(self, output): self.output = output def __str__(self): return repr(self.output) class OutputFoundError(TestError): """ Exception raised when command output contains unwanted string """ def __init__(self, exit_code): self.output = output def __str__(self): return repr(self.output) class Test(object): """ Executor for a single test """ def __init__(self, name, description, verbose=0, with_cpg=0): self.name = name self.description = description self.cmds = [] self.verbose = verbose self.result_txt = "" self.cmd_tool_output = "" self.result_exitcode = CrmExit.OK if with_cpg: self.stonith_options = "-c" self.enable_corosync = 1 else: self.stonith_options = "-s" self.enable_corosync = 0 self.stonith_process = None self.stonith_output = "" self.stonith_patterns = [] self.negative_stonith_patterns = [] self.executed = 0 def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None): """ Add a command to be executed as part of this test """ self.cmds.append( { "cmd" : cmd, "kill" : kill, "args" : args, "expected_exitcode" : exitcode, "stdout_match" : stdout_match, "stdout_negative_match" : stdout_negative_match, "no_wait" : no_wait, } ) def start_environment(self): """ Prepare the host for executing a test """ # Make sure we are in full control killall("pacemakerd") killall("pacemaker-fenced") if self.verbose: self.stonith_options = self.stonith_options + " -V" print("Starting pacemaker-fenced with %s" % self.stonith_options) if os.path.exists("/tmp/stonith-regression.log"): os.remove('/tmp/stonith-regression.log') cmd = "pacemaker-fenced %s -l /tmp/stonith-regression.log" % self.stonith_options self.stonith_process = subprocess.Popen(shlex.split(cmd)) time.sleep(1) def clean_environment(self): """ Clean up the host after executing a test """ if self.stonith_process: self.stonith_process.terminate() self.stonith_process.wait() self.stonith_output = "" self.stonith_process = None logfile = io.open('/tmp/stonith-regression.log', 'rt') for line in logfile.readlines(): self.stonith_output = self.stonith_output + line if self.verbose: print("Daemon Output Start") print(self.stonith_output) print("Daemon Output End") os.remove('/tmp/stonith-regression.log') def add_stonith_log_pattern(self, pattern): """ Add a log pattern to expect from this test """ self.stonith_patterns.append(pattern) def add_stonith_neg_log_pattern(self, pattern): """ Add a log pattern that should not occur with this test """ self.negative_stonith_patterns.append(pattern) def add_cmd(self, cmd, args): """ Add a simple command to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "") def add_cmd_no_wait(self, cmd, args): """ Add a simple command to be executed (without waiting) as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, "", 1) def add_cmd_check_stdout(self, cmd, args, match, no_match=""): """ Add a simple command with expected output to be executed as part of this test """ self.__new_cmd(cmd, args, CrmExit.OK, match, 0, no_match) def add_expected_fail_cmd(self, cmd, args, exitcode=CrmExit.ERROR): """ Add a command to be executed as part of this test and expected to fail """ self.__new_cmd(cmd, args, exitcode, "") def get_exitcode(self): """ Return the exit status of the last test execution """ return self.result_exitcode def print_result(self, filler): """ Print the result of the last test execution """ print("%s%s" % (filler, self.result_txt)) def run_cmd(self, args): """ Execute a command as part of this test """ cmd = shlex.split(args['args']) cmd.insert(0, args['cmd']) if self.verbose: print("\n\nRunning: "+" ".join(cmd)) test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if args['kill']: if self.verbose: print("Also running: "+args['kill']) subprocess.Popen(shlex.split(args['kill'])) if args['no_wait'] == 0: test.wait() else: return CrmExit.OK output = pipe_output(test, stderr=True) if self.verbose: print(output) if test.returncode != args['expected_exitcode']: raise ExitCodeError(test.returncode) if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0: raise OutputNotFoundError(output) if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0: raise OutputFoundError(output) def count_negative_matches(self, outline): """ Return 1 if a line matches patterns that shouldn't have occurred """ count = 0 for line in self.negative_stonith_patterns: if outline.count(line): count = 1 if self.verbose: print("This pattern should not have matched = '%s" % (line)) return count def match_stonith_patterns(self): """ Check test output for expected patterns """ negative_matches = 0 cur = 0 pats = self.stonith_patterns total_patterns = len(self.stonith_patterns) if len(self.stonith_patterns) == 0 and len(self.negative_stonith_patterns) == 0: return for line in self.stonith_output.split("\n"): negative_matches = negative_matches + self.count_negative_matches(line) if len(pats) == 0: continue cur = -1 for pat in pats: cur = cur + 1 if line.count(pats[cur]): del pats[cur] break if len(pats) > 0 or negative_matches: if self.verbose: for pat in pats: print("Pattern Not Matched = '%s'" % pat) msg = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." self.result_txt = msg % (self.name, len(pats), total_patterns, negative_matches) self.result_exitcode = CrmExit.ERROR def set_error(self, step, cmd): """ Record failure of this test """ msg = "FAILURE - '%s' failed at step %d. Command: %s %s" self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args']) self.result_exitcode = CrmExit.ERROR def run(self): """ Execute this test. """ res = 0 i = 1 self.start_environment() if self.verbose: print("\n--- START TEST - %s" % self.name) self.result_txt = "SUCCESS - '%s'" % (self.name) self.result_exitcode = CrmExit.OK for cmd in self.cmds: try: self.run_cmd(cmd) except ExitCodeError as e: print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) self.set_error(i, cmd); break except OutputNotFoundError as e: print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) self.set_error(i, cmd); break except OutputFoundError as e: print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) self.set_error(i, cmd); break if self.verbose: print("Step %d SUCCESS" % (i)) i = i + 1 self.clean_environment() if self.result_exitcode == CrmExit.OK: self.match_stonith_patterns() print(self.result_txt) if self.verbose: print("--- END TEST - %s\n" % self.name) self.executed = 1 return res class Tests(object): """ Collection of all fencing regression tests """ def __init__(self, verbose=0): self.tests = [] self.verbose = verbose self.autogen_corosync_cfg = not os.path.exists("/etc/corosync/corosync.conf") def new_test(self, name, description, with_cpg=0): """ Create a named test """ test = Test(name, description, self.verbose, with_cpg) self.tests.append(test) return test def print_list(self): """ List all registered tests """ print("\n==== %d TESTS FOUND ====" % (len(self.tests))) print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) print("%35s - %s" % ("--------------------", "--------------------")) for test in self.tests: print("%35s - %s" % (test.name, test.description)) print("==== END OF LIST ====\n") def start_corosync(self): """ Start the corosync process """ if self.verbose: print("Starting corosync") test = subprocess.Popen("corosync", stdout=subprocess.PIPE) test.wait() time.sleep(10) def run_single(self, name): """ Run a single named test """ for test in self.tests: if test.name == name: test.run() break def run_tests_matching(self, pattern): """ Run all tests whose name matches a pattern """ for test in self.tests: if test.name.count(pattern) != 0: test.run() def run_cpg_only(self): """ Run all corosync-enabled tests """ for test in self.tests: if test.enable_corosync: test.run() def run_no_cpg(self): """ Run all standalone tests """ for test in self.tests: if not test.enable_corosync: test.run() def run_tests(self): """ Run all tests """ for test in self.tests: test.run() def exit(self): """ Exit (with error status code if any test failed) """ for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: sys.exit(CrmExit.ERROR) sys.exit(CrmExit.OK) def print_results(self): """ Print summary of results of executed tests """ failures = 0 success = 0 print("\n\n======= FINAL RESULTS ==========") print("\n--- FAILURE RESULTS:") for test in self.tests: if test.executed == 0: continue if test.get_exitcode() != CrmExit.OK: failures = failures + 1 test.print_result(" ") else: success = success + 1 if failures == 0: print(" None") print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) def build_api_sanity_tests(self): """ Register tests to verify basic API usage """ verbose_arg = "" if self.verbose: verbose_arg = "-V" test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") test.add_cmd("cts-fence-helper", "-t %s" % (verbose_arg)) test = self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", 1) test.add_cmd("cts-fence-helper", "-m %s" % (verbose_arg)) def build_custom_timeout_tests(self): """ Register tests to verify custom timeout usage """ # custom timeout without topology test = self.new_test("cpg_custom_timeout_1", "Verify per device timeouts work as expected without using topology.", 1) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4"') test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4 = 7 test.add_stonith_log_pattern("Total timeout set to 7") # custom timeout _WITH_ topology test = self.new_test("cpg_custom_timeout_2", "Verify per device timeouts work as expected _WITH_ topology.", 1) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', '-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"') test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4000"') test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 3 -v false2") test.add_cmd("stonith_admin", "-F node3 -t 2") # timeout is 2+1+4000 = 4003 test.add_stonith_log_pattern("Total timeout set to 4003") def build_fence_merge_tests(self): """ Register tests to verify when fence operations should be merged """ ### Simple test that overlapping fencing operations get merged test = self.new_test("cpg_custom_merge_single", "Verify overlapping identical fencing operations are merged, no fencing levels used.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### one merger will happen test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur test = self.new_test("cpg_custom_merge_multiple", "Verify multiple overlapping identical fencing operations are merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") ### Test that multiple mergers occur with topologies used test = self.new_test("cpg_custom_merge_with_topology", "Verify multiple overlapping identical fencing operations are merged with fencing levels.", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd_no_wait("stonith_admin", "-F node3 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") ### 4 mergers should occur test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") test.add_stonith_log_pattern("Merging stonith action off for node node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") test.add_stonith_log_pattern("Operation off of node3 by") def build_fence_no_merge_tests(self): """ Register tests to verify when fence operations should not be merged """ test = self.new_test("cpg_custom_no_merge", "Verify differing fencing operations are not merged", 1) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true1") test.add_cmd_no_wait("stonith_admin", "-F node2 -t 10") test.add_cmd("stonith_admin", "-F node3 -t 10") test.add_stonith_neg_log_pattern("Merging stonith action off for node node3 originating from client") def build_standalone_tests(self): """ Register a grab bag of tests that can be executed in standalone or corosync mode """ test_types = [ { "prefix" : "standalone", "use_cpg" : 0, }, { "prefix" : "cpg", "use_cpg" : 1, }, ] # test what happens when all devices timeout for test_type in test_types: test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], "Verify that all devices timeout, a fencing failure is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") if test_type["use_cpg"] == 1: test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", CrmExit.TIMEOUT) test.add_stonith_log_pattern("Total timeout set to 6") else: test.add_expected_fail_cmd("stonith_admin", "-F node3 -t 2", CrmExit.ERROR) test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'false3' returned: ") # test what happens when multiple devices can fence a node, but the first device fails. for test_type in test_types: test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], "Verify that when one fence device fails for a node, the others are tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2") if test_type["use_cpg"] == 1: test.add_stonith_log_pattern("Total timeout set to 6") # simple topology test for one device for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_simple" % test_type["prefix"], "Verify all fencing devices at a level are used.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("Total timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # add topology, delete topology, verify fencing still works for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_add_remove" % test_type["prefix"], "Verify fencing occurrs after all topology levels are removed", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true") test.add_cmd("stonith_admin", "-d node3 -i 1") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("Total timeout set to 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level has multiple devices. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_device_fails" % test_type["prefix"], "Verify if one device in a level fails, the other is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 40") test.add_stonith_log_pattern("for host 'node3' with device 'false' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test what happens when the first fencing level fails. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"], "Verify if one level fails, the next leve is tried.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 3") test.add_stonith_log_pattern("Total timeout set to 18") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'false2' returned: -201") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # test what happens when the first fencing level had devices that no one has registered for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], "Verify topology can continue with missing devices.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") test.add_cmd("stonith_admin", "-F node3 -t 2") # Test what happens if multiple fencing levels are defined, and then the first one is removed. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_removal" % test_type["prefix"], "Verify level removal works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1") test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1") test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2") test.add_cmd("stonith_admin", "-r node3 -i 2 -v false2") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true3") test.add_cmd("stonith_admin", "-r node3 -i 3 -v true4") # Now remove level 2, verify none of the devices in level two are hit. test.add_cmd("stonith_admin", "-d node3 -i 2") test.add_cmd("stonith_admin", "-F node3 -t 20") test.add_stonith_log_pattern("Total timeout set to 8") test.add_stonith_log_pattern("for host 'node3' with device 'false1' returned: -201") test.add_stonith_neg_log_pattern("for host 'node3' with device 'false2' returned: ") test.add_stonith_log_pattern("for host 'node3' with device 'true3' returned: 0") test.add_stonith_log_pattern("for host 'node3' with device 'true4' returned: 0") # Test targeting a topology level by node name pattern. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_topology_level_pattern" % test_type["prefix"], "Verify targeting topology by node name pattern works.", test_type["use_cpg"]) test.add_cmd("stonith_admin", """-R true -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1 node2 node3" """) test.add_cmd("stonith_admin", """-r '@node.*' -i 1 -v true""") test.add_cmd("stonith_admin", "-F node3 -t 2") test.add_stonith_log_pattern("for host 'node3' with device 'true' returned: 0") # test allowing commas and semicolons as delimiters in pcmk_host_list for test_type in test_types: test = self.new_test("%s_host_list_delimiters" % test_type["prefix"], "Verify commas and semicolons can be used as pcmk_host_list delimiters", test_type["use_cpg"]) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1,node2,node3" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=pcmk1;pcmk2;pcmk3" """) test.add_cmd("stonith_admin", "stonith_admin -F node2 -t 2") test.add_cmd("stonith_admin", "stonith_admin -F pcmk3 -t 2") test.add_stonith_log_pattern("for host 'node2' with device 'true1' returned: 0") test.add_stonith_log_pattern("for host 'pcmk3' with device 'true2' returned: 0") # test the stonith builds the correct list of devices that can fence a node. for test_type in test_types: test = self.new_test("%s_list_devices" % test_type["prefix"], "Verify list of devices that can fence a node is correct", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true2", "true1") test.add_cmd_check_stdout("stonith_admin", "-l node1 -V", "true3", "true1") # simple test of device monitor for test_type in test_types: test = self.new_test("%s_monitor" % test_type["prefix"], "Verify device is reachable", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-Q false1") test.add_expected_fail_cmd("stonith_admin", "-Q true2", CrmExit.ERROR) # Verify monitor occurs for duration of timeout period on failure for test_type in test_types: test = self.new_test("%s_monitor_timeout" % test_type["prefix"], "Verify monitor uses duration of timeout period given.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '-R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 5", CrmExit.ERROR) test.add_stonith_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries for test_type in test_types: test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], "Verify monitor retries until max retry value or timeout is hit.", test_type["use_cpg"]) test.add_cmd("stonith_admin", '-R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"') test.add_expected_fail_cmd("stonith_admin", "-Q true1 -t 15", CrmExit.ERROR) test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test for test_type in test_types: test = self.new_test("%s_register" % test_type["prefix"], "Verify devices can be registered and un-registered", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-Q true1") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", CrmExit.ERROR) # simple reboot test for test_type in test_types: test = self.new_test("%s_reboot" % test_type["prefix"], "Verify devices can be rebooted", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-B node3 -t 2") test.add_cmd("stonith_admin", "-D true1") test.add_expected_fail_cmd("stonith_admin", "-Q true1", CrmExit.ERROR) # test fencing history. for test_type in test_types: if test_type["use_cpg"] == 0: continue test = self.new_test("%s_fence_history" % test_type["prefix"], "Verify last fencing operation is returned.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"") test.add_cmd("stonith_admin", "-F node3 -t 2 -V") - test.add_cmd_check_stdout("stonith_admin", "-H node3", "was able to turn off node node3", "") + test.add_cmd_check_stdout("stonith_admin", "-H node3", "succeeded turning off node node3", "") # simple test of dynamic list query for test_type in test_types: test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") - test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") + test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 fence devices found") # fence using dynamic list query for test_type in test_types: test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") test.add_cmd("stonith_admin", "-F fake_port_1 -t 5 -V") # simple test of query using status action for test_type in test_types: test = self.new_test("%s_status_query" % test_type["prefix"], "Verify dynamic list of fencing devices can be retrieved.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") test.add_cmd("stonith_admin", "-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"") - test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 devices found") + test.add_cmd_check_stdout("stonith_admin", "-l fake_port_1", "3 fence devices found") # test what happens when no reboot action is advertised for test_type in test_types: test = self.new_test("%s_no_reboot_support" % test_type["prefix"], "Verify reboot action defaults to off when no reboot action is advertised by agent.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-B node1 -t 5 -V") test.add_stonith_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") # make sure reboot is used when reboot action is advertised for test_type in test_types: test = self.new_test("%s_with_reboot_support" % test_type["prefix"], "Verify reboot action can be used when metadata advertises it.", test_type["use_cpg"]) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") test.add_cmd("stonith_admin", "-B node1 -t 5 -V") test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'") test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") def build_nodeid_tests(self): """ Register tests that use a corosync node id """ our_uname = localname() ### verify nodeid is supplied when nodeid is in the metadata parameters test = self.new_test("cpg_supply_nodeid", "Verify nodeid is given when fence agent has nodeid as parameter", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters test = self.new_test("cpg_do_not_supply_nodeid", "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", 1) # use a host name that won't be in corosync.conf test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=regr-test\"") test.add_cmd("stonith_admin", "-F regr-test -t 3") test.add_stonith_neg_log_pattern("For stonith action (off) for victim regr-test, adding nodeid") ### verify nodeid use doesn't explode standalone mode test = self.new_test("standalone_do_not_supply_nodeid", "Verify nodeid in metadata parameter list doesn't kill standalone mode", 0) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-F %s -t 3" % (our_uname)) test.add_stonith_neg_log_pattern("For stonith action (off) for victim %s, adding nodeid" % (our_uname)) def build_unfence_tests(self): """ Register tests that verify unfencing """ our_uname = localname() ### verify unfencing using automatic unfencing test = self.new_test("cpg_unfence_required_1", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) # both devices should be executed test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") ### verify unfencing using automatic unfencing fails if any of the required agents fail test = self.new_test("cpg_unfence_required_2", "Verify require unfencing on all devices when automatic=true in agent's metadata", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=fail" -o "pcmk_host_list=%s"' % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U %s -t 6" % (our_uname), CrmExit.ERROR) ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_3", "Verify require unfencing on all devices even when at different topology levels", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") ### verify unfencing using automatic devices with topology test = self.new_test("cpg_unfence_required_4", "Verify all required devices are executed even with topology levels fail.", 1) test.add_cmd('stonith_admin', '-R true1 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true3 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R true4 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false3 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd('stonith_admin', '-R false4 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=%s node3"' % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v false1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v false3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true3" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 3 -v false4" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 4 -v true4" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true2' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true3' returned: 0 (OK)") test.add_stonith_log_pattern("with device 'true4' returned: 0 (OK)") def build_unfence_on_target_tests(self): """ Register tests that verify unfencing that runs on the target """ our_uname = localname() ### verify unfencing using on_target device test = self.new_test("cpg_unfence_on_target_1", "Verify unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s\"" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify failure of unfencing using on_target device test = self.new_test("cpg_unfence_on_target_2", "Verify failure unfencing with on_target = true", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake_1234\"" % (our_uname)) test.add_expected_fail_cmd("stonith_admin", "-U node_fake_1234 -t 3", CrmExit.ERROR) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology test = self.new_test("cpg_unfence_on_target_3", "Verify unfencing with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node3\"" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 1 -v true1" % (our_uname)) test.add_cmd("stonith_admin", "-r %s -i 2 -v true2" % (our_uname)) test.add_cmd("stonith_admin", "-U %s -t 3" % (our_uname)) test.add_stonith_log_pattern("(on) to be executed on the target node") ### verify unfencing using on_target device with topology fails when victim node doesn't exist test = self.new_test("cpg_unfence_on_target_4", "Verify unfencing failure with on_target = true using topology", 1) test.add_cmd("stonith_admin", "-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=%s node_fake\"" % (our_uname)) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true2") test.add_expected_fail_cmd("stonith_admin", "-U node_fake -t 3", CrmExit.ERROR) test.add_stonith_log_pattern("(on) to be executed on the target node") def build_remap_tests(self): """ Register tests that verify remapping of reboots to off-on """ test = self.new_test("cpg_remap_simple", "Verify sequential topology reboot is remapped to all-off-then-all-on", 1) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=1" -o "pcmk_reboot_timeout=10" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """ """-o "pcmk_off_timeout=2" -o "pcmk_reboot_timeout=20" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_stonith_log_pattern("Total timeout set to 3 for peer's fencing of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") # fence_dummy sets "on" as an on_target action test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test = self.new_test("cpg_remap_automatic", "Verify remapped topology reboot skips automatic 'on'", 1) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy_auto_unfence """ """-o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test.add_stonith_neg_log_pattern("perform op 'node_fake on' with") test.add_stonith_neg_log_pattern("'on' failure") test = self.new_test("cpg_remap_complex_1", "Verify remapped topology reboot in second level works if non-remapped first level fails", 1) test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'false1'") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true2'") test.add_stonith_log_pattern("Remapped off of node_fake complete, remapping to on") test.add_stonith_log_pattern("Ignoring true1 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Ignoring true2 'on' failure (no capable peers) for node_fake") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test = self.new_test("cpg_remap_complex_2", "Verify remapped topology reboot failure in second level proceeds to third level", 1) test.add_cmd("stonith_admin", """-R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", """-R true3 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node_fake" """) test.add_cmd("stonith_admin", "-r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", "-r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", "-r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", "-B node_fake -t 5") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'false1'") test.add_stonith_log_pattern("Remapping multiple-device reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake off' with 'true1'") test.add_stonith_log_pattern("perform op 'node_fake off' with 'false2'") test.add_stonith_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_stonith_log_pattern("Undoing remap of reboot of node_fake") test.add_stonith_log_pattern("perform op 'node_fake reboot' with 'true2'") test.add_stonith_neg_log_pattern("node_fake with true3") def setup_environment(self, use_corosync): """ Prepare the host before executing any tests """ if use_corosync: if self.autogen_corosync_cfg: (handle, self.autogen_corosync_log) = tempfile.mkstemp(prefix="cts-fencing-", suffix=".corosync.log") os.close(handle) corosync_cfg = io.open("/etc/corosync/corosync.conf", "w") corosync_cfg.write(AUTOGEN_COROSYNC_TEMPLATE % (localname(), self.autogen_corosync_log)) corosync_cfg.close() ### make sure we are in control ### killall("corosync") self.start_corosync() subprocess.call(["cts-support", "install"]) def cleanup_environment(self, use_corosync): """ Clean up the host after executing desired tests """ if use_corosync: killall("corosync") if self.autogen_corosync_cfg: if self.verbose: print("Corosync output") logfile = io.open(self.autogen_corosync_log, 'rt') for line in logfile.readlines(): print(line.strip()) logfile.close() os.remove(self.autogen_corosync_log) os.remove("/etc/corosync/corosync.conf") subprocess.call(["cts-support", "uninstall"]) class TestOptions(object): """ Option handler """ def __init__(self): self.options = {} self.options['list-tests'] = 0 self.options['run-all'] = 1 self.options['run-only'] = "" self.options['run-only-pattern'] = "" self.options['verbose'] = 0 self.options['invalid-arg'] = "" self.options['cpg-only'] = 0 self.options['no-cpg'] = 0 self.options['show-usage'] = 0 def build_options(self, argv): """ Set options based on command-line arguments """ args = argv[1:] skip = 0 for i in range(0, len(args)): if skip: skip = 0 continue elif args[i] == "-h" or args[i] == "--help": self.options['show-usage'] = 1 elif args[i] == "-l" or args[i] == "--list-tests": self.options['list-tests'] = 1 elif args[i] == "-V" or args[i] == "--verbose": self.options['verbose'] = 1 elif args[i] == "-n" or args[i] == "--no-cpg": self.options['no-cpg'] = 1 elif args[i] == "-c" or args[i] == "--cpg-only": self.options['cpg-only'] = 1 elif args[i] == "-r" or args[i] == "--run-only": self.options['run-only'] = args[i+1] skip = 1 elif args[i] == "-p" or args[i] == "--run-only-pattern": self.options['run-only-pattern'] = args[i+1] skip = 1 def show_usage(self): """ Show command usage """ print("usage: " + sys.argv[0] + " [options]") print("If no options are provided, all tests will run") print("Options:") print("\t [--help | -h] Show usage") print("\t [--list-tests | -l] Print out all registered tests.") print("\t [--cpg-only | -c] Only run tests that require corosync.") print("\t [--no-cpg | -n] Only run tests that do not require corosync") print("\t [--run-only | -r 'testname'] Run a specific test") print("\t [--verbose | -V] Verbose output") print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value") print("\n\tExample: Run only the test 'start_stop'") print("\t\t " + sys.argv[0] + " --run-only start_stop") print("\n\tExample: Run only the tests with the string 'systemd' present in them") print("\t\t " + sys.argv[0] + " --run-only-pattern systemd") def main(argv): """ Run fencing regression tests as specified by arguments """ update_path() opts = TestOptions() opts.build_options(argv) use_corosync = 1 tests = Tests(opts.options['verbose']) tests.build_standalone_tests() tests.build_custom_timeout_tests() tests.build_api_sanity_tests() tests.build_fence_merge_tests() tests.build_fence_no_merge_tests() tests.build_unfence_tests() tests.build_unfence_on_target_tests() tests.build_nodeid_tests() tests.build_remap_tests() if opts.options['list-tests']: tests.print_list() sys.exit(CrmExit.OK) elif opts.options['show-usage']: opts.show_usage() sys.exit(CrmExit.OK) print("Starting ...") if opts.options['no-cpg']: use_corosync = 0 tests.setup_environment(use_corosync) if opts.options['run-only-pattern'] != "": tests.run_tests_matching(opts.options['run-only-pattern']) tests.print_results() elif opts.options['run-only'] != "": tests.run_single(opts.options['run-only']) tests.print_results() elif opts.options['no-cpg']: tests.run_no_cpg() tests.print_results() elif opts.options['cpg-only']: tests.run_cpg_only() tests.print_results() else: tests.run_tests() tests.print_results() tests.cleanup_environment(use_corosync) tests.exit() if __name__ == "__main__": main(sys.argv) diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c index 2898c51052..ef6fa36313 100644 --- a/lib/cluster/cpg.c +++ b/lib/cluster/cpg.c @@ -1,788 +1,809 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* PCMK__SPECIAL_PID* */ cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */ static bool cpg_evicted = FALSE; gboolean(*pcmk_cpg_dispatch_fn) (int kind, const char *from, const char *data) = NULL; #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) void cluster_disconnect_cpg(crm_cluster_t *cluster) { pcmk_cpg_handle = 0; if (cluster->cpg_handle) { crm_trace("Disconnecting CPG"); cpg_leave(cluster->cpg_handle, &cluster->group); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; } else { crm_info("No CPG connection"); } } uint32_t get_local_nodeid(cpg_handle_t handle) { cs_error_t rc = CS_OK; int retries = 0; static uint32_t local_nodeid = 0; cpg_handle_t local_handle = handle; cpg_callbacks_t cb = { }; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; if(local_nodeid != 0) { return local_nodeid; } if(handle == 0) { crm_trace("Creating connection"); cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); return 0; } rc = cpg_fd_get(local_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); goto bail; } } if (rc == CS_OK) { retries = 0; crm_trace("Performing lookup"); cs_repeat(retries, 5, rc = cpg_local_get(local_handle, &local_nodeid)); } if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); } bail: if(handle == 0) { crm_trace("Closing connection"); cpg_finalize(local_handle); } crm_debug("Local nodeid is %u", local_nodeid); return local_nodeid; } GListPtr cs_message_queue = NULL; int cs_message_timer = 0; static ssize_t crm_cs_flush(gpointer data); static gboolean crm_cs_flush_cb(gpointer data) { cs_message_timer = 0; crm_cs_flush(data); return FALSE; } #define CS_SEND_MAX 200 static ssize_t crm_cs_flush(gpointer data) { int sent = 0; ssize_t rc = 0; int queue_len = 0; static unsigned int last_sent = 0; cpg_handle_t *handle = (cpg_handle_t *)data; if (*handle == 0) { crm_trace("Connection is dead"); return pcmk_ok; } queue_len = g_list_length(cs_message_queue); if ((queue_len % 1000) == 0 && queue_len > 1) { crm_err("CPG queue has grown to %d", queue_len); } else if (queue_len == CS_SEND_MAX) { crm_warn("CPG queue has grown to %d", queue_len); } if (cs_message_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active %d", cs_message_timer); return pcmk_ok; } while (cs_message_queue && sent < CS_SEND_MAX) { struct iovec *iov = cs_message_queue->data; errno = 0; rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1); if (rc != CS_OK) { break; } sent++; last_sent++; crm_trace("CPG message sent, size=%llu", (unsigned long long) iov->iov_len); cs_message_queue = g_list_remove(cs_message_queue, iov); free(iov->iov_base); free(iov); } queue_len -= sent; if (sent > 1 || cs_message_queue) { crm_info("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)", sent, queue_len, last_sent, ais_error2text(rc), (long long) rc); } else { crm_trace("Sent %d CPG messages (%d remaining, last=%u): %s (%lld)", sent, queue_len, last_sent, ais_error2text(rc), (long long) rc); } if (cs_message_queue) { uint32_t delay_ms = 100; if(rc != CS_OK) { /* Proportionally more if sending failed but cap at 1s */ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); } cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data); } return rc; } gboolean send_cpg_iov(struct iovec * iov) { static unsigned int queued = 0; queued++; crm_trace("Queueing CPG message %u (%llu bytes)", queued, (unsigned long long) iov->iov_len); cs_message_queue = g_list_append(cs_message_queue, iov); crm_cs_flush(&pcmk_cpg_handle); return TRUE; } static int pcmk_cpg_dispatch(gpointer user_data) { int rc = 0; crm_cluster_t *cluster = (crm_cluster_t*) user_data; rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %s (%d)", ais_error2text(rc), rc); cluster->cpg_handle = 0; return -1; } else if(cpg_evicted) { crm_err("Evicted from CPG membership"); return -1; } return 0; } char * pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content, uint32_t *kind, const char **from) { char *data = NULL; AIS_Message *msg = (AIS_Message *) content; if(handle) { // Do filtering and field massaging uint32_t local_nodeid = get_local_nodeid(handle); const char *local_name = get_local_node_name(); if (msg->sender.id > 0 && msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id); return NULL; } else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) { /* Not for us */ crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid); return NULL; } else if (msg->host.size != 0 && safe_str_neq(msg->host.uname, local_name)) { /* Not for us */ crm_trace("Not for us: %s != %s", msg->host.uname, local_name); return NULL; } msg->sender.id = nodeid; if (msg->sender.size == 0) { crm_node_t *peer = crm_get_peer(nodeid, NULL); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); msg->sender.size = strlen(peer->uname); memset(msg->sender.uname, 0, MAX_NAME); memcpy(msg->sender.uname, peer->uname, msg->sender.size); } } } crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", ais_data_len(msg), msg->size, msg->compressed_size); if (kind != NULL) { *kind = msg->header.id; } if (from != NULL) { *from = msg->sender.uname; } if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (check_message_sanity(msg, NULL) == FALSE) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = calloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); goto badmsg; } CRM_ASSERT(rc == BZ_OK); CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (check_message_sanity(msg, data) == FALSE) { goto badmsg; } else if (safe_str_eq("identify", data)) { char *pid_s = crm_getpid_s(); send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); return NULL; } else { data = strdup(msg->data); } // Is this necessary? crm_get_peer(msg->sender.id, msg->sender.uname); crm_trace("Payload: %.200s", data); return data; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(AIS_Message), msg->header.size, msg->size, msg->compressed_size); free(data); return NULL; } -#define PEER_NAME(peer) ((peer)? ((peer)->uname? (peer)->uname : "") : "") - static int cmp_member_list_nodeid(const void *first, const void *second) { const struct cpg_address *const a = *((const struct cpg_address **) first), *const b = *((const struct cpg_address **) second); if (a->nodeid < b->nodeid) { return -1; } else if (a->nodeid > b->nodeid) { return 1; } /* don't bother with "reason" nor "pid" */ return 0; } +static const char * +cpgreason2str(cpg_reason_t reason) +{ + switch (reason) { + case CPG_REASON_JOIN: return " via cpg_join"; + case CPG_REASON_LEAVE: return " via cpg_leave"; + case CPG_REASON_NODEDOWN: return " via cluster exit"; + case CPG_REASON_NODEUP: return " via cluster join"; + case CPG_REASON_PROCDOWN: return " for unknown reason"; + default: break; + } + return ""; +} + +static inline const char * +peer_name(crm_node_t *peer) +{ + if (peer == NULL) { + return "unknown node"; + } else if (peer->uname == NULL) { + return "peer node"; + } else { + return peer->uname; + } +} + void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; gboolean found = FALSE; static int counter = 0; uint32_t local_nodeid = get_local_nodeid(handle); - const struct cpg_address *key, **rival, **sorted; + const struct cpg_address *key, **sorted; sorted = malloc(member_list_entries * sizeof(const struct cpg_address *)); CRM_ASSERT(sorted != NULL); for (size_t iter = 0; iter < member_list_entries; iter++) { sorted[iter] = member_list + iter; } /* so that the cross-matching multiply-subscribed nodes is then cheap */ qsort(sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); for (i = 0; i < left_list_entries; i++) { crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL); - - crm_info("Group event %s.%d: node %u (%s) left: %llu", - groupName->value, counter, left_list[i].nodeid, - PEER_NAME(peer), (unsigned long long) left_list[i].pid); + const struct cpg_address **rival = NULL; /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation and not playing by this rule may go wild in case of multiple residual instances of the same pacemaker daemon at the same node -- we must ensure that the possible local rival(s) won't make us cry out and bail (e.g. when they quit themselves), since all the surrounding logic denies this simple fact that the full membership is discriminated also per the PID of the process beside mere node ID (and implicitly, group ID); practically, this will be sound in terms of not preventing progress, since all the CPG joiners are also API end-point carriers, and that's what matters locally (who's the winner); remotely, we will just compare leave_list and member_list and if - the left process has it's node retained in member_list (under some + the left process has its node retained in member_list (under some other PID, anyway) we will just ignore it as well XXX: long-term fix is to establish in-out PID-aware tracking? */ if (peer) { key = &left_list[i]; rival = bsearch(&key, sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); - if (rival == NULL) { + } + + if (rival == NULL) { + crm_info("Group %s event %d: %s (node %u pid %u) left%s", + groupName->value, counter, peer_name(peer), + left_list[i].nodeid, left_list[i].pid, + cpgreason2str(left_list[i].reason)); + if (peer) { crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); - } else if (left_list[i].nodeid == local_nodeid) { - crm_info("Ignoring the above event %s.%d, comes from a local" - " rival process (presumably not us): %llu", - groupName->value, counter, - (unsigned long long) left_list[i].pid); - } else { - crm_info("Ignoring the above event %s.%d, comes from" - " a rival-rich node: %llu (e.g. %llu process" - " carries on)", - groupName->value, counter, - (unsigned long long) left_list[i].pid, - (unsigned long long) (*rival)->pid); } + } else if (left_list[i].nodeid == local_nodeid) { + crm_warn("Group %s event %d: duplicate local pid %u left%s", + groupName->value, counter, + left_list[i].pid, cpgreason2str(left_list[i].reason)); + } else { + crm_warn("Group %s event %d: " + "%s (node %u) duplicate pid %u left%s (%u remains)", + groupName->value, counter, peer_name(peer), + left_list[i].nodeid, left_list[i].pid, + cpgreason2str(left_list[i].reason), (*rival)->pid); } } free(sorted); sorted = NULL; for (i = 0; i < joined_list_entries; i++) { - crm_info("Group event %s.%d: node %u joined: %llu" - " (unchecked for rivals)", + crm_info("Group %s event %d: node %u pid %u joined%s", groupName->value, counter, joined_list[i].nodeid, - (unsigned long long) joined_list[i].pid); + joined_list[i].pid, cpgreason2str(joined_list[i].reason)); } for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); - crm_info("Group event %s.%d: node %u (%s) is member: %llu" - " (at least once)", - groupName->value, counter, member_list[i].nodeid, - PEER_NAME(peer), member_list[i].pid); - if (member_list[i].nodeid == local_nodeid && member_list[i].pid != getpid()) { /* see the note above */ - crm_info("Ignoring the above event %s.%d, comes from a local rival" - " process: %llu", groupName->value, counter, - (unsigned long long) member_list[i].pid); + crm_warn("Group %s event %d: detected duplicate local pid %u", + groupName->value, counter, member_list[i].pid); continue; } + crm_info("Group %s event %d: %s (node %u pid %u) is member", + groupName->value, counter, peer_name(peer), + member_list[i].nodeid, member_list[i].pid); /* If the caller left auto-reaping enabled, this will also update the * state to member. */ peer = crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) { /* The node is a CPG member, but we currently think it's not a * cluster member. This is possible only if auto-reaping was * disabled. The node may be joining, and we happened to get the CPG * notification before the quorum notification; or the node may have * just died, and we are processing its final messages; or a bug * has affected the peer cache. */ time_t now = time(NULL); if (peer->when_lost == 0) { // Track when we first got into this contradictory state peer->when_lost = now; } else if (now > (peer->when_lost + 60)) { // If it persists for more than a minute, update the state - crm_warn("Node %u member of group %s but believed offline" - " (unchecked for rivals)", + crm_warn("Node %u is member of group %s but was believed offline", member_list[i].nodeid, groupName->value); crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0); } } if (local_nodeid == member_list[i].nodeid) { found = TRUE; } } if (!found) { - crm_err("We're not part of CPG group '%s' anymore!", groupName->value); + crm_err("Local node was evicted from group %s", groupName->value); cpg_evicted = TRUE; } counter++; } gboolean cluster_connect_cpg(crm_cluster_t *cluster) { cs_error_t rc; int fd = -1; int retries = 0; uint32_t id = 0; crm_node_t *peer = NULL; cpg_handle_t handle = 0; const char *message_name = pcmk_message_name(crm_system_name); uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = cluster->destroy, }; cpg_callbacks_t cpg_callbacks = { .cpg_deliver_fn = cluster->cpg.cpg_deliver_fn, .cpg_confchg_fn = cluster->cpg.cpg_confchg_fn, /* .cpg_deliver_fn = pcmk_cpg_deliver, */ /* .cpg_confchg_fn = pcmk_cpg_membership, */ }; cpg_evicted = FALSE; cluster->group.length = 0; cluster->group.value[0] = 0; /* group.value is char[128] */ strncpy(cluster->group.value, message_name, 127); cluster->group.value[127] = 0; cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value)); cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); goto bail; } rc = cpg_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } id = get_local_nodeid(handle); if (id == 0) { crm_err("Could not get local node id from the CPG API"); goto bail; } cluster->nodeid = id; retries = 0; cs_repeat(retries, 30, rc = cpg_join(handle, &cluster->group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", message_name, rc); goto bail; } pcmk_cpg_handle = handle; cluster->cpg_handle = handle; mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(handle); return FALSE; } peer = crm_get_peer(id, NULL); crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ONLINESTATUS); return TRUE; } gboolean send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest) { gboolean rc = TRUE; char *data = NULL; data = dump_xml_unformatted(msg); rc = send_cluster_text(crm_class_cluster, data, local, node, dest); free(data); return rc; } gboolean send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; static int local_name_len = 0; static const char *local_name = NULL; char *target = NULL; struct iovec *iov; AIS_Message *msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); switch (msg_class) { case crm_class_cluster: break; default: crm_err("Invalid message class: %d", msg_class); return FALSE; } CRM_CHECK(dest != crm_msg_ais, return FALSE); if(local_name == NULL) { local_name = get_local_node_name(); } if(local_name_len == 0 && local_name) { local_name_len = strlen(local_name); } if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } msg = calloc(1, sizeof(AIS_Message)); msg_id++; msg->id = msg_id; msg->header.id = msg_class; msg->header.error = CS_OK; msg->host.type = dest; msg->host.local = local; if (node) { if (node->uname) { target = strdup(node->uname); msg->host.size = strlen(node->uname); memset(msg->host.uname, 0, MAX_NAME); memcpy(msg->host.uname, node->uname, msg->host.size); } else { target = crm_strdup_printf("%u", node->id); } msg->host.id = node->id; } else { target = strdup("all"); } msg->sender.id = 0; msg->sender.type = sender; msg->sender.pid = local_pid; msg->sender.size = local_name_len; memset(msg->sender.uname, 0, MAX_NAME); if(local_name && msg->sender.size) { memcpy(msg->sender.uname, local_name, msg->sender.size); } msg->size = 1 + strlen(data); msg->header.size = sizeof(AIS_Message) + msg->size; if (msg->size < CRM_BZ2_THRESHOLD) { msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, data, msg->size); } else { char *compressed = NULL; unsigned int new_size = 0; char *uncompressed = strdup(data); if (crm_compress_string(uncompressed, msg->size, 0, &compressed, &new_size)) { msg->header.size = sizeof(AIS_Message) + new_size; msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, compressed, new_size); msg->is_compressed = TRUE; msg->compressed_size = new_size; } else { msg = realloc_safe(msg, msg->header.size); memcpy(msg->data, data, msg->size); } free(uncompressed); free(compressed); } iov = calloc(1, sizeof(struct iovec)); iov->iov_base = msg; iov->iov_len = msg->header.size; if (msg->compressed_size) { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->compressed_size, data); } else { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->size, data); } free(target); send_cpg_iov(iov); return TRUE; } enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); text = pcmk_message_name(text); if (safe_str_eq(text, "ais")) { type = crm_msg_ais; } else if (safe_str_eq(text, CRM_SYSTEM_CIB)) { type = crm_msg_cib; } else if (safe_str_eq(text, CRM_SYSTEM_CRMD) || safe_str_eq(text, CRM_SYSTEM_DC)) { type = crm_msg_crmd; } else if (safe_str_eq(text, CRM_SYSTEM_TENGINE)) { type = crm_msg_te; } else if (safe_str_eq(text, CRM_SYSTEM_PENGINE)) { type = crm_msg_pe; } else if (safe_str_eq(text, CRM_SYSTEM_LRMD)) { type = crm_msg_lrmd; } else if (safe_str_eq(text, CRM_SYSTEM_STONITHD)) { type = crm_msg_stonithd; } else if (safe_str_eq(text, "stonith-ng")) { type = crm_msg_stonith_ng; } else if (safe_str_eq(text, "attrd")) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1 || type <= crm_msg_stonith_ng) { /* Ensure it's sane */ type = crm_msg_none; } } return type; } diff --git a/tools/Makefile.am b/tools/Makefile.am index 7be5032d33..6516e558aa 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,149 +1,151 @@ # -# Copyright 2004-2019 Andrew Beekhof +# Copyright 2004-2019 the Pacemaker project contributors +# +# The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/Makefile.common if BUILD_SYSTEMD systemdunit_DATA = crm_mon.service endif noinst_HEADERS = crm_resource.h pcmkdir = $(datadir)/$(PACKAGE) pcmk_DATA = report.common report.collector sbin_SCRIPTS = crm_report crm_standby crm_master crm_failcount if BUILD_CIBSECRETS sbin_SCRIPTS += cibsecret endif EXTRA_DIST = $(sbin_SCRIPTS) sbin_PROGRAMS = attrd_updater \ cibadmin \ crmadmin \ crm_simulate \ crm_attribute \ crm_diff \ crm_error \ crm_mon \ crm_node \ crm_resource \ crm_rule \ crm_shadow \ crm_verify \ crm_ticket \ iso8601 \ stonith_admin if BUILD_SERVICELOG sbin_PROGRAMS += notifyServicelogEvent endif if BUILD_OPENIPMI_SERVICELOG sbin_PROGRAMS += ipmiservicelogd endif ## SOURCES MAN8DEPS = crm_attribute crm_node crmadmin_SOURCES = crmadmin.c crmadmin_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_error_SOURCES = crm_error.c crm_error_LDADD = $(top_builddir)/lib/common/libcrmcommon.la cibadmin_SOURCES = cibadmin.c cibadmin_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_shadow_SOURCES = crm_shadow.c crm_shadow_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_node_SOURCES = crm_node.c crm_node_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_simulate_SOURCES = crm_simulate.c crm_simulate_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_diff_SOURCES = crm_diff.c crm_diff_LDADD = $(top_builddir)/lib/common/libcrmcommon.la crm_mon_SOURCES = crm_mon.c crm_mon_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la \ $(CURSESLIBS) # Arguments could be made that this should live in crm/pengine crm_verify_SOURCES = crm_verify.c crm_verify_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_attribute_SOURCES = crm_attribute.c crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_resource_SOURCES = crm_resource.c crm_resource_ban.c crm_resource_runtime.c crm_resource_print.c crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la crm_rule_SOURCES = crm_rule.c crm_rule_LDADD = $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/common/libcrmcommon.la iso8601_SOURCES = iso8601.c iso8601_LDADD = $(top_builddir)/lib/common/libcrmcommon.la attrd_updater_SOURCES = attrd_updater.c attrd_updater_LDADD = $(top_builddir)/lib/common/libcrmcommon.la crm_ticket_SOURCES = crm_ticket.c crm_ticket_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la stonith_admin_SOURCES = stonith_admin.c stonith_admin_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/fencing/libstonithd.la if BUILD_SERVICELOG notifyServicelogEvent_SOURCES = notifyServicelogEvent.c notifyServicelogEvent_CFLAGS = $(SERVICELOG_CFLAGS) notifyServicelogEvent_LDADD = $(top_builddir)/lib/common/libcrmcommon.la $(SERVICELOG_LIBS) endif if BUILD_OPENIPMI_SERVICELOG ipmiservicelogd_SOURCES = ipmiservicelogd.c ipmiservicelogd_CFLAGS = $(OPENIPMI_SERVICELOG_CFLAGS) $(SERVICELOG_CFLAGS) ipmiservicelogd_LDFLAGS = $(top_builddir)/lib/common/libcrmcommon.la $(OPENIPMI_SERVICELOG_LIBS) $(SERVICELOG_LIBS) endif CLEANFILES = $(man8_MANS) diff --git a/tools/cibsecret.in b/tools/cibsecret.in index 3157d2ac31..ee8349fb40 100644 --- a/tools/cibsecret.in +++ b/tools/cibsecret.in @@ -1,365 +1,367 @@ #!/bin/sh -# Copyright (C) 2011 Dejan Muhamedagic +# Copyright 2011-2018 the Pacemaker project contributors +# +# The version control history for this file may have further details. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # cibsecret # # Manage the secrets directory (by default, /var/lib/pacemaker/lrm/secrets). # Secrets are ASCII files, holding one value per file: # // . @OCF_ROOT_DIR@/lib/heartbeat/ocf-shellfuncs LRM_CIBSECRETS=@LRM_CIBSECRETS_DIR@ PROG=`basename $0` SSH_OPTS="-o StrictHostKeyChecking=no" usage() { echo "cibsecret - A tool for managing cib secrets"; echo ""; echo "usage: $PROG [-C] "; echo "--version Display version information, then exit"; echo ""; echo "-C: don't read/write the CIB" echo "" echo "command: set | delete | stash | unstash | get | check | sync" echo "" echo " set " echo "" echo " get " echo "" echo " check " echo "" echo " stash (if not -C)" echo "" echo " unstash (if not -C)" echo "" echo " delete " echo "" echo " sync" echo "" echo "stash/unstash: move the parameter from/to the CIB (if you already" echo "have the parameter set in the CIB)." echo "" echo "set/delete: add/remove a parameter from the local file." echo "" echo "get: display the parameter from the local file." echo "" echo "check: verify MD5 hash of the parameter from the local file and the CIB." echo "" echo "sync: copy $LRM_CIBSECRETS to other nodes." echo "" echo "Examples:" echo "" echo " $PROG set ipmi_node1 passwd SecreT_PASS" echo "" echo " $PROG stash ipmi_node1 passwd" echo "" echo " $PROG get ipmi_node1 passwd" echo "" echo " $PROG check ipmi_node1 passwd" echo "" echo " $PROG sync" exit $1 } fatal() { echo "ERROR: $*" exit 1 } warn() { echo "WARNING: $*" } info() { echo "INFO: $*" } check_env() { which md5sum >/dev/null 2>&1 || fatal "please install md5sum to run $PROG" if which pssh >/dev/null 2>&1; then rsh=pssh_fun rcp=pscp_fun elif which pdsh >/dev/null 2>&1; then rsh=pdsh_fun rcp=pdcp_fun elif which ssh >/dev/null 2>&1; then rsh=ssh_fun rcp=scp_fun else fatal "please install pssh, pdsh, or ssh to run $PROG" fi ps -ef | grep '[p]acemaker-controld' >/dev/null || fatal "pacemaker not running? $PROG needs pacemaker" } get_other_nodes() { crm_node -l | awk '{print $2}' | grep -v `uname -n` } get_live_nodes() { if [ `id -u` = 0 ] && which fping >/dev/null 2>&1; then fping -a $@ 2>/dev/null else local h for h; do ping -c 2 -q $h >/dev/null 2>&1 && echo $h; done fi } check_down_nodes() { local n down_nodes down_nodes=`(for n; do echo $n; done) | sort | uniq -u` if [ -n "$down_nodes" ]; then if [ `echo $down_nodes | wc -w` = 1 ]; then warn "node $down_nodes is down" warn "you'll need to update it using $PROG sync later" else warn "nodes `echo $down_nodes` are down" warn "you'll need to update them using $PROG sync later" fi fi } pssh_fun() { pssh -qi -H "$nodes" -x "$SSH_OPTS" $* } pscp_fun() { pscp -q -H "$nodes" -x "-pr" -x "$SSH_OPTS" $* } pdsh_fun() { local pdsh_nodes=`echo $nodes | tr ' ' ','` export PDSH_SSH_ARGS_APPEND="$SSH_OPTS" pdsh -w $pdsh_nodes $* } pdcp_fun() { local pdsh_nodes=`echo $nodes | tr ' ' ','` export PDSH_SSH_ARGS_APPEND="$SSH_OPTS" pdcp -pr -w $pdsh_nodes $* } ssh_fun() { local h for h in $nodes; do ssh $SSH_OPTS $h $* || return done } scp_fun() { local h src="$1" dest=$2 for h in $nodes; do scp -pr -q $SSH_OPTS $src $h:$dest || return done } # TODO: this procedure should be replaced with csync2 # provided that csync2 has already been configured sync_files() { local crm_nodes=`get_other_nodes` local nodes=`get_live_nodes $crm_nodes` check_down_nodes $nodes $crm_nodes [ "$nodes" = "" ] && { info "no other nodes live" return } info "syncing $LRM_CIBSECRETS to `echo $nodes` ..." $rsh rm -rf $LRM_CIBSECRETS && $rsh mkdir -p `dirname $LRM_CIBSECRETS` && $rcp $LRM_CIBSECRETS `dirname $LRM_CIBSECRETS` } sync_one() { local f=$1 f_all="$1 $1.sign" local crm_nodes=`get_other_nodes` local nodes=`get_live_nodes $crm_nodes` check_down_nodes $nodes $crm_nodes [ "$nodes" = "" ] && { info "no other nodes live" return } info "syncing $f to `echo $nodes` ..." $rsh mkdir -p `dirname $f` && if [ -f "$f" ]; then $rcp "$f_all" `dirname $f` else $rsh rm -f $f_all fi } is_secret() { # assume that the secret is in the CIB if we cannot talk to # cib [ "$NO_CRM" ] || test "$1" = "$MAGIC" } check_cib_rsc() { local rsc=$1 output output=`$NO_CRM crm_resource -r $rsc -W >/dev/null 2>&1` || fatal "resource $rsc doesn't exist: $output" } get_cib_param() { local rsc=$1 param=$2 check_cib_rsc $rsc $NO_CRM crm_resource -r $rsc -g $param 2>/dev/null } set_cib_param() { local rsc=$1 param=$2 value=$3 check_cib_rsc $rsc $NO_CRM crm_resource -r $rsc -p $param -v "$value" 2>/dev/null } remove_cib_param() { local rsc=$1 param=$2 check_cib_rsc $rsc $NO_CRM crm_resource -r $rsc -d $param 2>/dev/null } localfiles() { local cmd=$1 local rsc=$2 param=$3 value=$4 local local_file=$LRM_CIBSECRETS/$rsc/$param case $cmd in "get") cat $local_file 2>/dev/null true ;; "getsum") cat $local_file.sign 2>/dev/null true ;; "set") local md5sum md5sum=`printf $value | md5sum` || fatal "md5sum failed to produce hash for resource $rsc parameter $param" md5sum=`echo $md5sum | awk '{print $1}'` mkdir -p `dirname $local_file` && echo $value > $local_file && echo $md5sum > $local_file.sign && sync_one $local_file ;; "remove") rm -f $local_file rm -f $local_file.sign sync_one $local_file ;; *) # not reached, this is local interface ;; esac } get_local_param() { local rsc=$1 param=$2 localfiles get $rsc $param } set_local_param() { local rsc=$1 param=$2 value=$3 localfiles set $rsc $param $value } remove_local_param() { local rsc=$1 param=$2 localfiles remove $rsc $param } cibsecret_set() { local value=$1 if [ -z "$NO_CRM" ]; then [ "$current" -a "$current" != "$MAGIC" -a "$current" != "$value" ] && fatal "CIB value <$current> different for $rsc parameter $param; please delete it first" fi set_local_param $rsc $param $value && set_cib_param $rsc $param "$MAGIC" } cibsecret_check() { local md5sum local_md5sum is_secret "$current" || fatal "resource $rsc parameter $param not set as secret, nothing to check" local_md5sum=`localfiles getsum $rsc $param` [ "$local_md5sum" ] || fatal "no MD5 hash for resource $rsc parameter $param" md5sum=`printf "$current_local" | md5sum | awk '{print $1}'` [ "$md5sum" = "$local_md5sum" ] || fatal "MD5 hash mismatch for resource $rsc parameter $param" } cibsecret_get() { cibsecret_check echo "$current_local" } cibsecret_delete() { remove_local_param $rsc $param && remove_cib_param $rsc $param } cibsecret_stash() { [ "$NO_CRM" ] && fatal "no access to Pacemaker, stash not supported" [ "$current" = "" ] && fatal "nothing to stash for resource $rsc parameter $param" is_secret "$current" && fatal "resource $rsc parameter $param already set as secret, nothing to stash" cibsecret_set "$current" } cibsecret_unstash() { [ "$NO_CRM" ] && fatal "no access to Pacemaker, unstash not supported" [ "$current_local" = "" ] && fatal "nothing to unstash for resource $rsc parameter $param" is_secret "$current" || warn "resource $rsc parameter $param not set as secret, but we have local value so proceeding anyway" remove_local_param $rsc $param && set_cib_param $rsc $param $current_local } cibsecret_sync() { sync_files } MAGIC="lrm://" umask 0077 if [ "$1" = "-C" ]; then NO_CRM=':' shift 1 fi cmd=$1 rsc=$2 param=$3 value=$4 case "$cmd" in set) [ $# -ne 4 ] && usage 1;; get) [ $# -ne 3 ] && usage 1;; check) [ $# -ne 3 ] && usage 1;; stash) [ $# -ne 3 ] && usage 1;; unstash) [ $# -ne 3 ] && usage 1;; delete) [ $# -ne 3 ] && usage 1;; sync) [ $# -ne 1 ] && usage 1;; --help) usage 0;; --version) crm_attribute --version exit $?;; *) usage 1; esac check_env # we'll need these two often current=`get_cib_param $rsc $param` current_local=`get_local_param $rsc $param` cibsecret_$cmd $value diff --git a/tools/crm_diff.c b/tools/crm_diff.c index 41d2965b13..6a03040129 100644 --- a/tools/crm_diff.c +++ b/tools/crm_diff.c @@ -1,361 +1,351 @@ - -/* - * Copyright (C) 2004 Andrew Beekhof - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +/* + * Copyright 2005-2018 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\t\tThis text"}, {"version", 0, 0, '$', "\t\tVersion information" }, {"verbose", 0, 0, 'V', "\t\tIncrease debug output\n"}, {"-spacer-", 1, 0, '-', "\nOriginal XML:"}, {"original", 1, 0, 'o', "\tXML is contained in the named file"}, {"original-string", 1, 0, 'O', "XML is contained in the supplied string"}, {"-spacer-", 1, 0, '-', "\nOperation:"}, {"new", 1, 0, 'n', "\tCompare the original XML to the contents of the named file"}, {"new-string", 1, 0, 'N', "\tCompare the original XML to the contents of the supplied string"}, {"patch", 1, 0, 'p', "\tPatch the original XML with the contents of the named file"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"cib", 0, 0, 'c', "\t\tCompare/patch the inputs as a CIB (includes versions details)"}, {"stdin", 0, 0, 's', NULL, 1}, {"no-version", 0, 0, 'u', "\tGenerate the difference without versions details"}, {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "Obtain the two different configuration files by running cibadmin on the two cluster setups to compare:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " cibadmin --query > cib-old.xml", pcmk_option_example}, {"-spacer-", 1, 0, '-', " cibadmin --query > cib-new.xml", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Calculate and save the difference between the two files:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_diff --original cib-old.xml --new cib-new.xml > patch.xml", pcmk_option_example }, {"-spacer-", 1, 0, '-', "Apply the patch to the original file:", pcmk_option_paragraph }, {"-spacer-", 1, 0, '-', " crm_diff --original cib-old.xml --patch patch.xml > updated.xml", pcmk_option_example }, {"-spacer-", 1, 0, '-', "Apply the patch to the running cluster:", pcmk_option_paragraph }, {"-spacer-", 1, 0, '-', " cibadmin --patch patch.xml", pcmk_option_example }, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void print_patch(xmlNode *patch) { char *buffer = dump_xml_formatted(patch); printf("%s\n", crm_str(buffer)); free(buffer); fflush(stdout); } static int apply_patch(xmlNode *input, xmlNode *patch, gboolean as_cib) { int rc; xmlNode *output = copy_xml(input); rc = xml_apply_patchset(output, patch, as_cib); if (rc != pcmk_ok) { fprintf(stderr, "Could not apply patch: %s\n", pcmk_strerror(rc)); free_xml(output); return rc; } if (output != NULL) { const char *version; char *buffer; print_patch(output); version = crm_element_value(output, XML_ATTR_CRM_VERSION); buffer = calculate_xml_versioned_digest(output, FALSE, TRUE, version); crm_trace("Digest: %s\n", crm_str(buffer)); free(buffer); free_xml(output); } return pcmk_ok; } static void log_patch_cib_versions(xmlNode *patch) { int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; const char *fmt = NULL; const char *digest = NULL; xml_patch_versions(patch, add, del); fmt = crm_element_value(patch, "format"); digest = crm_element_value(patch, XML_ATTR_DIGEST); if (add[2] != del[2] || add[1] != del[1] || add[0] != del[0]) { crm_info("Patch: --- %d.%d.%d %s", del[0], del[1], del[2], fmt); crm_info("Patch: +++ %d.%d.%d %s", add[0], add[1], add[2], digest); } } static void strip_patch_cib_version(xmlNode *patch, const char **vfields, size_t nvfields) { int format = 1; crm_element_value_int(patch, "format", &format); if (format == 2) { xmlNode *version_xml = find_xml_node(patch, "version", FALSE); if (version_xml) { free_xml(version_xml); } } else { int i = 0; const char *tags[] = { XML_TAG_DIFF_REMOVED, XML_TAG_DIFF_ADDED, }; for (i = 0; i < DIMOF(tags); i++) { xmlNode *tmp = NULL; int lpc; tmp = find_xml_node(patch, tags[i], FALSE); if (tmp) { for (lpc = 0; lpc < nvfields; lpc++) { xml_remove_prop(tmp, vfields[lpc]); } tmp = find_xml_node(tmp, XML_TAG_CIB, FALSE); if (tmp) { for (lpc = 0; lpc < nvfields; lpc++) { xml_remove_prop(tmp, vfields[lpc]); } } } } } } static int generate_patch(xmlNode *object_1, xmlNode *object_2, const char *xml_file_2, gboolean as_cib, gboolean no_version) { xmlNode *output = NULL; const char *vfields[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; /* If we're ignoring the version, make the version information * identical, so it isn't detected as a change. */ if (no_version) { int lpc; for (lpc = 0; lpc < DIMOF(vfields); lpc++) { crm_copy_xml_element(object_1, object_2, vfields[lpc]); } } xml_track_changes(object_2, NULL, object_2, FALSE); if(as_cib) { xml_calculate_significant_changes(object_1, object_2); } else { xml_calculate_changes(object_1, object_2); } crm_log_xml_debug(object_2, (xml_file_2? xml_file_2: "target")); output = xml_create_patchset(0, object_1, object_2, NULL, FALSE); xml_log_changes(LOG_INFO, __FUNCTION__, object_2); xml_accept_changes(object_2); if (output == NULL) { return pcmk_ok; } patchset_process_digest(output, object_1, object_2, as_cib); if (as_cib) { log_patch_cib_versions(output); } else if (no_version) { strip_patch_cib_version(output, vfields, DIMOF(vfields)); } xml_log_patchset(LOG_NOTICE, __FUNCTION__, output); print_patch(output); free_xml(output); return -pcmk_err_generic; } int main(int argc, char **argv) { gboolean apply = FALSE; gboolean raw_1 = FALSE; gboolean raw_2 = FALSE; gboolean use_stdin = FALSE; gboolean as_cib = FALSE; gboolean no_version = FALSE; int argerr = 0; int flag; int rc = pcmk_ok; xmlNode *object_1 = NULL; xmlNode *object_2 = NULL; const char *xml_file_1 = NULL; const char *xml_file_2 = NULL; int option_index = 0; crm_log_cli_init("crm_diff"); crm_set_options(NULL, "original_xml operation [options]", long_options, "crm_diff can compare two Pacemaker configurations (in XML format) to\n" "produce a custom diff-like output, or apply such an output as a patch\n"); if (argc < 2) { crm_help('?', CRM_EX_USAGE); } while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'o': xml_file_1 = optarg; break; case 'O': xml_file_1 = optarg; raw_1 = TRUE; break; case 'n': xml_file_2 = optarg; break; case 'N': xml_file_2 = optarg; raw_2 = TRUE; break; case 'p': xml_file_2 = optarg; apply = TRUE; break; case 's': use_stdin = TRUE; break; case 'c': as_cib = TRUE; break; case 'u': no_version = TRUE; break; case 'V': crm_bump_log_level(argc, argv); break; case '?': case '$': crm_help(flag, CRM_EX_OK); break; default: printf("Argument %c (0%o) is not (yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } if (apply && no_version) { fprintf(stderr, "warning: -u/--no-version ignored with -p/--patch\n"); } else if (as_cib && no_version) { fprintf(stderr, "error: -u/--no-version incompatible with -c/--cib\n"); return CRM_EX_USAGE; } if (raw_1) { object_1 = string2xml(xml_file_1); } else if (use_stdin) { fprintf(stderr, "Input first XML fragment:"); object_1 = stdin2xml(); } else if (xml_file_1 != NULL) { object_1 = filename2xml(xml_file_1); } if (raw_2) { object_2 = string2xml(xml_file_2); } else if (use_stdin) { fprintf(stderr, "Input second XML fragment:"); object_2 = stdin2xml(); } else if (xml_file_2 != NULL) { object_2 = filename2xml(xml_file_2); } if (object_1 == NULL) { fprintf(stderr, "Could not parse the first XML fragment\n"); return CRM_EX_DATAERR; } if (object_2 == NULL) { fprintf(stderr, "Could not parse the second XML fragment\n"); return CRM_EX_DATAERR; } if (apply) { rc = apply_patch(object_1, object_2, as_cib); } else { rc = generate_patch(object_1, object_2, xml_file_2, as_cib, no_version); } free_xml(object_1); free_xml(object_2); return crm_errno2exit(rc); } diff --git a/tools/crm_error.c b/tools/crm_error.c index 5c2072e22c..f6dc73cd4a 100644 --- a/tools/crm_error.c +++ b/tools/crm_error.c @@ -1,111 +1,113 @@ /* - * Copyright (C) 2012 Andrew Beekhof + * Copyright 2012-2018 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"name", 0, 0, 'n', "\tShow the error's name with its description." "\n\t\t\tUseful for looking for sources of the error in source code"}, {"list", 0, 0, 'l', "\tShow all known errors."}, {"exit", 0, 0, 'X', "\tInterpret as exit code rather than function return value"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int rc = 0; int lpc = 0; int flag = 0; int option_index = 0; bool do_list = FALSE; bool with_name = FALSE; bool as_exit_code = FALSE; crm_log_cli_init("crm_error"); crm_set_options(NULL, "[options] -- rc", long_options, "Tool for displaying the textual name or description of a reported error code"); while (flag >= 0) { flag = crm_get_option(argc, argv, &option_index); switch (flag) { case -1: break; case 'V': crm_bump_log_level(argc, argv); break; case '$': case '?': crm_help(flag, CRM_EX_OK); break; case 'n': with_name = TRUE; break; case 'l': do_list = TRUE; break; case 'X': as_exit_code = TRUE; break; default: crm_help(flag, CRM_EX_OK); break; } } if(do_list) { for (rc = 0; rc < 256; rc++) { const char *name = as_exit_code? crm_exit_name(rc) : pcmk_errorname(rc); const char *desc = as_exit_code? crm_exit_str(rc) : pcmk_strerror(rc); if (!name || !strcmp(name, "Unknown") || !strcmp(name, "CRM_EX_UNKNOWN")) { /* Unknown */ } else if(with_name) { printf("%.3d: %-26s %s\n", rc, name, desc); } else { printf("%.3d: %s\n", rc, desc); } } return CRM_EX_OK; } for (lpc = optind; lpc < argc; lpc++) { const char *str, *name; rc = crm_atoi(argv[lpc], NULL); str = as_exit_code? crm_exit_str(rc) : pcmk_strerror(rc); if(with_name) { name = as_exit_code? crm_exit_name(rc) : pcmk_errorname(rc); printf("%s - %s\n", name, str); } else { printf("%s\n", str); } } return CRM_EX_OK; } diff --git a/tools/crm_failcount.in b/tools/crm_failcount.in index c3050c75aa..f70fe78a91 100755 --- a/tools/crm_failcount.in +++ b/tools/crm_failcount.in @@ -1,292 +1,294 @@ #!@BASH_PATH@ # -# Copyright 2009-2018 Andrew Beekhof +# Copyright 2009-2018 the Pacemaker project contributors +# +# The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # USAGE_TEXT="Usage: crm_failcount [] Common options: --help Display this text, then exit --version Display version information, then exit -V, --verbose Specify multiple times to increase debug output -q, --quiet Print only the value (if querying) Commands: -G, --query Query the current value of the resource's fail count -D, --delete Delete resource's recorded failures Additional Options: -r, --resource=value Name of the resource to use (required) -n, --operation=value Name of operation to use (instead of all operations) -I, --interval=value If operation is specified, its interval -N, --node=value Use failcount on named node (instead of local node)" HELP_TEXT="crm_failcount - Query or delete resource fail counts $USAGE_TEXT" # These constants must track crm_exit_t values CRM_EX_OK=0 CRM_EX_USAGE=64 CRM_EX_NOSUCH=105 exit_usage() { if [ $# -gt 0 ]; then echo "error:" "$@" >&2 fi echo echo "$USAGE_TEXT" exit $CRM_EX_USAGE } warn() { echo "warning:" "$@" >&2 } interval_re() { echo "^[[:blank:]]*([0-9]+)[[:blank:]]*(${1})[[:blank:]]*$" } # This function should follow crm_get_interval() as closely as possible parse_interval() { INT_S="$1" INT_8601RE="^P(([0-9]+)Y)?(([0-9]+)M)?(([0-9]+)D)?T?(([0-9]+)H)?(([0-9]+)M)?(([0-9]+)S)?$" if [[ $INT_S =~ $(interval_re "") ]]; then echo $(( ${BASH_REMATCH[1]} * 1000 )) elif [[ $INT_S =~ $(interval_re "s|sec") ]]; then echo $(( ${BASH_REMATCH[1]} * 1000 )) elif [[ $INT_S =~ $(interval_re "ms|msec") ]]; then echo "${BASH_REMATCH[1]}" elif [[ $INT_S =~ $(interval_re "m|min") ]]; then echo $(( ${BASH_REMATCH[1]} * 60000 )) elif [[ $INT_S =~ $(interval_re "h|hr") ]]; then echo $(( ${BASH_REMATCH[1]} * 3600000 )) elif [[ $INT_S =~ $(interval_re "us|usec") ]]; then echo $(( ${BASH_REMATCH[1]} / 1000 )) elif [[ $INT_S =~ ^P([0-9]+)W$ ]]; then echo $(( ${BASH_REMATCH[1]} * 604800000 )) elif [[ $INT_S =~ $INT_8601RE ]]; then echo $(( ( ${BASH_REMATCH[2]:-0} * 31536000000 ) \ + ( ${BASH_REMATCH[4]:-0} * 2592000000 ) \ + ( ${BASH_REMATCH[6]:-0} * 86400000 ) \ + ( ${BASH_REMATCH[8]:-0} * 3600000 ) \ + ( ${BASH_REMATCH[10]:-0} * 60000 ) \ + ( ${BASH_REMATCH[12]:-0} * 1000 ) )) else warn "Unrecognized interval, using 0" echo "0" fi } query_single_attr() { QSR_TARGET="$1" QSR_ATTR="$2" crm_attribute $VERBOSE --quiet --query -t status -d 0 \ -N "$QSR_TARGET" -n "$QSR_ATTR" } query_attr_sum() { QAS_TARGET="$1" QAS_PREFIX="$2" # Build xpath to match all transient node attributes with prefix QAS_XPATH="/cib/status/node_state[@uname='${QAS_TARGET}']" QAS_XPATH="${QAS_XPATH}/transient_attributes/instance_attributes" QAS_XPATH="${QAS_XPATH}/nvpair[starts-with(@name,'$QAS_PREFIX')]" # Query attributes that match xpath # @TODO We ignore stderr because we don't want "no results" to look # like an error, but that also makes $VERBOSE pointless. QAS_ALL=$(cibadmin --query --sync-call --local \ --xpath="$QAS_XPATH" 2>/dev/null) QAS_EX=$? # "No results" is not an error if [ $QAS_EX -ne $CRM_EX_OK ] && [ $QAS_EX -ne $CRM_EX_NOSUCH ]; then echo "error: could not query CIB for fail counts" >&2 exit $QAS_EX fi # Extract the attribute values (one per line) from the output QAS_VALUE=$(echo "$QAS_ALL" | sed -n -e \ 's/.*.*/\1/p') # Sum the values QAS_SUM=0 for i in 0 $QAS_VALUE; do if [ "$i" = "INFINITY" ]; then QAS_SUM="INFINITY" break else QAS_SUM=$(($QAS_SUM + $i)) fi done if [ "$QAS_SUM" = "INFINITY" ]; then echo $QAS_SUM elif [ "$QAS_SUM" -ge 1000000 ]; then echo "INFINITY" else echo $QAS_SUM fi } query_failcount() { QF_TARGET="$1" QF_RESOURCE="$2" QF_OPERATION="$3" QF_INTERVAL="$4" QF_ATTR_RSC="fail-count-${QF_RESOURCE}" if [ -n "$QF_OPERATION" ]; then QF_ATTR_DISPLAY="${QF_ATTR_RSC}#${QF_OPERATION}_${QF_INTERVAL}" QF_COUNT=$(query_single_attr "$QF_TARGET" "$QF_ATTR_DISPLAY") else QF_ATTR_DISPLAY="$QF_ATTR_RSC" QF_COUNT=$(query_attr_sum "$QF_TARGET" "${QF_ATTR_RSC}#") fi # @COMPAT attributes set < 1.1.17: # If we didn't find any per-operation failcount, # check whether there is a legacy per-resource failcount. if [ "$QF_COUNT" = "0" ]; then QF_COUNT=$(query_single_attr "$QF_TARGET" "$QF_ATTR_RSC") if [ "$QF_COUNT" != "0" ]; then QF_ATTR_DISPLAY="$QF_ATTR_RSC" fi fi # Echo result (comparable to crm_attribute, for backward compatibility) if [ -n "$QUIET" ]; then echo $QF_COUNT else echo "scope=status name=$QF_ATTR_DISPLAY value=$QF_COUNT" fi } clear_failcount() { CF_TARGET="$1" CF_RESOURCE="$2" CF_OPERATION="$3" CF_INTERVAL="$4" if [ -n "$CF_OPERATION" ]; then CF_OPERATION="-n $CF_OPERATION -I ${CF_INTERVAL}ms" fi crm_resource $QUIET $VERBOSE --cleanup \ -N "$CF_TARGET" -r "$CF_RESOURCE" $CF_OPERATION } QUIET="" VERBOSE="" command="" resource="" operation="" interval="0" target=$(crm_node -n 2>/dev/null) SHORTOPTS="qDGQVN:U:v:i:l:r:n:I:" LONGOPTS_COMMON="help,version,verbose,quiet" LONGOPTS_COMMANDS="query,delete" LONGOPTS_OTHER="resource:,node:,operation:,interval:" LONGOPTS_COMPAT="delete-attr,get-value,resource-id:,uname:,lifetime:,attr-value:,attr-id:" LONGOPTS="$LONGOPTS_COMMON,$LONGOPTS_COMMANDS,$LONGOPTS_OTHER,$LONGOPTS_COMPAT" TEMP=$(@GETOPT_PATH@ -o $SHORTOPTS --long $LONGOPTS -n crm_failcount -- "$@") if [ $? -ne 0 ]; then exit_usage fi eval set -- "$TEMP" # Quotes around $TEMP are essential while true ; do case "$1" in --help) echo "$HELP_TEXT" exit $CRM_EX_OK ;; --version) crm_attribute --version exit $? ;; -q|-Q|--quiet) QUIET="--quiet" shift ;; -V|--verbose) VERBOSE="$VERBOSE $1" shift ;; -G|--query|--get-value) command="--query" shift ;; -D|--delete|--delete-attr) command="--delete" shift ;; -r|--resource|--resource-id) resource="$2" shift 2 ;; -n|--operation) operation="$2" shift 2 ;; -I|--interval) interval="$2" shift 2 ;; -N|--node|-U|--uname) target="$2" shift 2 ;; -v|--attr-value) if [ "$2" = "0" ]; then command="--delete" else warn "ignoring deprecated option '$1' with nonzero value" fi shift 2 ;; -i|--attr-id|-l|--lifetime) warn "ignoring deprecated option '$1'" shift 2 ;; --) shift break ;; *) exit_usage "unknown option '$1'" ;; esac done [ -n "$command" ] || exit_usage "must specify a command" [ -n "$resource" ] || exit_usage "resource name required" [ -n "$target" ] || exit_usage "node name required" interval=$(parse_interval $interval) if [ "$command" = "--query" ]; then query_failcount "$target" "$resource" "$operation" "$interval" else clear_failcount "$target" "$resource" "$operation" "$interval" fi diff --git a/tools/crm_master.in b/tools/crm_master.in index 4a20e41421..937f88633b 100755 --- a/tools/crm_master.in +++ b/tools/crm_master.in @@ -1,110 +1,112 @@ #!@BASH_PATH@ # -# Copyright 2009-2018 Andrew Beekhof +# Copyright 2009-2018 the Pacemaker project contributors +# +# The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # USAGE_TEXT="Usage: crm_master [] Common options: --help Display this text, then exit --version Display version information, then exit -V, --verbose Specify multiple times to increase debug output -q, --quiet Print only the value (if querying) Commands: -G, --query Query the current value of the promotion score -v, --update=VALUE Update the value of the promotion score -D, --delete Delete the promotion score Additional Options: -r, --resource=RSC ID of resource to act on -N, --node=NODE Use promotion score on named node (instead of local node) -l, --lifetime=VALUE Until when should the setting take effect (valid values: reboot, forever) -i, --id=VALUE (Advanced) XML ID used to identify promotion score attribute" HELP_TEXT="crm_master - Query, update, or delete a resource's promotion score This program should normally be invoked only from inside an OCF resource agent. $USAGE_TEXT" exit_usage() { if [ $# -gt 0 ]; then echo "error:" "$@" >&2 fi echo echo "$USAGE_TEXT" exit 1 } SHORTOPTS_DEPRECATED="U:Q" LONGOPTS_DEPRECATED="uname:,get-value,delete-attr,attr-value:,attr-id:" SHORTOPTS="VqGv:DN:l:i:r:" LONGOPTS="help,version,verbose,quiet,query,update:,delete,node:,lifetime:,id:,resource:" TEMP=$(@GETOPT_PATH@ -o ${SHORTOPTS}${SHORTOPTS_DEPRECATED} \ --long ${LONGOPTS},${LONGOPTS_DEPRECATED} \ -n crm_master -- "$@") if [ $? -ne 0 ]; then exit_usage fi eval set -- "$TEMP" # Quotes around $TEMP are essential # Explicitly set the (usual default) lifetime, so the attribute gets set as a # node attribute and not a cluster property. options="--lifetime forever" while true ; do case "$1" in --help) echo "$HELP_TEXT" exit 0 ;; --version) crm_attribute --version exit 0 ;; --verbose|-V|--quiet|-q|--query|-G|--delete|-D) options="$options $1" shift ;; --update|-v|--node|-N|--lifetime|-l|--id|-i) options="$options $1 $2" shift shift ;; -r|--resource) OCF_RESOURCE_INSTANCE=$2; shift shift ;; --get-value|--delete-attr|-Q) # deprecated options="$options $1" shift ;; --uname|-U|--attr-value|--attr-id) # deprecated options="$options $1 $2" shift shift ;; --) shift break ;; *) exit_usage "unknown option '$1'" ;; esac done if [ -z "$OCF_RESOURCE_INSTANCE" ]; then echo "This program should normally only be invoked from inside an OCF resource agent." echo "To set a promotion score from the command line, please specify resource with -r." exit 1 fi crm_attribute -n master-$OCF_RESOURCE_INSTANCE $options diff --git a/tools/crm_report.in b/tools/crm_report.in index 957c2969ed..d1bc4257d1 100644 --- a/tools/crm_report.in +++ b/tools/crm_report.in @@ -1,473 +1,475 @@ #!/bin/sh # -# Copyright 2010-2018 Andrew Beekhof +# Copyright 2010-2019 the Pacemaker project contributors +# +# The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # TEMP=`@GETOPT_PATH@ \ -o hv?xl:f:t:n:T:L:p:c:dSCu:D:MVse: \ --long help,cts:,cts-log:,dest:,node:,nodes:,from:,to:,sos-mode,logfile:,as-directory,single-node,cluster:,user:,max-depth:,version,features,rsh: \ -n 'crm_report' -- "$@"` # The quotes around $TEMP are essential eval set -- "$TEMP" progname=$(basename "$0") rsh="ssh -T" tests="" nodes="" compress=1 cluster="any" ssh_user="root" search_logs=1 sos_mode=0 report_data=`dirname $0` maxdepth=5 extra_logs="" sanitize_patterns="passw.*" log_patterns="CRIT: ERROR:" usage() { cat< "$l_base/$HALOG_F" fi for node in $nodes; do cat <$l_base/.env LABEL="$label" REPORT_HOME="$r_base" REPORT_MASTER="$host" REPORT_TARGET="$node" LOG_START=$start LOG_END=$end REMOVE=1 SANITIZE="$sanitize_patterns" CLUSTER=$cluster LOG_PATTERNS="$log_patterns" EXTRA_LOGS="$extra_logs" SEARCH_LOGS=$search_logs SOS_MODE=$sos_mode verbose=$verbose maxdepth=$maxdepth EOF if [ $host = $node ]; then cat <>$l_base/.env REPORT_HOME="$l_base" EOF cat $l_base/.env $report_data/report.common $report_data/report.collector > $l_base/collector bash $l_base/collector else cat $l_base/.env $report_data/report.common $report_data/report.collector \ | $rsh -l $ssh_user $node -- "mkdir -p $r_base; cat > $r_base/collector; bash $r_base/collector" | (cd $l_base && tar mxf -) fi done analyze $l_base > $l_base/$ANALYSIS_F if [ -f $l_base/$HALOG_F ]; then node_events $l_base/$HALOG_F > $l_base/$EVENTS_F fi for node in $nodes; do cat $l_base/$node/$ANALYSIS_F >> $l_base/$ANALYSIS_F if [ -s $l_base/$node/$EVENTS_F ]; then cat $l_base/$node/$EVENTS_F >> $l_base/$EVENTS_F elif [ -s $l_base/$HALOG_F ]; then awk "\$4==\"$nodes\"" $l_base/$EVENTS_F >> $l_base/$n/$EVENTS_F fi done log " " if [ $compress = 1 ]; then fname=`shrink $l_base` rm -rf $l_base log "Collected results are available in $fname" log " " log "Please create a bug entry at" log " @BUG_URL@" log "Include a description of your problem and attach this tarball" log " " log "Thank you for taking time to create this report." else log "Collected results are available in $l_base" fi log " " } # # check if files have same content in the cluster # cibdiff() { d1=$(dirname $1) d2=$(dirname $2) if [ -f "$d1/RUNNING" ] && [ ! -f "$d2/RUNNING" ]; then DIFF_OK=0 elif [ -f "$d1/STOPPED" ] && [ ! -f "$d2/STOPPED" ]; then DIFF_OK=0 else DIFF_OK=1 fi if [ $DIFF_OK -eq 1 ]; then if which crm_diff > /dev/null 2>&1; then crm_diff -c -n $1 -o $2 else info "crm_diff(8) not found, cannot diff CIBs" fi else echo "can't compare cibs from running and stopped systems" fi } diffcheck() { [ -f "$1" ] || { echo "$1 does not exist" return 1 } [ -f "$2" ] || { echo "$2 does not exist" return 1 } case $(basename "$1") in $CIB_F) cibdiff $1 $2 ;; *) diff -u $1 $2 ;; esac } # # remove duplicates if files are same, make links instead # consolidate() { for n in $nodes; do if [ -f $1/$2 ]; then rm $1/$n/$2 else mv $1/$n/$2 $1 fi ln -s ../$2 $1/$n done } analyze_one() { rc=0 node0="" for n in $nodes; do if [ "$node0" ]; then diffcheck $1/$node0/$2 $1/$n/$2 rc=$(($rc+$?)) else node0=$n fi done return $rc } analyze() { flist="$MEMBERSHIP_F $CIB_F $CRM_MON_F $SYSINFO_F" for f in $flist; do printf "Diff $f... " ls $1/*/$f >/dev/null 2>&1 || { echo "no $1/*/$f :/" continue } if analyze_one $1 $f; then echo "OK" [ "$f" != $CIB_F ] && consolidate $1 $f else echo "" fi done } do_cts() { test_sets=`echo $tests | tr ',' ' '` for test_set in $test_sets; do start_time=0 start_test=`echo $test_set | tr '-' ' ' | awk '{print $1}'` end_time=0 end_test=`echo $test_set | tr '-' ' ' | awk '{print $2}'` if [ x$end_test = x ]; then msg="Extracting test $start_test" label="CTS-$start_test-`date +"%b-%d-%Y"`" end_test=`expr $start_test + 1` else msg="Extracting tests $start_test to $end_test" label="CTS-$start_test-$end_test-`date +"%b-%d-%Y"`" end_test=`expr $end_test + 1` fi if [ $start_test = 0 ]; then start_pat="BEGINNING [0-9].* TESTS" else start_pat="Running test.*\[ *$start_test\]" fi if [ x$ctslog = x ]; then ctslog=`findmsg 1 "$start_pat"` if [ x$ctslog = x ]; then fatal "No CTS control file detected" else log "Using CTS control file: $ctslog" fi fi line=`grep -n "$start_pat" $ctslog | tail -1 | sed 's/:.*//'` if [ ! -z "$line" ]; then start_time=`linetime $ctslog $line` fi line=`grep -n "Running test.*\[ *$end_test\]" $ctslog | tail -1 | sed 's/:.*//'` if [ ! -z "$line" ]; then end_time=`linetime $ctslog $line` fi if [ -z "$nodes" ]; then nodes=`grep CTS: $ctslog | grep -v debug: | grep " \* " | sed s:.*\\\*::g | sort -u | tr '\\n' ' '` log "Calculated node list: $nodes" fi if [ $end_time -lt $start_time ]; then debug "Test didn't complete, grabbing everything up to now" end_time=`date +%s` fi if [ $start_time != 0 ];then log "$msg (`time2str $start_time` to `time2str $end_time`)" collect_data $label $start_time $end_time $ctslog else fatal "$msg failed: not found" fi done } node_names_from_xml() { awk ' /uname/ { for( i=1; i<=NF; i++ ) if( $i~/^uname=/ ) { sub("uname=.","",$i); sub("\".*","",$i); print $i; next; } } ' | tr '\n' ' ' } getnodes() { cluster="$1" # 1. Live (cluster nodes or Pacemaker Remote nodes) # TODO: This will not detect Pacemaker Remote nodes unless they # have ever had a permanent node attribute set, because it only # searches the nodes section. It should also search the config # for resources that create Pacemaker Remote nodes. cib_nodes=$(cibadmin -Ql -o nodes 2>/dev/null) if [ $? -eq 0 ]; then debug "Querying CIB for nodes" echo "$cib_nodes" | node_names_from_xml return fi # 2. Saved if [ -f "@CRM_CONFIG_DIR@/cib.xml" ]; then debug "Querying on-disk CIB for nodes" grep "node " "@CRM_CONFIG_DIR@/cib.xml" | node_names_from_xml return fi # 3. logs # TODO: Look for something like crm_update_peer } if [ "x$tests" != "x" ]; then do_cts elif [ "x$start_time" != "x" ]; then masterlog="" if [ -z "$sanitize_patterns" ]; then log "WARNING: The tarball produced by this program may contain" log " sensitive information such as passwords." log "" log "We will attempt to remove such information if you use the" log "-p option. For example: -p \"pass.*\" -p \"user.*\"" log "" log "However, doing this may reduce the ability for the recipients" log "to diagnose issues and generally provide assistance." log "" log "IT IS YOUR RESPONSIBILITY TO PROTECT SENSITIVE DATA FROM EXPOSURE" log "" fi # If user didn't specify a cluster stack, make a best guess if possible. if [ -z "$cluster" ] || [ "$cluster" = "any" ]; then cluster=$(get_cluster_type) fi # If user didn't specify node(s), make a best guess if possible. if [ -z "$nodes" ]; then nodes=`getnodes $cluster` if [ -n "$nodes" ]; then log "Calculated node list: $nodes" else fatal "Cannot determine nodes; specify --nodes or --single-node" fi fi if echo $nodes | grep -qs $host then debug "We are a cluster node" else debug "We are a log master" masterlog=`findmsg 1 "pacemaker-controld\\|CTS"` fi if [ -z $end_time ]; then end_time=`perl -e 'print time()'` fi label="pcmk-`date +"%a-%d-%b-%Y"`" log "Collecting data from $nodes (`time2str $start_time` to `time2str $end_time`)" collect_data $label $start_time $end_time $masterlog else fatal "Not sure what to do, no tests or time ranges to extract" fi # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c index 388832339d..1f9faa93fc 100644 --- a/tools/crm_resource_print.c +++ b/tools/crm_resource_print.c @@ -1,328 +1,330 @@ /* - * Copyright 2004-2018 Andrew Beekhof + * Copyright 2004-2018 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #define cons_string(x) x?x:"NA" void cli_resource_print_cts_constraints(pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; xmlNode *lifetime = NULL; xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); for (xml_obj = __xml_first_child(cib_constraints); xml_obj != NULL; xml_obj = __xml_next(xml_obj)) { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { continue; } lifetime = first_named_child(xml_obj, "lifetime"); if (test_ruleset(lifetime, NULL, data_set->now) == FALSE) { continue; } if (safe_str_eq(XML_CONS_TAG_RSC_DEPEND, crm_element_name(xml_obj))) { printf("Constraint %s %s %s %s %s %s %s\n", crm_element_name(xml_obj), cons_string(crm_element_value(xml_obj, XML_ATTR_ID)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET)), cons_string(crm_element_value(xml_obj, XML_RULE_ATTR_SCORE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE))); } else if (safe_str_eq(XML_CONS_TAG_RSC_LOCATION, crm_element_name(xml_obj))) { /* unpack_location(xml_obj, data_set); */ } } } void cli_resource_print_cts(resource_t * rsc) { GListPtr lpc = NULL; const char *host = NULL; bool needs_quorum = TRUE; const char *rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); pe_node_t *node = pe__current_node(rsc); if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { needs_quorum = FALSE; } else { // @TODO check requires in resource meta-data and rsc_defaults } if (node != NULL) { host = node->details->uname; } printf("Resource: %s %s %s %s %s %s %s %s %d %lld 0x%.16llx\n", crm_element_name(rsc->xml), rsc->id, rsc->clone_name ? rsc->clone_name : rsc->id, rsc->parent ? rsc->parent->id : "NA", rprov ? rprov : "NA", rclass, rtype, host ? host : "NA", needs_quorum, rsc->flags, rsc->flags); for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; cli_resource_print_cts(child); } } void cli_resource_print_raw(resource_t * rsc) { GListPtr lpc = NULL; GListPtr children = rsc->children; if (children == NULL) { printf("%s\n", rsc->id); } for (lpc = children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; cli_resource_print_raw(child); } } int cli_resource_print_list(pe_working_set_t * data_set, bool raw) { int found = 0; GListPtr lpc = NULL; int opts = pe_print_printf | pe_print_rsconly | pe_print_pending; for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; if (is_set(rsc->flags, pe_rsc_orphan) && rsc->fns->active(rsc, TRUE) == FALSE) { continue; } rsc->fns->print(rsc, NULL, opts, stdout); found++; } if (found == 0) { printf("NO resources configured\n"); return -ENXIO; } return 0; } int cli_resource_print_operations(const char *rsc_id, const char *host_uname, bool active, pe_working_set_t * data_set) { resource_t *rsc = NULL; int opts = pe_print_printf | pe_print_rsconly | pe_print_suppres_nl | pe_print_pending; GListPtr ops = find_operations(rsc_id, host_uname, active, data_set); GListPtr lpc = NULL; for (lpc = ops; lpc != NULL; lpc = lpc->next) { xmlNode *xml_op = (xmlNode *) lpc->data; const char *op_rsc = crm_element_value(xml_op, "resource"); const char *last = crm_element_value(xml_op, XML_RSC_OP_LAST_CHANGE); const char *status_s = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status = crm_parse_int(status_s, "0"); rsc = pe_find_resource(data_set->resources, op_rsc); if(rsc) { rsc->fns->print(rsc, "", opts, stdout); } else { fprintf(stdout, "Unknown resource %s", op_rsc); } fprintf(stdout, ": %s (node=%s, call=%s, rc=%s", op_key ? op_key : ID(xml_op), crm_element_value(xml_op, XML_ATTR_UNAME), crm_element_value(xml_op, XML_LRM_ATTR_CALLID), crm_element_value(xml_op, XML_LRM_ATTR_RC)); if (last) { time_t run_at = crm_parse_int(last, "0"); fprintf(stdout, ", last-rc-change=%s, exec=%sms", crm_strip_trailing_newline(ctime(&run_at)), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); } fprintf(stdout, "): %s\n", services_lrm_status_str(status)); } return pcmk_ok; } void cli_resource_print_location(resource_t * rsc, const char *prefix) { GListPtr lpc = NULL; GListPtr list = rsc->rsc_location; int offset = 0; if (prefix) { offset = strlen(prefix) - 2; } for (lpc = list; lpc != NULL; lpc = lpc->next) { pe__location_t *cons = lpc->data; GListPtr lpc2 = NULL; for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) { node_t *node = (node_t *) lpc2->data; char *score = score2char(node->weight); fprintf(stdout, "%s: Node %-*s (score=%s, id=%s)\n", prefix ? prefix : " ", 71 - offset, node->details->uname, score, cons->id); free(score); } } } void cli_resource_print_colocation(resource_t * rsc, bool dependents, bool recursive, int offset) { char *prefix = NULL; GListPtr lpc = NULL; GListPtr list = rsc->rsc_cons; prefix = calloc(1, (offset * 4) + 1); memset(prefix, ' ', offset * 4); if (dependents) { list = rsc->rsc_cons_lhs; } if (is_set(rsc->flags, pe_rsc_allocating)) { /* Break colocation loops */ printf("loop %s\n", rsc->id); free(prefix); return; } set_bit(rsc->flags, pe_rsc_allocating); for (lpc = list; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; char *score = NULL; resource_t *peer = cons->rsc_rh; if (dependents) { peer = cons->rsc_lh; } if (is_set(peer->flags, pe_rsc_allocating)) { if (dependents == FALSE) { fprintf(stdout, "%s%-*s (id=%s - loop)\n", prefix, 80 - (4 * offset), peer->id, cons->id); } continue; } if (dependents && recursive) { cli_resource_print_colocation(peer, dependents, recursive, offset + 1); } score = score2char(cons->score); if (cons->role_rh > RSC_ROLE_STARTED) { fprintf(stdout, "%s%-*s (score=%s, %s role=%s, id=%s)\n", prefix, 80 - (4 * offset), peer->id, score, dependents ? "needs" : "with", role2text(cons->role_rh), cons->id); } else { fprintf(stdout, "%s%-*s (score=%s, id=%s)\n", prefix, 80 - (4 * offset), peer->id, score, cons->id); } cli_resource_print_location(peer, prefix); free(score); if (!dependents && recursive) { cli_resource_print_colocation(peer, dependents, recursive, offset + 1); } } free(prefix); } int cli_resource_print(resource_t *rsc, pe_working_set_t *data_set, bool expanded) { char *rsc_xml = NULL; int opts = pe_print_printf | pe_print_pending; rsc->fns->print(rsc, NULL, opts, stdout); rsc_xml = dump_xml_formatted((!expanded && rsc->orig_xml)? rsc->orig_xml : rsc->xml); fprintf(stdout, "%sxml:\n%s\n", expanded ? "" : "raw ", rsc_xml); free(rsc_xml); return 0; } int cli_resource_print_attribute(resource_t *rsc, const char *attr, pe_working_set_t * data_set) { int rc = -ENXIO; unsigned int count = 0; GHashTable *params = NULL; const char *value = NULL; node_t *current = pe__find_active_on(rsc, &count, NULL); if (count > 1) { CMD_ERR("%s is active on more than one node," " returning the default value for %s", rsc->id, crm_str(attr)); current = NULL; } params = crm_str_table_new(); if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { get_rsc_attributes(params, rsc, current, data_set); } else if (safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { /* No need to redirect to the parent */ get_meta_attributes(params, rsc, current, data_set); } else { unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_UTILIZATION, NULL, params, NULL, FALSE, data_set->now); } crm_debug("Looking up %s in %s", attr, rsc->id); value = g_hash_table_lookup(params, attr); if (value != NULL) { fprintf(stdout, "%s\n", value); rc = 0; } else { CMD_ERR("Attribute '%s' not found for '%s'", attr, rsc->id); } g_hash_table_destroy(params); return rc; } int cli_resource_print_property(resource_t *rsc, const char *attr, pe_working_set_t * data_set) { const char *value = crm_element_value(rsc->xml, attr); if (value != NULL) { fprintf(stdout, "%s\n", value); return 0; } return -ENXIO; } diff --git a/tools/crm_standby.in b/tools/crm_standby.in index 1c6dea4655..0911b9daec 100755 --- a/tools/crm_standby.in +++ b/tools/crm_standby.in @@ -1,156 +1,158 @@ #!@BASH_PATH@ # -# Copyright 2009-2018 Andrew Beekhof +# Copyright 2009-2018 the Pacemaker project contributors +# +# The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # USAGE_TEXT="Usage: crm_standby [options] Common options: --help Display this text, then exit --version Display version information, then exit -V, --verbose Specify multiple times to increase debug output -q, --quiet Print only the standby status (if querying) Commands: -G, --query Query the current value of standby mode (on/off) -v, --update=VALUE Update the value of standby mode (on/off) -D, --delete Let standby mode use default value Additional Options: -N, --node=NODE Operate on the named node instead of the current one -l, --lifetime=VALUE Until when should the setting take effect (valid values: reboot, forever) -i, --id=VALUE (Advanced) XML ID used to identify standby attribute" HELP_TEXT="crm_standby - Query, enable, or disable standby mode for a node Nodes in standby mode may not host cluster resources. $USAGE_TEXT " exit_usage() { if [ $# -gt 0 ]; then echo "error:" "$@" >&2 fi echo echo "$USAGE_TEXT" exit 1 } op="" options="" lifetime=0 target="" SHORTOPTS_DEPRECATED="U:Q" LONGOPTS_DEPRECATED="uname:,get-value,delete-attr,attr-value:,attr-id:" SHORTOPTS="VqGv:DN:l:i:" LONGOPTS="help,version,verbose,quiet,query,update:,delete,node:,lifetime:,id:" TEMP=$(@GETOPT_PATH@ -o ${SHORTOPTS}${SHORTOPTS_DEPRECATED} \ --long ${LONGOPTS},${LONGOPTS_DEPRECATED} \ -n crm_standby -- "$@") if [ $? -ne 0 ]; then exit_usage fi eval set -- "$TEMP" # Quotes around $TEMP are essential while true ; do case "$1" in --help) echo "$HELP_TEXT" exit 0 ;; --version) crm_attribute --version exit 0 ;; -q|--quiet|-V|--verbose|-Q) options="$options $1" shift ;; -N|--node|-U|--uname) target="$2" shift shift ;; -G|--query|--get-value) options="$options --query" op=g shift ;; -v|--update|--attr-value) options="$options --update $2" op=u shift shift ;; -D|--delete|--delete-attr) options="$options --delete" op=d shift ;; -l|--lifetime) options="$options --lifetime $2" lifetime=1 shift shift ;; -i|--id|--attr-id) options="$options --id $2" shift shift ;; --) shift break ;; *) exit_usage "unknown option '$1'" ;; esac done # It's important to call cluster commands only after arguments are processed, # so --version and --help work without problems even if those commands don't. if [ "$target" = "" ]; then target=$(crm_node -n) fi options="-N $target -n standby $options" if [ x$op = x ]; then options="$options -G"; op=g fi # If the user didn't explicitly specify a lifetime ... if [ $lifetime -eq 0 ]; then case $op in g) # For query, report the forever entry if one exists, otherwise # report the reboot entry if one exists, otherwise report off. crm_attribute $options -l forever >/dev/null 2>&1 if [ $? -eq 0 ]; then options="$options -l forever" else options="$options -l reboot -d off" fi ;; u) # For update, default to updating the forever entry. options="$options -l forever" ;; d) # For delete, default to deleting both forever and reboot entries. crm_attribute $options -l forever crm_attribute $options -l reboot exit 0 ;; esac fi crm_attribute $options diff --git a/tools/crmadmin.c b/tools/crmadmin.c index fe1cefa84c..5104043f83 100644 --- a/tools/crmadmin.c +++ b/tools/crmadmin.c @@ -1,471 +1,473 @@ /* - * Copyright 2004-2018 Andrew Beekhof + * Copyright 2004-2018 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int message_timer_id = -1; static int message_timeout_ms = 30 * 1000; static GMainLoop *mainloop = NULL; static crm_ipc_t *crmd_channel = NULL; static char *admin_uuid = NULL; gboolean do_init(void); int do_work(void); void crmadmin_ipc_connection_destroy(gpointer user_data); int admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata); int do_find_node_list(xmlNode * xml_node); gboolean admin_message_timeout(gpointer data); static gboolean BE_VERBOSE = FALSE; static int expected_responses = 1; static gboolean BASH_EXPORT = FALSE; static gboolean DO_HEALTH = FALSE; static gboolean DO_RESET = FALSE; static gboolean DO_RESOURCE = FALSE; static gboolean DO_ELECT_DC = FALSE; static gboolean DO_WHOIS_DC = FALSE; static gboolean DO_NODE_LIST = FALSE; static gboolean BE_SILENT = FALSE; static gboolean DO_RESOURCE_LIST = FALSE; static const char *crmd_operation = NULL; static char *dest_node = NULL; static crm_exit_t exit_code = CRM_EX_OK; static const char *sys_to = NULL; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"quiet", 0, 0, 'q', "\tDisplay only the essential query information"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"-spacer-", 1, 0, '-', "\nCommands:"}, /* daemon options */ {"status", 1, 0, 'S', "Display the status of the specified node." }, {"-spacer-", 1, 0, '-', "\n\tResult is the node's internal FSM state which can be useful for debugging\n"}, {"dc_lookup", 0, 0, 'D', "Display the uname of the node co-ordinating the cluster."}, {"-spacer-", 1, 0, '-', "\n\tThis is an internal detail and is rarely useful to administrators except when deciding on which node to examine the logs.\n"}, {"nodes", 0, 0, 'N', "\tDisplay the uname of all member nodes"}, {"election", 0, 0, 'E', "(Advanced) Start an election for the cluster co-ordinator"}, { "kill", 1, 0, 'K', "(Advanced) Stop the controller (not the rest of the cluster stack) on specified node" }, {"health", 0, 0, 'H', NULL, 1}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {XML_ATTR_TIMEOUT, 1, 0, 't', "Time (in milliseconds) to wait before declaring the operation failed"}, {"bash-export", 0, 0, 'B', "Create Bash export entries of the form 'export uname=uuid'\n"}, {"-spacer-", 1, 0, '-', "Notes:"}, {"-spacer-", 1, 0, '-', " The -K and -E commands are rarely used and may be removed in future versions."}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int option_index = 0; int argerr = 0; int flag; crm_log_cli_init("crmadmin"); crm_set_options(NULL, "command [options]", long_options, "Development tool for performing some controller-specific commands." "\n Likely to be replaced by crm_node in the future"); if (argc < 2) { crm_help('?', CRM_EX_USAGE); } while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': BE_VERBOSE = TRUE; crm_bump_log_level(argc, argv); break; case 't': message_timeout_ms = atoi(optarg); if (message_timeout_ms < 1) { message_timeout_ms = 30 * 1000; } break; case '$': case '?': crm_help(flag, CRM_EX_OK); break; case 'D': DO_WHOIS_DC = TRUE; break; case 'B': BASH_EXPORT = TRUE; break; case 'K': DO_RESET = TRUE; crm_trace("Option %c => %s", flag, optarg); dest_node = strdup(optarg); crmd_operation = CRM_OP_LOCAL_SHUTDOWN; break; case 'q': BE_SILENT = TRUE; break; case 'S': DO_HEALTH = TRUE; crm_trace("Option %c => %s", flag, optarg); dest_node = strdup(optarg); break; case 'E': DO_ELECT_DC = TRUE; break; case 'N': DO_NODE_LIST = TRUE; break; case 'H': DO_HEALTH = TRUE; break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } if (do_init()) { int res = 0; res = do_work(); if (res > 0) { /* wait for the reply by creating a mainloop and running it until * the callbacks are invoked... */ mainloop = g_main_loop_new(NULL, FALSE); crm_trace("Waiting for %d replies from the local CRM", expected_responses); message_timer_id = g_timeout_add(message_timeout_ms, admin_message_timeout, NULL); g_main_loop_run(mainloop); } else if (res < 0) { crm_err("No message to send"); exit_code = CRM_EX_ERROR; } } else { crm_warn("Init failed, could not perform requested operations"); exit_code = CRM_EX_UNAVAILABLE; } crm_trace("%s exiting normally", crm_system_name); return exit_code; } int do_work(void) { int ret = 1; /* construct the request */ xmlNode *msg_data = NULL; gboolean all_is_good = TRUE; if (DO_HEALTH == TRUE) { crm_trace("Querying the system"); sys_to = CRM_SYSTEM_DC; if (dest_node != NULL) { sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_PING; if (BE_VERBOSE) { expected_responses = 1; } } else { crm_info("Cluster-wide health not available yet"); all_is_good = FALSE; } } else if (DO_ELECT_DC) { /* tell the local node to initiate an election */ dest_node = NULL; sys_to = CRM_SYSTEM_CRMD; crmd_operation = CRM_OP_VOTE; ret = 0; /* no return message */ } else if (DO_WHOIS_DC) { dest_node = NULL; sys_to = CRM_SYSTEM_DC; crmd_operation = CRM_OP_PING; } else if (DO_NODE_LIST) { cib_t *the_cib = cib_new(); xmlNode *output = NULL; int rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); if (rc != pcmk_ok) { return -1; } rc = the_cib->cmds->query(the_cib, NULL, &output, cib_scope_local | cib_sync_call); if(rc == pcmk_ok) { do_find_node_list(output); free_xml(output); } the_cib->cmds->signoff(the_cib); crm_exit(crm_errno2exit(rc)); } else if (DO_RESET) { /* tell dest_node to initiate the shutdown procedure * * if dest_node is NULL, the request will be sent to the * local node */ sys_to = CRM_SYSTEM_CRMD; ret = 0; /* no return message */ } else { crm_err("Unknown options"); all_is_good = FALSE; } if (all_is_good == FALSE) { crm_err("Creation of request failed. No message to send"); return -1; } /* send it */ if (crmd_channel == NULL) { crm_err("The IPC connection is not valid, cannot send anything"); return -1; } if (sys_to == NULL) { if (dest_node != NULL) { sys_to = CRM_SYSTEM_CRMD; } else { sys_to = CRM_SYSTEM_DC; } } { xmlNode *cmd = create_request(crmd_operation, msg_data, dest_node, sys_to, crm_system_name, admin_uuid); crm_ipc_send(crmd_channel, cmd, 0, 0, NULL); free_xml(cmd); } return ret; } void crmadmin_ipc_connection_destroy(gpointer user_data) { crm_err("Connection to controller was terminated"); if (mainloop) { g_main_loop_quit(mainloop); } else { crm_exit(CRM_EX_DISCONNECT); } } struct ipc_client_callbacks crm_callbacks = { .dispatch = admin_msg_callback, .destroy = crmadmin_ipc_connection_destroy }; gboolean do_init(void) { mainloop_io_t *source = mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks); admin_uuid = crm_getpid_s(); crmd_channel = mainloop_get_ipc_client(source); if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST) { return TRUE; } else if (crmd_channel != NULL) { xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1"); crm_ipc_send(crmd_channel, xml, 0, 0, NULL); return TRUE; } return FALSE; } static bool validate_crm_message(xmlNode * msg, const char *sys, const char *uuid, const char *msg_type) { const char *type = NULL; const char *crm_msg_reference = NULL; if (msg == NULL) { return FALSE; } type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_msg_reference = crm_element_value(msg, XML_ATTR_REFERENCE); if (type == NULL) { crm_info("No message type defined."); return FALSE; } else if (msg_type != NULL && strcasecmp(msg_type, type) != 0) { crm_info("Expecting a (%s) message but received a (%s).", msg_type, type); return FALSE; } if (crm_msg_reference == NULL) { crm_info("No message crm_msg_reference defined."); return FALSE; } return TRUE; } int admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata) { static int received_responses = 0; xmlNode *xml = string2xml(buffer); received_responses++; g_source_remove(message_timer_id); crm_log_xml_trace(xml, "ipc"); if (xml == NULL) { crm_info("XML in IPC message was not valid... " "discarding."); } else if (validate_crm_message(xml, crm_system_name, admin_uuid, XML_ATTR_RESPONSE) == FALSE) { crm_trace("Message was not a CRM response. Discarding."); } else if (DO_HEALTH) { xmlNode *data = get_message_xml(xml, F_CRM_DATA); const char *state = crm_element_value(data, XML_PING_ATTR_CRMDSTATE); printf("Status of %s@%s: %s (%s)\n", crm_element_value(data, XML_PING_ATTR_SYSFROM), crm_element_value(xml, F_CRM_HOST_FROM), state, crm_element_value(data, XML_PING_ATTR_STATUS)); if (BE_SILENT && state != NULL) { fprintf(stderr, "%s\n", state); } } else if (DO_WHOIS_DC) { const char *dc = crm_element_value(xml, F_CRM_HOST_FROM); printf("Designated Controller is: %s\n", dc); if (BE_SILENT && dc != NULL) { fprintf(stderr, "%s\n", dc); } crm_exit(CRM_EX_OK); } free_xml(xml); if (received_responses >= expected_responses) { crm_trace("Received expected number (%d) of replies, exiting normally", expected_responses); crm_exit(CRM_EX_OK); } message_timer_id = g_timeout_add(message_timeout_ms, admin_message_timeout, NULL); return 0; } gboolean admin_message_timeout(gpointer data) { fprintf(stderr, "No messages received in %d seconds.. aborting\n", (int)message_timeout_ms / 1000); crm_err("No messages received in %d seconds", (int)message_timeout_ms / 1000); exit_code = CRM_EX_TIMEOUT; g_main_loop_quit(mainloop); return FALSE; } int do_find_node_list(xmlNode * xml_node) { int found = 0; xmlNode *node = NULL; xmlNode *nodes = get_object_root(XML_CIB_TAG_NODES, xml_node); for (node = __xml_first_child(nodes); node != NULL; node = __xml_next(node)) { if (crm_str_eq((const char *)node->name, XML_CIB_TAG_NODE, TRUE)) { if (BASH_EXPORT) { printf("export %s=%s\n", crm_element_value(node, XML_ATTR_UNAME), crm_element_value(node, XML_ATTR_ID)); } else { printf("%s node: %s (%s)\n", crm_element_value(node, XML_ATTR_TYPE), crm_element_value(node, XML_ATTR_UNAME), crm_element_value(node, XML_ATTR_ID)); } found++; } } if (found == 0) { printf("NO nodes configured\n"); } return found; } diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c index 247ce71038..626cb80a0a 100644 --- a/tools/ipmiservicelogd.c +++ b/tools/ipmiservicelogd.c @@ -1,606 +1,609 @@ /* * ipmiservicelogd.c * * A program that listens to IPMI events and writes them * out to servicelog. * * Author: International Business Machines, IBM * Mark Hamzy * Author: Intel Corporation * Jeff Zheng * - * Copyright 2009-2018 International Business Machines, IBM + * Original copyright 2009 International Business Machines, IBM + * Later changes copyright 2009-2018 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* gcc -o ipmiservicelogd -g `pkg-config --cflags --libs OpenIPMI OpenIPMIposix servicelog-1` ipmiservicelogd.c */ /* ./ipmiservicelogd smi 0 */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define COMPLEX 1 static os_handler_t *os_hnd; char *getStringExecOutput(const char *const args[]); char *getSerialNumber(void); char *getProductName(void); static void con_usage(const char *name, const char *help, void *cb_data); static void usage(const char *progname); void ipmi2servicelog(struct sl_data_bmc *bmc_data); static int sensor_threshold_event_handler(ipmi_sensor_t * sensor, enum ipmi_event_dir_e dir, enum ipmi_thresh_e threshold, enum ipmi_event_value_dir_e high_low, enum ipmi_value_present_e value_present, unsigned int raw_value, double value, void *cb_data, ipmi_event_t * event); static int sensor_discrete_event_handler(ipmi_sensor_t * sensor, enum ipmi_event_dir_e dir, int offset, int severity, int prev_severity, void *cb_data, ipmi_event_t * event); static void sensor_change(enum ipmi_update_e op, ipmi_entity_t * ent, ipmi_sensor_t * sensor, void *cb_data); static void entity_change(enum ipmi_update_e op, ipmi_domain_t * domain, ipmi_entity_t * entity, void *cb_data); void setup_done(ipmi_domain_t * domain, int err, unsigned int conn_num, unsigned int port_num, int still_connected, void *user_data); char * getStringExecOutput(const char *const args[]) { int rc; pid_t pid; int pipefd[2]; rc = pipe2(pipefd, 0); if (rc == -1) { crm_err("Error: pipe errno = %d", errno); return NULL; } pid = fork(); if (0 < pid) { /* Parent */ int childExitStatus; char serialNumber[256]; ssize_t sizeRead; /* close write end of pipe */ rc = close(pipefd[1]); if (rc == -1) { crm_err("Error: parent close (pipefd[1]) = %d", errno); } /* make 0 same as read-from end of pipe */ rc = dup2(pipefd[0], 0); if (rc == -1) { crm_err("Error: parent dup2 (pipefd[0]) = %d", errno); } /* close excess fildes */ rc = close(pipefd[0]); if (rc == -1) { crm_err("Error: parent close (pipefd[0]) = %d", errno); } waitpid(pid, &childExitStatus, 0); if (!WIFEXITED(childExitStatus)) { crm_err("waitpid() exited with an error: status = %d", WEXITSTATUS(childExitStatus)); return NULL; } else if (WIFSIGNALED(childExitStatus)) { crm_err("waitpid() exited due to a signal = %d", WTERMSIG(childExitStatus)); return NULL; } memset(serialNumber, 0, sizeof(serialNumber)); sizeRead = read(0, serialNumber, sizeof(serialNumber) - 1); if (sizeRead > 0) { char *end = serialNumber + strlen(serialNumber) - 1; while (end > serialNumber && (*end == '\n' || *end == '\r' || *end == '\t' || *end == ' ') ) { *end = '\0'; end--; } return strdup(serialNumber); } return NULL; } else if (pid == 0) { /* Child */ /* close read end of pipe */ rc = close(pipefd[0]); if (rc == -1) { crm_err("Error: child close (pipefd[0]) = %d", errno); } /* make 1 same as write-to end of pipe */ rc = dup2(pipefd[1], 1); if (rc == -1) { crm_err("Error: child dup2 (pipefd[1]) = %d", errno); } /* close excess fildes */ rc = close(pipefd[1]); if (rc == -1) { crm_err("Error: child close (pipefd[1]) = %d", errno); } /* execvp() takes (char *const *) for backward compatibility, * but POSIX guarantees that it will not modify the strings, * so the cast is safe */ rc = execvp(args[0], (char *const *) args); if (rc == -1) { crm_err("Error: child execvp = %d", errno); } /* In case of error */ return NULL; } else { /* Error */ crm_err("fork errno = %d", errno); return NULL; } return NULL; } char * getSerialNumber(void) { const char *const dmiArgs[] = { "dmidecode", "--string", "system-serial-number", NULL }; return getStringExecOutput(dmiArgs); } char * getProductName(void) { const char *dmiArgs[] = { "dmidecode", "--string", "system-product-name", NULL }; return getStringExecOutput(dmiArgs); } static void con_usage(const char *name, const char *help, void *cb_data) { printf("%s\n", help); } static void usage(const char *progname) { printf("Usage:\n"); printf(" %s \n", progname); printf(" Where is one of:\n"); ipmi_parse_args_iter_help(con_usage, NULL); } void ipmi2servicelog(struct sl_data_bmc *bmc_data) { servicelog *slog = NULL; struct sl_event sl_event; uint64_t new_id = 0; struct utsname name; char *serial_number = NULL; char *product_name = NULL; int rc; if (uname(&name) == -1) { crm_err("Error: uname failed"); return; } rc = servicelog_open(&slog, 0); /* flags is one of SL_FLAG_xxx */ if (!slog) { crm_err("Error: servicelog_open failed, rc = %d", rc); return; } serial_number = getSerialNumber(); if (serial_number) { if (strlen(serial_number) > 20) { serial_number[20] = '\0'; } } product_name = getProductName(); if (product_name) { if (strlen(product_name) > 20) { product_name[20] = '\0'; } } memset(&sl_event, 0, sizeof(sl_event)); /* *INDENT-OFF* */ sl_event.next = NULL; /* only used if in a linked list */ sl_event.id = 0; /* unique identifier - filled in by API call */ sl_event.time_logged = time (NULL); sl_event.time_event = time (NULL); sl_event.time_last_update = time (NULL); sl_event.type = SL_TYPE_BMC; /* one of SL_TYPE_* */ sl_event.severity = SL_SEV_WARNING; /* one of SL_SEV_* */ sl_event.platform = name.machine; /* ppc64, etc */ sl_event.machine_serial = serial_number; sl_event.machine_model = product_name; /* it may not have the serial # within the first 20 chars */ sl_event.nodename = name.nodename; sl_event.refcode = strdup("ipmi"); sl_event.description = strdup("ipmi event"); sl_event.serviceable = 1; /* 1 or 0 */ sl_event.predictive = 0; /* 1 or 0 */ sl_event.disposition = SL_DISP_RECOVERABLE; /* one of SL_DISP_* */ sl_event.call_home_status = SL_CALLHOME_NONE; /* one of SL_CALLHOME_*, only valid if serviceable */ sl_event.closed = 1; /* 1 or 0, only valid if serviceable */ sl_event.repair = 0; /* id of repairing repair_action */ sl_event.callouts = NULL; sl_event.raw_data_len = 0; sl_event.raw_data = NULL; sl_event.addl_data = &bmc_data; /* pointer to an sl_data_* struct */ /* *INDENT-ON* */ rc = servicelog_event_log(slog, &sl_event, &new_id); if (rc != 0) { crm_err("Error: servicelog_event_log, rc = %d (\"%s\")", rc, servicelog_error(slog)); } else { crm_debug("Sending to servicelog database"); } free(sl_event.refcode); free(sl_event.description); free(serial_number); free(product_name); servicelog_close(slog); } static int sensor_threshold_event_handler(ipmi_sensor_t * sensor, enum ipmi_event_dir_e dir, enum ipmi_thresh_e threshold, enum ipmi_event_value_dir_e high_low, enum ipmi_value_present_e value_present, unsigned int raw_value, double value, void *cb_data, ipmi_event_t * event) { ipmi_entity_t *ent = ipmi_sensor_get_entity(sensor); char name[IPMI_ENTITY_NAME_LEN]; struct sl_data_bmc bmc_data; uint32_t sel_id; uint32_t sel_type; uint16_t generator; uint8_t version; uint8_t sensor_type; int sensor_lun; int sensor_number; uint8_t event_class; uint8_t event_type; int direction; ipmi_sensor_get_id(sensor, name, sizeof(name)); ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); sel_id = ipmi_entity_get_entity_id(ent); sel_type = ipmi_entity_get_type(ent); generator = ipmi_entity_get_slave_address(ent) | (sensor_lun << 5); /* LUN (2 bits) | SLAVE ADDRESS (5 bits) */ version = 0x04; sensor_type = ipmi_sensor_get_sensor_type(sensor); event_class = 0; /* @TBD - where does this come from? */ event_type = ipmi_event_get_type(event); direction = dir; memset(&bmc_data, 0, sizeof(bmc_data)); bmc_data.sel_id = sel_id; bmc_data.sel_type = sel_type; bmc_data.generator = generator; bmc_data.version = version; bmc_data.sensor_type = sensor_type; bmc_data.sensor_number = sensor_number; bmc_data.event_class = event_class; bmc_data.event_type = event_type; bmc_data.direction = direction; crm_debug("Writing bmc_data (%08x, %08x, %04x, %02x, %02x, %02x, %02x, %02x, %d)", bmc_data.sel_id, bmc_data.sel_type, bmc_data.generator, bmc_data.version, bmc_data.sensor_type, bmc_data.sensor_number, bmc_data.event_class, bmc_data.event_type, bmc_data.direction); ipmi2servicelog(&bmc_data); /* This passes the event on to the main event handler, which does not exist in this program. */ return IPMI_EVENT_NOT_HANDLED; } static int sensor_discrete_event_handler(ipmi_sensor_t * sensor, enum ipmi_event_dir_e dir, int offset, int severity, int prev_severity, void *cb_data, ipmi_event_t * event) { ipmi_entity_t *ent = ipmi_sensor_get_entity(sensor); char name[IPMI_ENTITY_NAME_LEN]; struct sl_data_bmc bmc_data; uint32_t sel_id; uint32_t sel_type; uint16_t generator; uint8_t version; uint8_t sensor_type; int sensor_lun; int sensor_number; uint8_t event_class; uint8_t event_type; int direction; ipmi_sensor_get_id(sensor, name, sizeof(name)); ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); sel_id = ipmi_entity_get_entity_id(ent); sel_type = ipmi_entity_get_type(ent); generator = ipmi_entity_get_slave_address(ent) | (sensor_lun << 5); /* LUN (2 bits) | SLAVE ADDRESS (5 bits) */ version = 0x04; sensor_type = ipmi_sensor_get_sensor_type(sensor); event_class = 0; /* @TBD - where does this come from? */ event_type = ipmi_event_get_type(event); direction = dir; memset(&bmc_data, 0, sizeof(bmc_data)); bmc_data.sel_id = sel_id; bmc_data.sel_type = sel_type; bmc_data.generator = generator; bmc_data.version = version; bmc_data.sensor_type = sensor_type; bmc_data.sensor_number = sensor_number; bmc_data.event_class = event_class; bmc_data.event_type = event_type; bmc_data.direction = direction; crm_debug("Writing bmc_data (%08x, %08x, %04x, %02x, %02x, %02x, %02x, %02x, %d)", bmc_data.sel_id, bmc_data.sel_type, bmc_data.generator, bmc_data.version, bmc_data.sensor_type, bmc_data.sensor_number, bmc_data.event_class, bmc_data.event_type, bmc_data.direction); ipmi2servicelog(&bmc_data); /* This passes the event on to the main event handler, which does not exist in this program. */ return IPMI_EVENT_NOT_HANDLED; } /* Whenever the status of a sensor changes, the function is called We display the information of the sensor if we find a new sensor */ static void sensor_change(enum ipmi_update_e op, ipmi_entity_t * ent, ipmi_sensor_t * sensor, void *cb_data) { int rv; if (op == IPMI_ADDED) { if (ipmi_sensor_get_event_reading_type(sensor) == IPMI_EVENT_READING_TYPE_THRESHOLD) rv = ipmi_sensor_add_threshold_event_handler(sensor, sensor_threshold_event_handler, NULL); else rv = ipmi_sensor_add_discrete_event_handler(sensor, sensor_discrete_event_handler, NULL); if (rv) crm_err("Unable to add the sensor event handler: %x", rv); } } /* Whenever the status of an entity changes, the function is called When a new entity is created, we search all sensors that belong to the entity */ static void entity_change(enum ipmi_update_e op, ipmi_domain_t * domain, ipmi_entity_t * entity, void *cb_data) { int rv; if (op == IPMI_ADDED) { /* Register callback so that when the status of a sensor changes, sensor_change is called */ rv = ipmi_entity_add_sensor_update_handler(entity, sensor_change, entity); if (rv) { crm_err("ipmi_entity_set_sensor_update_handler: 0x%x", rv); crm_exit(CRM_EX_ERROR); } } } /* After we have established connection to domain, this function get called At this time, we can do whatever things we want to do. Herr we want to search all entities in the system */ void setup_done(ipmi_domain_t * domain, int err, unsigned int conn_num, unsigned int port_num, int still_connected, void *user_data) { int rv; /* Register a callback functin entity_change. When a new entities is created, entity_change is called */ rv = ipmi_domain_add_entity_update_handler(domain, entity_change, domain); if (rv) { crm_err("ipmi_domain_add_entity_update_handler return error: %d", rv); return; } } int main(int argc, char *argv[]) { int rv; int curr_arg = 1; ipmi_args_t *args; ipmi_con_t *con; /* OS handler allocated first. */ os_hnd = ipmi_posix_setup_os_handler(); if (!os_hnd) { crm_err("ipmi_smi_setup_con: Unable to allocate os handler"); crm_exit(CRM_EX_ERROR); } /* Initialize the OpenIPMI library. */ ipmi_init(os_hnd); // Check for pacemaker-standard help and version options if (argc > 1) { for (char **arg = &argv[1]; *arg != NULL; ++arg) { if (!strcmp(*arg, "--help") || !strcmp(*arg, "-?")) { usage(argv[0]); return CRM_EX_OK; } else if (!strcmp(*arg, "--version") || !strcmp(*arg, "-$")) { crm_help('$', CRM_EX_OK); } } } #ifdef COMPLEX rv = ipmi_parse_args2(&curr_arg, argc, argv, &args); if (rv) { crm_err("Error parsing command arguments, argument %d: %s", curr_arg, strerror(rv)); usage(argv[0]); crm_exit(CRM_EX_USAGE); } #endif crm_make_daemon("ipmiservicelogd", TRUE, "/var/run/ipmiservicelogd.pid0"); crm_log_cli_init("ipmiservicelogd"); // Maybe this should log like a daemon instead? // crm_log_init("ipmiservicelogd", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); #ifdef COMPLEX rv = ipmi_args_setup_con(args, os_hnd, NULL, &con); if (rv) { crm_err("ipmi_ip_setup_con: %s", strerror(rv)); crm_err("Error: Is IPMI configured correctly?"); crm_exit(CRM_EX_ERROR); } #else /* If all you need is an SMI connection, this is all the code you need. */ /* Establish connections to domain through system interface. This function connect domain, selector and OS handler together. When there is response message from domain, the status of file descriptor in selector is changed and predefined callback is called. After the connection is established, setup_done will be called. */ rv = ipmi_smi_setup_con(0, os_hnd, NULL, &con); if (rv) { crm_err("ipmi_smi_setup_con: %s", strerror(rv)); crm_err("Error: Is IPMI configured correctly?"); crm_exit(CRM_EX_ERROR); } #endif rv = ipmi_open_domain("", &con, 1, setup_done, NULL, NULL, NULL, NULL, 0, NULL); if (rv) { crm_err("ipmi_init_domain: %s", strerror(rv)); crm_exit(CRM_EX_ERROR); } /* This is the main loop of the event-driven program. Try to exit the program */ /* Let the selector code run the select loop. */ os_hnd->operation_loop(os_hnd); /* Technically, we can't get here, but this is an example. */ os_hnd->free_os_handler(os_hnd); return CRM_EX_OK; } diff --git a/tools/notifyServicelogEvent.c b/tools/notifyServicelogEvent.c index 5c9fd32ba6..ed4967cad9 100644 --- a/tools/notifyServicelogEvent.c +++ b/tools/notifyServicelogEvent.c @@ -1,189 +1,192 @@ /* - * Copyright 2009-2018 International Business Machines, IBM, Mark Hamzy + * Original copyright 2009 International Business Machines, IBM, Mark Hamzy + * Later changes copyright 2009-2018 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ /* gcc -o notifyServicelogEvent `pkg-config --cflags servicelog-1` `pkg-config --libs servicelog-1` notifyServicelogEvent.c */ #include #include #include #include #include #include #include #include #include #include /* U64T ~ PRIu64, U64TS ~ SCNu64 */ #include #include #include typedef enum { STATUS_GREEN = 1, STATUS_YELLOW, STATUS_RED } STATUS; const char *status2char(STATUS status); STATUS event2status(struct sl_event *event); const char * status2char(STATUS status) { switch (status) { default: case STATUS_GREEN: return "green"; case STATUS_YELLOW: return "yellow"; case STATUS_RED: return "red"; } } STATUS event2status(struct sl_event * event) { STATUS status = STATUS_GREEN; crm_debug("Severity = %d, Disposition = %d", event->severity, event->disposition); /* @TBD */ if (event->severity == SL_SEV_WARNING) { status = STATUS_YELLOW; } if (event->disposition == SL_DISP_UNRECOVERABLE) { status = STATUS_RED; } return status; } static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information"}, {"-spacer-", 0, 0, '-', "\nUsage: notifyServicelogEvent event_id"}, {"-spacer-", 0, 0, '-', "\nWhere event_id is unique unsigned event identifier which is then passed into servicelog"}, {0, 0, 0, 0} }; int main(int argc, char *argv[]) { int argerr = 0; int flag; int index = 0; int rc = 0; servicelog *slog = NULL; struct sl_event *event = NULL; uint64_t event_id = 0; crm_log_cli_init("notifyServicelogEvent"); crm_set_options(NULL, "event_id ", long_options, "Gets called upon events written to servicelog database"); if (argc < 2) { argerr++; } while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case '?': case '$': crm_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind != 1) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } openlog("notifyServicelogEvent", LOG_NDELAY, LOG_USER); if (sscanf(argv[optind], "%" U64TS, &event_id) != 1) { crm_err("Error: could not read event_id from args!"); rc = 1; goto cleanup; } if (event_id == 0) { crm_err("Error: event_id is 0!"); rc = 1; goto cleanup; } rc = servicelog_open(&slog, 0); /* flags is one of SL_FLAG_xxx */ if (!slog) { crm_err("Error: servicelog_open failed, rc = %d", rc); rc = 1; goto cleanup; } if (slog) { rc = servicelog_event_get(slog, event_id, &event); } if (rc == 0) { STATUS status = STATUS_GREEN; const char *health_component = "#health-ipmi"; const char *health_status = NULL; crm_debug("Event id = %" U64T ", Log timestamp = %s, Event timestamp = %s", event_id, ctime(&(event->time_logged)), ctime(&(event->time_event))); status = event2status(event); health_status = status2char(status); if (health_status) { gboolean rc; /* @TODO pass attrd_opt_remote when appropriate */ rc = (attrd_update_delegate(NULL, 'v', NULL, health_component, health_status, NULL, NULL, NULL, NULL, attrd_opt_none) > 0); crm_debug("Updating attribute ('%s', '%s') = %d", health_component, health_status, rc); } else { crm_err("Error: status2char failed, status = %d", status); rc = 1; } } else { crm_err("Error: servicelog_event_get failed, rc = %d", rc); } cleanup: if (event) { servicelog_event_free(event); } if (slog) { servicelog_close(slog); } closelog(); return rc; } diff --git a/tools/report.collector.in b/tools/report.collector.in index 4fb0ed58a7..9419f1748e 100644 --- a/tools/report.collector.in +++ b/tools/report.collector.in @@ -1,882 +1,883 @@ # # Originally based on hb_report # Copyright 2007 Dejan Muhamedagic +# Later changes copyright 2010-2018 the Pacemaker project contributors # -# Later changes copyright 2010-2018 Andrew Beekhof +# The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # if echo $REPORT_HOME | grep -qs '^/' then debug "Using full path to working directory: $REPORT_HOME" else REPORT_HOME="$HOME/$REPORT_HOME" debug "Canonicalizing working directory path: $REPORT_HOME" fi detect_host # # find files newer than a and older than b # isnumber() { echo "$*" | grep -qs '^[0-9][0-9]*$' } touchfile() { t=`mktemp` && perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' && echo $t } find_files_clean() { [ -z "$from_stamp" ] || rm -f "$from_stamp" [ -z "$to_stamp" ] || rm -f "$to_stamp" from_stamp="" to_stamp="" } find_files() { dirs= from_time=$2 to_time=$3 for d in $1; do if [ -d $d ]; then dirs="$dirs $d" fi done if [ x"$dirs" = x ]; then return fi isnumber "$from_time" && [ "$from_time" -gt 0 ] || { warning "sorry, can't find files in [ $1 ] based on time if you don't supply time" return } trap find_files_clean 0 if ! from_stamp=`touchfile $from_time`; then warning "sorry, can't create temporary file for find_files" return fi findexp="-newer $from_stamp" if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then if ! to_stamp=`touchfile $to_time`; then warning "sorry, can't create temporary file for find_files" find_files_clean return fi findexp="$findexp ! -newer $to_stamp" fi find $dirs -type f $findexp find_files_clean trap "" 0 } # # check permissions of files/dirs # pl_checkperms() { perl -e ' # check permissions and ownership # uid and gid are numeric # everything must match exactly # no error checking! (file should exist, etc) ($filename, $perms, $in_uid, $in_gid) = @ARGV; ($mode,$uid,$gid) = (stat($filename))[2,4,5]; $p=sprintf("%04o", $mode & 07777); $p ne $perms and exit(1); $uid ne $in_uid and exit(1); $gid ne $in_gid and exit(1); ' $* } num_id() { getent $1 $2 | awk -F: '{print $3}' } chk_id() { [ "$2" ] && return 0 echo "$1: id not found" return 1 } check_perms() { while read type f p uid gid; do if [ ! -e "$f" ]; then echo "$f doesn't exist" continue elif [ ! -$type "$f" ]; then echo "$f has wrong type" continue fi n_uid=`num_id passwd $uid` chk_id "$uid" "$n_uid" || continue n_gid=`num_id group $gid` chk_id "$gid" "$n_gid" || continue pl_checkperms $f $p $n_uid $n_gid || { echo "wrong permissions or ownership for $f:" ls -ld $f } done } # # coredumps # findbinary() { random_binary=`which cat 2>/dev/null` # suppose we are lucky binary=`gdb $random_binary $1 < /dev/null 2>/dev/null | grep 'Core was generated' | awk '{print $5}' | sed "s/^.//;s/[.':]*$//"` if [ x = x"$binary" ]; then debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)" binary=$(file $1 | awk '/from/{ for( i=1; i<=NF; i++ ) if( $i == "from" ) { print $(i+1) break } }') binary=`echo $binary | tr -d "'"` binary=$(echo $binary | tr -d '`') if [ "$binary" ]; then binary=`which $binary 2>/dev/null` fi fi if [ x = x"$binary" ]; then warning "Could not find the program path for core $1" return fi fullpath=`which $binary 2>/dev/null` if [ x = x"$fullpath" ]; then if [ -x $CRM_DAEMON_DIR/$binary ]; then echo $CRM_DAEMON_DIR/$binary debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1" else warning "Could not find the program path for core $1" fi else echo $fullpath debug "Found the program at $fullpath for core $1" fi } getbt() { which gdb > /dev/null 2>&1 || { warning "Please install gdb to get backtraces" return } for corefile; do absbinpath=`findbinary $corefile` [ x = x"$absbinpath" ] && continue echo "====================== start backtrace ======================" ls -l $corefile # Summary first... gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt"} -ex quit \ $absbinpath $corefile 2>/dev/null echo "====================== start detail ======================" # Now the unreadable details... gdb -batch -n -quiet -ex ${BT_OPTS:-"thread apply all bt full"} -ex quit \ $absbinpath $corefile 2>/dev/null echo "======================= end backtrace =======================" done } dump_status_and_config() { crm_mon -1 2>&1 | grep -v '^Last upd' > $target/$CRM_MON_F cibadmin -Ql 2>/dev/null > $target/${CIB_F}.live } getconfig() { cluster=$1; shift; target=$1; shift; for cf in $*; do if [ -e "$cf" ]; then cp -a "$cf" $target/ fi done if is_running pacemaker-controld; then dump_status_and_config crm_node -p > "$target/$MEMBERSHIP_F" 2>&1 echo "$host" > $target/RUNNING elif is_running pacemaker-remoted; then dump_status_and_config echo "$host" > $target/RUNNING # Pre-2.0.0 daemon name in case we're collecting on a mixed-version cluster elif is_running pacemaker_remoted; then dump_status_and_config echo "$host" > $target/RUNNING else echo "$host" > $target/STOPPED fi } get_readable_cib() { target="$1"; shift; if [ -f "$target/$CIB_F" ]; then crm_verify -V -x "$target/$CIB_F" >"$target/$CRM_VERIFY_F" 2>&1 if which crm >/dev/null 2>&1 ; then CIB_file="$target/$CIB_F" crm configure show >"$target/$CIB_TXT_F" 2>&1 elif which pcs >/dev/null 2>&1 ; then pcs config -f "$target/$CIB_F" >"$target/$CIB_TXT_F" 2>&1 fi fi } # # remove values of sensitive attributes # # this is not proper xml parsing, but it will work under the # circumstances sanitize_xml_attrs() { sed $( for patt in $SANITIZE; do echo "-e /name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/" done ) } sanitize_hacf() { awk ' $1=="stonith_host"{ for( i=5; i<=NF; i++ ) $i="****"; } {print} ' } sanitize_one_clean() { [ -z "$tmp" ] || rm -f "$tmp" tmp="" [ -z "$ref" ] || rm -f "$ref" ref="" } sanitize() { file=$1 compress="" if [ -z "$SANITIZE" ]; then return fi echo $file | grep -qs 'gz$' && compress=gzip echo $file | grep -qs 'bz2$' && compress=bzip2 if [ "$compress" ]; then decompress="$compress -dc" else compress=cat decompress=cat fi trap sanitize_one_clean 0 tmp=`mktemp` ref=`mktemp` if [ -z "$tmp" -o -z "$ref" ]; then sanitize_one_clean fatal "cannot create temporary files" fi touch -r $file $ref # save the mtime if [ "`basename $file`" = ha.cf ]; then sanitize_hacf else $decompress | sanitize_xml_attrs | $compress fi < $file > $tmp mv $tmp $file # note: cleaning $tmp up is still needed even after it's renamed # because its temp directory is still there. touch -r $ref $file sanitize_one_clean trap "" 0 } # # get some system info # distro() { if which lsb_release >/dev/null 2>&1 then lsb_release -d | sed -e 's/^Description:\s*//' debug "Using lsb_release for distribution info" return fi relf=`ls /etc/debian_version 2>/dev/null` || relf=`ls /etc/slackware-version 2>/dev/null` || relf=`ls -d /etc/*-release 2>/dev/null` && { for f in $relf; do test -f $f && { echo "`ls $f` `cat $f`" debug "Found `echo $relf | tr '\n' ' '` distribution release file(s)" return } done } warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information" } pkg_ver() { if which dpkg >/dev/null 2>&1 ; then pkg_mgr="deb" elif which rpm >/dev/null 2>&1 ; then pkg_mgr="rpm" elif which pkg_info >/dev/null 2>&1 ; then pkg_mgr="pkg_info" elif which pkginfo >/dev/null 2>&1 ; then pkg_mgr="pkginfo" else warning "Unknown package manager" return fi debug "The package manager is: $pkg_mgr" echo "The package manager is: $pkg_mgr" echo "Installed packages:" case $pkg_mgr in deb) dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W | sort echo for pkg in $*; do if dpkg-query -W $pkg 2>/dev/null ; then debug "Verifying installation of: $pkg" echo "Verifying installation of: $pkg" debsums -s $pkg 2>/dev/null fi done ;; rpm) rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' | sort echo for pkg in $*; do if rpm -q $pkg >/dev/null 2>&1 ; then debug "Verifying installation of: $pkg" echo "Verifying installation of: $pkg" rpm --verify $pkg 2>&1 fi done ;; pkg_info) pkg_info ;; pkginfo) pkginfo | awk '{print $3}' # format? ;; esac } getbacktraces() { debug "Looking for backtraces: $*" flist=$( for f in `find_files "$CRM_CORE_DIRS" $1 $2`; do bf=`basename $f` test `expr match $bf core` -gt 0 && echo $f done) if [ "$flist" ]; then for core in $flist; do log "Found core file: `ls -al $core`" done # Make a copy of them in case we need more data later # Luckily they compress well mkdir cores >/dev/null 2>&1 cp -a $flist cores/ shrink cores rm -rf cores # Now get as much as we can from them automagically for f in $flist; do getbt $f done fi } getpeinputs() { if [ -n "$PE_STATE_DIR" ]; then flist=$( find_files "$PE_STATE_DIR" "$1" "$2" | sed "s,`dirname $PE_STATE_DIR`/,,g" ) if [ "$flist" ]; then (cd $(dirname "$PE_STATE_DIR") && tar cf - $flist) | (cd "$3" && tar xf -) debug "found `echo $flist | wc -w` scheduler input files in $PE_STATE_DIR" fi fi } getblackboxes() { flist=$( find_files $BLACKBOX_DIR $1 $2 ) for bb in $flist; do bb_short=`basename $bb` qb-blackbox $bb > $3/${bb_short}.blackbox 2>&1 info "Extracting contents of blackbox: $bb_short" done } # # some basic system info and stats # sys_info() { cluster=$1; shift echo "Platform: `uname`" echo "Kernel release: `uname -r`" echo "Architecture: `uname -m`" if [ `uname` = Linux ]; then echo "Distribution: `distro`" fi echo cibadmin --version 2>&1 | head -1 cibadmin -! 2>&1 case $cluster in corosync) /usr/sbin/corosync -v 2>&1 | head -1 ;; esac # Cluster glue version hash (if available) stonith -V 2>/dev/null # Resource agents version hash echo "resource-agents: `grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs`" echo pkg_ver $* } sys_stats() { set -x uname -n uptime ps axf ps auxw top -b -n 1 ifconfig -a ip addr list netstat -i arp -an test -d /proc && { cat /proc/cpuinfo } lsscsi lspci lsblk mount df set +x } dlm_dump() { if which dlm_tool >/dev/null 2>&1 ; then if is_running dlm_controld; then echo "--- Lockspace overview:" dlm_tool ls -n echo "---Lockspace history:" dlm_tool dump echo "---Lockspace status:" dlm_tool status dlm_tool status -v echo "---Lockspace config:" dlm_tool dump_config dlm_tool log_plock dlm_tool ls | grep name | while read X N ; do echo "--- Lockspace $N:" dlm_tool lockdump "$N" dlm_tool lockdebug -svw "$N" done fi fi } drbd_info() { test -f /proc/drbd && { echo "--- /proc/drbd:" cat /proc/drbd 2>&1 echo } if which drbdadm >/dev/null 2>&1; then echo "--- drbdadm dump:" if [ -z "$SANITIZE"]; then drbdadm dump 2>&1 else drbdadm dump 2>&1 | sed "s/\(shared-secret[ ]*\"\)[^\"]*\";/\1****\";/" fi echo echo "--- drbdadm status:" drbdadm status 2>&1 echo echo "--- drbdadm show-gi:" for res in $(drbdsetup status | grep -e ^\\S | awk '{ print $1 }'); do echo "$res:" drbdadm show-gi $res 2>&1 echo done fi if which drbd-overview >/dev/null 2>&1; then echo "--- drbd-overview:" drbd-overview 2>&1 echo fi if which drbdsetup >/dev/null 2>&1; then echo "--- drbdsetup status:" drbdsetup status --verbose --statistics 2>&1 echo echo "--- drbdsetup events2:" drbdsetup events2 --timestamps --statistics --now 2>&1 echo fi } iscfvarset() { test "`getcfvar $1 $2`" } iscfvartrue() { getcfvar $1 $2 $3 | grep -E -qsi "^(true|y|yes|on|1)" } iscfvarfalse() { getcfvar $1 $2 $3 | grep -E -qsi "^(false|n|no|off|0)" } find_syslog() { priority="$1" # Always include system logs (if we can find them) msg="Mark:pcmk:`perl -e 'print time()'`" logger -p "$priority" "$msg" >/dev/null 2>&1 # Force buffer flush killall -HUP rsyslogd >/dev/null 2>&1 sleep 2 # Give syslog time to catch up in case it's busy findmsg 1 "$msg" } get_logfiles_cs() { if [ ! -f "$cf_file" ]; then return fi debug "Reading $cf_type log settings from $cf_file" # The default value of to_syslog is yes. if ! iscfvarfalse $cf_type to_syslog "$cf_file"; then facility_cs=$(getcfvar $cf_type syslog_facility "$cf_file") if [ -z "$facility_cs" ]; then facility_cs="daemon" fi find_syslog "$facility_cs.info" fi if [ "$SOS_MODE" = "1" ]; then return fi if iscfvartrue $cf_type to_logfile "$cf_file"; then logfile=$(getcfvar $cf_type logfile "$cf_file") if [ -f "$logfile" ]; then debug "Log settings found for cluster type $cf_type: $logfile" echo "$logfile" fi fi } get_logfiles() { cf_type=$1 cf_file="$2" case $cf_type in corosync) get_logfiles_cs;; esac . @CONFIGDIR@/pacemaker facility="$PCMK_logfacility" if [ -z "$facility" ]; then facility="daemon" fi if [ "$facility" != "$facility_cs" ]&&[ "$facility" != none ]; then find_syslog "$facility.notice" fi if [ "$SOS_MODE" = "1" ]; then return fi logfile="$PCMK_logfile" if [ "$logfile" != none ]; then if [ -z "$logfile" ]; then for logfile in "@CRM_LOG_DIR@/pacemaker.log" "/var/log/pacemaker.log"; do if [ -f "$logfile" ]; then debug "Log settings not found for Pacemaker, assuming $logfile" echo "$logfile" break fi done elif [ -f "$logfile" ]; then debug "Log settings found for Pacemaker: $logfile" echo "$logfile" fi fi # Look for detail logs: # - initial pacemakerd logs and tracing might go to a different file pattern="Starting Pacemaker" # - make sure we get something from the scheduler pattern="$pattern\\|Calculated transition" # - cib and pacemaker-execd updates # (helpful on non-DC nodes and when cluster has been up for a long time) pattern="$pattern\\|cib_perform_op\\|process_lrm_event" # - pacemaker_remote might use a different file pattern="$pattern\\|pacemaker[-_]remoted:" findmsg 3 "$pattern" } essential_files() { cat< /dev/null 2>&1 if [ $? -eq 0 ]; then cl_have_journald=1 else cl_have_journald=0 fi cl_lognames="$CL_LOGFILES" if [ $cl_have_journald -eq 1 ]; then cl_lognames="$cl_lognames journalctl" fi cl_lognames=$(trim "$cl_lognames") if [ -z "$cl_lognames" ]; then return fi # YYYY-MM-DD HH:MM:SS cl_start_ymd=$(date -d @${CL_START} +"%F %T") cl_end_ymd=$(date -d @${CL_END} +"%F %T") debug "Gathering logs from $cl_start_ymd to $cl_end_ymd:" debug " $cl_lognames" # Remove our temporary file if we get interrupted here trap '[ -z "$cl_pattfile" ] || rm -f "$cl_pattfile"' 0 # Create a temporary file with patterns to grep for cl_pattfile=$(mktemp) || fatal "cannot create temporary files" for cl_pattern in $LOG_PATTERNS; do echo "$cl_pattern" done > $cl_pattfile echo "Log pattern matches from $REPORT_TARGET:" > $ANALYSIS_F if [ -n "$CL_LOGFILES" ]; then for cl_logfile in $CL_LOGFILES; do cl_extract="$(basename $cl_logfile).extract.txt" if [ ! -f "$cl_logfile" ]; then # Not a file continue elif [ -f "$cl_extract" ]; then # We already have it continue fi dumplogset "$cl_logfile" $LOG_START $LOG_END > "$cl_extract" sanitize "$cl_extract" grep -f "$cl_pattfile" "$cl_extract" >> $ANALYSIS_F done fi # Collect systemd logs if present if [ $cl_have_journald -eq 1 ]; then journalctl --since "$cl_start_ymd" --until "$cl_end_ymd" > journal.log grep -f "$cl_pattfile" journal.log >> $ANALYSIS_F fi rm -f $cl_pattfile trap "" 0 } debug "Initializing $REPORT_TARGET subdir" if [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then if [ -e $REPORT_HOME/$REPORT_TARGET ]; then warning "Directory $REPORT_HOME/$REPORT_TARGET already exists, using /tmp/$$/$REPORT_TARGET instead" REPORT_HOME=/tmp/$$ fi fi mkdir -p $REPORT_HOME/$REPORT_TARGET cd $REPORT_HOME/$REPORT_TARGET case $CLUSTER in any) cluster=`get_cluster_type`;; *) cluster=$CLUSTER;; esac cluster_cf=`find_cluster_cf $cluster` # If cluster stack is still "any", this might be a Pacemaker Remote node, # so don't complain in that case. if [ -z "$cluster_cf" ] && [ $cluster != "any" ]; then warning "Could not determine the location of your cluster configuration" fi if [ "$SEARCH_LOGS" = "1" ]; then logfiles=$(get_logfiles "$cluster" "$cluster_cf" | sort -u) fi logfiles="$(trim "$logfiles $EXTRA_LOGS")" if [ -z "$logfiles" ]; then which journalctl > /dev/null 2>&1 if [ $? -eq 0 ]; then info "Systemd journal will be only log collected" else info "No logs will be collected" fi info "No log files found or specified with --logfile /some/path" fi debug "Config: $cluster ($cluster_cf) $logfiles" sys_info $cluster $PACKAGES > $SYSINFO_F essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1 getconfig $cluster "$REPORT_HOME/$REPORT_TARGET" "$cluster_cf" "$CRM_CONFIG_DIR/$CIB_F" "/etc/drbd.conf" "/etc/drbd.d" "/etc/booth" getpeinputs $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$REPORT_TARGET/$BT_F getblackboxes $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET case $cluster in corosync) if is_running corosync; then corosync-blackbox >corosync-blackbox-live.txt 2>&1 # corosync-fplay > corosync-blackbox.txt tool=`pickfirst corosync-objctl corosync-cmapctl` case $tool in *objctl) $tool -a > corosync.dump 2>/dev/null;; *cmapctl) $tool > corosync.dump 2>/dev/null;; esac corosync-quorumtool -s -i > corosync.quorum 2>&1 fi ;; esac dc=`crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}'` if [ "$REPORT_TARGET" = "$dc" ]; then echo "$REPORT_TARGET" > DC fi dlm_dump > $DLM_DUMP_F 2>&1 sys_stats > $SYSSTATS_F 2>&1 drbd_info > $DRBD_INFO_F 2>&1 debug "Sanitizing files: $SANITIZE" # # replace sensitive info with '****' # cf="" if [ ! -z "$cluster_cf" ]; then cf=`basename $cluster_cf` fi for f in "$cf" "$CIB_F" "$CIB_F.live" pengine/*; do if [ -f "$f" ]; then sanitize "$f" fi done # For convenience, generate human-readable version of CIB and any XML errors # in it (AFTER sanitizing, so we don't need to sanitize this output). # sosreport does this itself, so we do not need to when run by sosreport. if [ "$SOS_MODE" != "1" ]; then get_readable_cib "$REPORT_HOME/$REPORT_TARGET" fi collect_logs "$LOG_START" "$LOG_END" $logfiles # Purge files containing no information for f in `ls -1`; do if [ -d "$f" ]; then continue elif [ ! -s "$f" ]; then case $f in *core*) log "Detected empty core file: $f";; *) debug "Removing empty file: `ls -al $f`" rm -f $f ;; esac fi done # Parse for events for l in $logfiles; do b="$(basename $l).extract.txt" node_events "$b" > $EVENTS_F # Link the first logfile to a standard name if it doesn't yet exist if [ -e "$b" -a ! -e "$HALOG_F" ]; then ln -s "$b" "$HALOG_F" fi done if [ -e $REPORT_HOME/.env ]; then debug "Localhost: $REPORT_MASTER $REPORT_TARGET" elif [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then debug "Streaming report back to $REPORT_MASTER" (cd $REPORT_HOME && tar cf - $REPORT_TARGET) if [ "$REMOVE" = "1" ]; then cd rm -rf $REPORT_HOME fi fi # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/tools/report.common.in b/tools/report.common.in index 9f48f14fb1..b98888e435 100644 --- a/tools/report.common.in +++ b/tools/report.common.in @@ -1,874 +1,875 @@ # # Originally based on hb_report # Copyright 2007 Dejan Muhamedagic +# Later changes copyright 2010-2018 the Pacemaker project contributors # -# Later changes copyright 2010-2018 Andrew Beekhof +# The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # host=`uname -n` shorthost=`echo $host | sed s:\\\\..*::` if [ -z $verbose ]; then verbose=0 fi # Target Files EVENTS_F=events.txt ANALYSIS_F=analysis.txt HALOG_F=cluster-log.txt BT_F=backtraces.txt SYSINFO_F=sysinfo.txt SYSSTATS_F=sysstats.txt DLM_DUMP_F=dlm_dump.txt CRM_MON_F=crm_mon.txt MEMBERSHIP_F=members.txt CRM_VERIFY_F=crm_verify.txt PERMISSIONS_F=permissions.txt CIB_F=cib.xml CIB_TXT_F=cib.txt DRBD_INFO_F=drbd_info.txt EVENT_PATTERNS=" state do_state_transition membership pcmk_peer_update.*(lost|memb): quorum (crmd|pacemaker-controld).*crm_update_quorum pause Process.pause.detected resources (lrmd|pacemaker-execd).*rsc:(start|stop) stonith te_fence_node|fenced.*(requests|(Succeeded|Failed).to.|result=) start_stop shutdown.decision|Corosync.Cluster.Engine|corosync.*Initializing.transport|Executive.Service.RELEASE|crm_shutdown:.Requesting.shutdown|pcmk_shutdown:.Shutdown.complete " # superset of all packages of interest on all distros # (the package manager will be used to validate the installation # of any of these packages that are installed) PACKAGES="pacemaker pacemaker-libs pacemaker-cluster-libs libpacemaker3 pacemaker-remote pacemaker-pygui pacemaker-pymgmt pymgmt-client corosync corosynclib libcorosync4 resource-agents cluster-glue-libs cluster-glue libglue2 ldirectord ocfs2-tools ocfs2-tools-o2cb ocfs2console ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen lvm2 lvm2-clvm cmirrord libdlm libdlm2 libdlm3 hawk ruby lighttpd kernel-default kernel-pae kernel-xen glibc " # Potential locations of system log files SYSLOGS=" /var/log/* /var/logs/* /var/syslog/* /var/adm/* /var/log/ha/* /var/log/cluster/* " # Whether pacemaker-remoted was found (0 = yes, 1 = no, -1 = haven't looked yet) REMOTED_STATUS=-1 # # keep the user posted # record() { if [ x != x"$REPORT_HOME" -a -d "${REPORT_HOME}/$shorthost" ]; then rec="${REPORT_HOME}/$shorthost/report.out" elif [ x != x"${l_base}" -a -d "${l_base}" ]; then rec="${l_base}/report.summary" else rec="/dev/null" fi printf "%-10s $*\n" "$shorthost:" 2>&1 >> "${rec}" } log() { printf "%-10s $*\n" "$shorthost:" 1>&2 record "$*" } debug() { if [ $verbose -gt 0 ]; then log "Debug: $*" else record "Debug: $*" fi } info() { log "$*" } warning() { log "WARN: $*" } fatal() { log "ERROR: $*" exit 1 } # check if process of given substring in its name does exist; # only look for processes originated by user 0 (by UID), "@CRM_DAEMON_USER@" # or effective user running this script, and/or group 0 (by GID), # "@CRM_DAEMON_GROUP@" or one of the groups the effective user belongs to # (there's no business in probing any other processes) is_running() { ps -G "0 $(getent group '@CRM_DAEMON_GROUP@' 2>/dev/null | cut -d: -f3) $(id -G)" \ -u "0 @CRM_DAEMON_USER@ $(id -u)" -f \ | grep -Eqs $(echo "$1" | sed -e 's/^\(.\)/[\1]/') } has_remoted() { if [ $REMOTED_STATUS -eq -1 ]; then REMOTED_STATUS=1 if which pacemaker-remoted >/dev/null 2>&1; then REMOTED_STATUS=0 # Check for pre-2.0.0 daemon name in case we have mixed-version cluster elif which pacemaker_remoted >/dev/null 2>&1; then REMOTED_STATUS=0 elif [ -x "@sbindir@/pacemaker-remoted" ]; then REMOTED_STATUS=0 elif [ -x "@sbindir@/pacemaker_remoted" ]; then REMOTED_STATUS=0 else # @TODO: the binary might be elsewhere, # but a global search is too expensive for d in /{usr,opt}/{local/,}{s,}bin; do if [ -x "${d}/pacemaker-remoted" ]; then REMOTED_STATUS=0 elif [ -x "${d}/pacemaker_remoted" ]; then REMOTED_STATUS=0 fi done fi fi return $REMOTED_STATUS } # found_dir found_dir() { echo "$2" info "Pacemaker $1 found in: $2" } detect_daemon_dir() { info "Searching for where Pacemaker daemons live... this may take a while" for d in \ {/usr,/usr/local,/opt/local,@exec_prefix@}/{libexec,lib64,lib}/pacemaker do # pacemaker and pacemaker-cts packages can install to daemon directory, # so check for a file from each if [ -e $d/pacemaker-schedulerd ] || [ -e $d/cts-exec-helper ]; then found_dir "daemons" "$d" return fi done # Pacemaker Remote nodes don't need to install daemons if has_remoted; then info "Pacemaker daemons not found (this appears to be a Pacemaker Remote node)" return fi for f in $(find / -maxdepth $maxdepth -type f -name pacemaker-schedulerd -o -name cts-exec-helper); do d=$(dirname "$f") found_dir "daemons" "$d" return done fatal "Pacemaker daemons not found (nonstandard installation?)" } detect_cib_dir() { d="${local_state_dir}/lib/pacemaker/cib" if [ -f "$d/cib.xml" ]; then found_dir "config files" "$d" return fi # Pacemaker Remote nodes don't need a CIB if has_remoted; then info "Pacemaker config not found (this appears to be a Pacemaker Remote node)" return fi info "Searching for where Pacemaker keeps config information... this may take a while" # TODO: What about false positives where someone copied the CIB? for f in $(find / -maxdepth $maxdepth -type f -name cib.xml); do d=$(dirname $f) found_dir "config files" "$d" return done warning "Pacemaker config not found (nonstandard installation?)" } detect_state_dir() { if [ -n "$CRM_CONFIG_DIR" ]; then # Assume new layout # $local_state_dir/lib/pacemaker/(cib,pengine,blackbox,cores) dirname "$CRM_CONFIG_DIR" # Pacemaker Remote nodes might not have a CRM_CONFIG_DIR elif [ -d "$local_state_dir/lib/pacemaker" ]; then echo $local_state_dir/lib/pacemaker fi } detect_pe_dir() { config_root="$1" d="$config_root/pengine" if [ -d "$d" ]; then found_dir "scheduler inputs" "$d" return fi if has_remoted; then info "Pacemaker scheduler inputs not found (this appears to be a Pacemaker Remote node)" return fi info "Searching for where Pacemaker keeps scheduler inputs... this may take a while" for d in $(find / -maxdepth $maxdepth -type d -name pengine); do found_dir "scheduler inputs" "$d" return done fatal "Pacemaker scheduler inputs not found (nonstandard installation?)" } detect_host() { local_state_dir=@localstatedir@ if [ -d $local_state_dir/run ]; then CRM_STATE_DIR=$local_state_dir/run/crm else info "Searching for where Pacemaker keeps runtime data... this may take a while" for d in `find / -maxdepth $maxdepth -type d -name run`; do local_state_dir=`dirname $d` CRM_STATE_DIR=$d/crm break done info "Found: $CRM_STATE_DIR" fi debug "Machine runtime directory: $local_state_dir" debug "Pacemaker runtime data located in: $CRM_STATE_DIR" CRM_DAEMON_DIR=$(detect_daemon_dir) CRM_CONFIG_DIR=$(detect_cib_dir) config_root=$(detect_state_dir) # Older versions had none BLACKBOX_DIR=$config_root/blackbox debug "Pacemaker blackboxes (if any) located in: $BLACKBOX_DIR" PE_STATE_DIR=$(detect_pe_dir "$config_root") CRM_CORE_DIRS="" for d in $config_root/cores $local_state_dir/lib/corosync; do if [ -d $d ]; then CRM_CORE_DIRS="$CRM_CORE_DIRS $d" fi done debug "Core files located under: $CRM_CORE_DIRS" } time2str() { perl -e "use POSIX; print strftime('%x %X',localtime($1));" } get_time() { perl -e "\$time=\"$*\";" -e ' $unix_tm = 0; eval "use Date::Parse"; if (index($time, ":") < 0) { } elsif (!$@) { $unix_tm = str2time($time); } else { eval "use Date::Manip"; if (!$@) { $unix_tm = UnixDate(ParseDateString($time), "%s"); } } if ($unix_tm != "") { print int($unix_tm); } else { print ""; } ' } get_time_syslog() { awk '{print $1,$2,$3}' } get_time_legacy() { awk '{print $2}' | sed 's/_/ /' } get_time_iso8601() { awk '{print $1}' } get_time_format_for_string() { l="$*" t=$(get_time `echo $l | get_time_syslog`) if [ "x$t" != x ]; then echo syslog return fi t=$(get_time `echo $l | get_time_iso8601`) if [ "x$t" != x ]; then echo iso8601 return fi t=$(get_time `echo $l | get_time_legacy`) if [ "x$t" != x ]; then echo legacy return fi } get_time_format() { t=0 l="" func="" trycnt=10 while [ $trycnt -gt 0 ] && read l; do func=$(get_time_format_for_string $l) if [ "x$func" != x ]; then break fi trycnt=$(($trycnt-1)) done #debug "Logfile uses the $func time format" echo $func } get_time_from_line() { GTFL_FORMAT="$1" shift if [ "$GTFL_FORMAT" = "" ]; then GTFL_FORMAT=$(get_time_format_for_string "$@") fi case $GTFL_FORMAT in syslog|legacy|iso8601) get_time $(echo "$@" | get_time_${GTFL_FORMAT}) ;; *) warning "Unknown time format in: $@" ;; esac } get_first_time() { l="" format=$1 while read l; do ts=$(get_time_from_line "$format" "$l") if [ "x$ts" != x ]; then echo "$ts" return fi done } get_last_time() { l="" best=`date +%s` # Now format=$1 while read l; do ts=$(get_time_from_line "$format" "$l") if [ "x$ts" != x ]; then best=$ts fi done echo $best } linetime() { get_time_from_line "" $(tail -n +$2 $1 | grep -a ":[0-5][0-9]:" | head -n 1) } # # findmsg # # Print the names of up to system logs that contain , # ordered by most recently modified. # findmsg() { max=$1 pattern="$2" found=0 # List all potential system logs ordered by most recently modified. candidates=$(ls -1td $SYSLOGS 2>/dev/null) if [ -z "$candidates" ]; then debug "No system logs found to search for pattern \'$pattern\'" return fi # Portable way to handle files with spaces in their names. SAVE_IFS=$IFS IFS=" " # Check each log file for matches. logfiles="" for f in $candidates; do local cat="" # We only care about readable files with something in them. if [ ! -f "$f" ] || [ ! -r "$f" ] || [ ! -s "$f" ] ; then continue fi cat=$(find_decompressor "$f") # We want to avoid grepping through potentially huge binary logs such # as lastlog. However, control characters sometimes find their way into # text logs, so we use a heuristic of more than 256 nonprintable # characters in the file's first kilobyte. if [ $($cat "$f" 2>/dev/null | head -c 1024 | tr -d '[:print:][:space:]' | wc -c) -gt 256 ] then continue fi # Our patterns are ASCII, so we can use LC_ALL="C" to speed up grep $cat "$f" 2>/dev/null | LC_ALL="C" grep -q -e "$pattern" if [ $? -eq 0 ]; then # Add this file to the list of hits # (using newline as separator to handle spaces in names). if [ -z "$logfiles" ]; then logfiles="$f" else logfiles="$logfiles $f" fi # If we have enough hits, print them and return. found=$(($found+1)) if [ $found -ge $max ]; then break fi fi done 2>/dev/null IFS=$SAVE_IFS if [ -z "$logfiles" ]; then debug "Pattern \'$pattern\' not found in any system logs" else debug "Pattern \'$pattern\' found in: [ $logfiles ]" echo "$logfiles" fi } node_events() { if [ -e $1 ]; then Epatt=`echo "$EVENT_PATTERNS" | while read title p; do [ -n "$p" ] && echo -n "|$p"; done | sed 's/.//' ` grep -E "$Epatt" $1 fi } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } shrink() { olddir=$PWD dir=`dirname $1` base=`basename $1` target=$1.tar tar_options="cf" variant=`pickfirst bzip2 gzip xz false` case $variant in bz*) tar_options="jcf" target="$target.bz2" ;; gz*) tar_options="zcf" target="$target.gz" ;; xz*) tar_options="Jcf" target="$target.xz" ;; *) warning "Could not find a compression program, the resulting tarball may be huge" ;; esac if [ -e $target ]; then fatal "Destination $target already exists, specify an alternate name with --dest" fi cd $dir >/dev/null 2>&1 tar $tar_options $target $base >/dev/null 2>&1 cd $olddir >/dev/null 2>&1 echo $target } findln_by_time() { local logf=$1 local tm=$2 local first=1 # Some logs can be massive (over 1,500,000,000 lines have been seen in the wild) # Even just 'wc -l' on these files can take 10+ minutes local fileSize=`ls -lh | awk '{ print $5 }' | grep -ie G` if [ x$fileSize != x ]; then warning "$logf is ${fileSize} in size and could take many hours to process. Skipping." return fi local last=`wc -l < $logf` while [ $first -le $last ]; do mid=$((($last+$first)/2)) trycnt=10 while [ $trycnt -gt 0 ]; do tmid=`linetime $logf $mid` [ "$tmid" ] && break warning "cannot extract time: $logf:$mid; will try the next one" trycnt=$(($trycnt-1)) # shift the whole first-last segment first=$(($first-1)) last=$(($last-1)) mid=$((($last+$first)/2)) done if [ -z "$tmid" ]; then warning "giving up on log..." return fi if [ $tmid -gt $tm ]; then last=$(($mid-1)) elif [ $tmid -lt $tm ]; then first=$(($mid+1)) else break fi done echo $mid } dumplog() { local logf=$1 local from_line=$2 local to_line=$3 [ "$from_line" ] || return tail -n +$from_line $logf | if [ "$to_line" ]; then head -$(($to_line-$from_line+1)) else cat fi } # # find log/set of logs which are interesting for us # # # find log slices # find_decompressor() { case $1 in *bz2) echo "bzip2 -dc" ;; *gz) echo "gzip -dc" ;; *xz) echo "xz -dc" ;; *) echo "cat" ;; esac } # # check if the log contains a piece of our segment # is_our_log() { local logf=$1 local from_time=$2 local to_time=$3 local cat=`find_decompressor $logf` local format=`$cat $logf | get_time_format` local first_time=`$cat $logf | head -10 | get_first_time $format` local last_time=`$cat $logf | tail -10 | get_last_time $format` if [ x = "x$first_time" -o x = "x$last_time" ]; then warning "Skipping bad logfile '$1': Could not determine log dates" return 0 # skip (empty log?) fi if [ $from_time -gt $last_time ]; then # we shouldn't get here anyway if the logs are in order return 2 # we're past good logs; exit fi if [ $from_time -ge $first_time ]; then return 3 # this is the last good log fi # have to go further back if [ x = "x$to_time" -o $to_time -ge $first_time ]; then return 1 # include this log else return 0 # don't include this log fi } # # go through archived logs (timewise backwards) and see if there # are lines belonging to us # (we rely on untouched log files, i.e. that modify time # hasn't been changed) # arch_logs() { local logf=$1 local from_time=$2 local to_time=$3 # look for files such as: ha-log-20090308 or # ha-log-20090308.gz (.bz2) or ha-log.0, etc ls -t $logf $logf*[0-9z] 2>/dev/null | while read next_log; do is_our_log $next_log $from_time $to_time case $? in 0) ;; # noop, continue 1) echo $next_log # include log and continue debug "Found log $next_log" ;; 2) break;; # don't go through older logs! 3) echo $next_log # include log and continue debug "Found log $next_log" break ;; # don't go through older logs! esac done } # # print part of the log # drop_tmp_file() { [ -z "$tmp" ] || rm -f "$tmp" } print_logseg() { local logf=$1 local from_time=$2 local to_time=$3 # uncompress to a temp file (if necessary) local cat=`find_decompressor $logf` if [ "$cat" != "cat" ]; then tmp=`mktemp` $cat $logf > $tmp trap drop_tmp_file 0 sourcef=$tmp else sourcef=$logf tmp="" fi if [ "$from_time" = 0 ]; then FROM_LINE=1 else FROM_LINE=`findln_by_time $sourcef $from_time` fi if [ -z "$FROM_LINE" ]; then warning "couldn't find line for time $from_time; corrupt log file?" return fi TO_LINE="" if [ "$to_time" != 0 ]; then TO_LINE=`findln_by_time $sourcef $to_time` if [ -z "$TO_LINE" ]; then warning "couldn't find line for time $to_time; corrupt log file?" return fi if [ $FROM_LINE -lt $TO_LINE ]; then dumplog $sourcef $FROM_LINE $TO_LINE log "Including segment [$FROM_LINE-$TO_LINE] from $logf" else debug "Empty segment [$FROM_LINE-$TO_LINE] from $logf" fi else dumplog $sourcef $FROM_LINE $TO_LINE log "Including all logs after line $FROM_LINE from $logf" fi drop_tmp_file trap "" 0 } # # find log/set of logs which are interesting for us # dumplogset() { local logf=$1 local from_time=$2 local to_time=$3 local logf_set=`arch_logs $logf $from_time $to_time` if [ x = "x$logf_set" ]; then return fi local num_logs=`echo "$logf_set" | wc -l` local oldest=`echo $logf_set | awk '{print $NF}'` local newest=`echo $logf_set | awk '{print $1}'` local mid_logfiles=`echo $logf_set | awk '{for(i=NF-1; i>1; i--) print $i}'` # the first logfile: from $from_time to $to_time (or end) # logfiles in the middle: all # the last logfile: from beginning to $to_time (or end) case $num_logs in 1) print_logseg $newest $from_time $to_time;; *) print_logseg $oldest $from_time 0 for f in $mid_logfiles; do `find_decompressor $f` $f debug "including complete $f logfile" done print_logseg $newest 0 $to_time ;; esac } # cut out a stanza getstanza() { awk -v name="$1" ' !in_stanza && NF==2 && /^[a-z][a-z]*[[:space:]]*{/ { # stanza start if ($1 == name) in_stanza = 1 } in_stanza { print } in_stanza && NF==1 && $1 == "}" { exit } ' } # supply stanza in $1 and variable name in $2 # (stanza is optional) getcfvar() { cf_type=$1; shift; cf_var=$1; shift; cf_file=$* [ -f "$cf_file" ] || return case $cf_type in corosync) sed 's/#.*//' < $cf_file | if [ $# -eq 2 ]; then getstanza "$cf_var" shift 1 else cat fi | awk -v varname="$cf_var" ' NF==2 && match($1,varname":$")==1 { print $2; exit; } ' ;; esac } pickfirst() { for x; do which $x >/dev/null 2>&1 && { echo $x return 0 } done return 1 } # # figure out the cluster type, depending on the process list # and existence of configuration files # get_cluster_type() { if is_running corosync; then tool=`pickfirst corosync-objctl corosync-cmapctl` case $tool in *objctl) quorum=`$tool -a | grep quorum.provider | sed 's/.*=\s*//'`;; *cmapctl) quorum=`$tool | grep quorum.provider | sed 's/.*=\s*//'`;; esac stack="corosync" # Now we're guessing... # TODO: Technically these could be anywhere :-/ elif [ -f /etc/corosync/corosync.conf ]; then stack="corosync" else # We still don't know. This might be a Pacemaker Remote node, # or the configuration might be in a nonstandard location. stack="any" fi debug "Detected the '$stack' cluster stack" echo $stack } find_cluster_cf() { case $1 in corosync) best_size=0 best_file="" # TODO: Technically these could be anywhere :-/ for cf in /etc/corosync/corosync.conf; do if [ -f $cf ]; then size=`wc -l $cf | awk '{print $1}'` if [ $size -gt $best_size ]; then best_size=$size best_file=$cf fi fi done if [ -z "$best_file" ]; then debug "Looking for corosync configuration file. This may take a while..." for f in `find / -maxdepth $maxdepth -type f -name corosync.conf`; do best_file=$f break done fi debug "Located corosync config file: $best_file" echo "$best_file" ;; any) # Cluster type is undetermined. Don't complain, because this # might be a Pacemaker Remote node. ;; *) warning "Unknown cluster type: $1" ;; esac } # # check for the major prereq for a) parameter parsing and b) # parsing logs # t=`get_time "12:00"` if [ "$t" = "" ]; then fatal "please install the perl Date::Parse module (perl-DateTime-Format-DateParse on Fedora/Red Hat)" fi # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c index d92320a6ca..cba467397f 100644 --- a/tools/stonith_admin.c +++ b/tools/stonith_admin.c @@ -1,783 +1,785 @@ /* - * Copyright 2009-2018 Andrew Beekhof + * Copyright 2009-2019 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* *INDENT-OFF* */ static struct crm_option long_options[] = { { "help", no_argument, NULL, '?', "\tDisplay this text and exit." }, { "version", no_argument, NULL, '$', "\tDisplay version information and exit." }, { "verbose", no_argument, NULL, 'V', "\tIncrease debug output (may be specified multiple times)." }, { "quiet", no_argument, NULL, 'q', "\tBe less descriptive in output." }, { "cleanup", no_argument, NULL, 'c', "\tCleanup wherever appropriate." }, { "broadcast", no_argument, NULL, 'b', "Broadcast wherever appropriate." }, PCMK__OUTPUT_OPTIONS("text, xml"), { "-spacer-", no_argument, NULL, '-', "\nDevice definition commands:" }, { "register", required_argument, NULL, 'R', "Register the named stonith device. Requires: --agent.\n" "\t\t\tOptional: --option, --env-option." }, { "deregister", required_argument, NULL, 'D', "De-register the named stonith device." }, { "register-level", required_argument, NULL, 'r', "Register a stonith level for the named target,\n" "\t\t\tspecified as one of NAME, @PATTERN, or ATTR=VALUE.\n" "\t\t\tRequires: --index and one or more --device entries." }, { "deregister-level", required_argument, NULL, 'd', "Unregister a stonith level for the named target,\n" "\t\t\tspecified as for --register-level. Requires: --index." }, { "-spacer-", no_argument, NULL, '-', "\nQueries:" }, { "list", required_argument, NULL, 'l', "List devices that can terminate the specified host.\n" "\t\t\tOptional: --timeout." }, { "list-registered", no_argument, NULL, 'L', "List all registered devices. Optional: --timeout." }, { "list-installed", no_argument, NULL, 'I', "List all installed devices. Optional: --timeout." }, { "list-targets", required_argument, NULL, 's', "List the targets that can be fenced by the\n" "\t\t\tnamed device. Optional: --timeout." }, { "metadata", no_argument, NULL, 'M', "\tShow agent metadata. Requires: --agent.\n" "\t\t\tOptional: --timeout." }, { "query", required_argument, NULL, 'Q', "Check the named device's status. Optional: --timeout." }, { "history", required_argument, NULL, 'H', "Show last successful fencing operation for named node\n" "\t\t\t(or '*' for all nodes). Optional: --timeout, --cleanup,\n" "\t\t\t--quiet (show only the operation's epoch timestamp),\n" "\t\t\t--verbose (show all recorded and pending operations),\n" "\t\t\t--broadcast (update history from all nodes available)." }, { "last", required_argument, NULL, 'h', "Indicate when the named node was last fenced.\n" "\t\t\tOptional: --as-node-id." }, { "validate", no_argument, NULL, 'K', "\tValidate a fence device configuration.\n" "\t\t\tRequires: --agent. Optional: --option, --env-option,\n" "\t\t\t--quiet (print no output, only return status).\n" }, { "-spacer-", no_argument, NULL, '-', "\nFencing Commands:" }, { "fence", required_argument, NULL, 'F', "Fence named host. Optional: --timeout, --tolerance." }, { "unfence", required_argument, NULL, 'U', "Unfence named host. Optional: --timeout, --tolerance." }, { "reboot", required_argument, NULL, 'B', "Reboot named host. Optional: --timeout, --tolerance." }, { "confirm", required_argument, NULL, 'C', "Tell cluster that named host is now safely down." }, { "-spacer-", no_argument, NULL, '-', "\nAdditional Options:" }, { "agent", required_argument, NULL, 'a', "The agent to use (for example, fence_xvm;\n" "\t\t\twith --register, --metadata, --validate)." }, { "option", required_argument, NULL, 'o', "Specify a device configuration parameter as NAME=VALUE\n" "\t\t\t(may be specified multiple times; with --register,\n" "\t\t\t--validate)." }, { "env-option", required_argument, NULL, 'e', "Specify a device configuration parameter with the\n" "\t\t\tspecified name, using the value of the\n" "\t\t\tenvironment variable of the same name prefixed with\n" "\t\t\tOCF_RESKEY_ (may be specified multiple times;\n" "\t\t\twith --register, --validate)." }, { "tag", required_argument, NULL, 'T', "Identify fencing operations in logs with the specified\n" "\t\t\ttag; useful when multiple entities might invoke\n" "\t\t\tstonith_admin (used with most commands)." }, { "device", required_argument, NULL, 'v', "Device ID (with --register-level, device to associate with\n" "\t\t\ta given host and level; may be specified multiple times)" #if SUPPORT_CIBSECRETS "\n\t\t\t(with --validate, name to use to load CIB secrets)" #endif "." }, { "index", required_argument, NULL, 'i', "The stonith level (1-9) (with --register-level,\n" "\t\t\t--deregister-level)." }, { "timeout", required_argument, NULL, 't', "Operation timeout in seconds (default 120;\n" "\t\t\tused with most commands)." }, { "as-node-id", no_argument, NULL, 'n', "(Advanced) The supplied node is the corosync node ID\n" "\t\t\t(with --last)." }, { "tolerance", required_argument, NULL, 0, "(Advanced) Do nothing if an equivalent --fence request\n" "\t\t\tsucceeded less than this many seconds earlier\n" "\t\t\t(with --fence, --unfence, --reboot)." }, { 0, 0, 0, 0 } }; /* *INDENT-ON* */ static int st_opts = st_opt_sync_call | st_opt_allow_suicide; static GMainLoop *mainloop = NULL; struct { stonith_t *st; const char *target; const char *action; char *name; int timeout; int tolerance; int rc; } async_fence_data; static int try_mainloop_connect(void) { stonith_t *st = async_fence_data.st; int tries = 10; int i = 0; int rc = 0; for (i = 0; i < tries; i++) { crm_debug("Connecting as %s", async_fence_data.name); rc = st->cmds->connect(st, async_fence_data.name, NULL); if (!rc) { crm_debug("stonith client connection established"); return 0; } else { crm_debug("stonith client connection failed"); } sleep(1); } crm_err("Could not connect to the fencer"); return -1; } static void notify_callback(stonith_t * st, stonith_event_t * e) { if (e->result != pcmk_ok) { return; } if (safe_str_eq(async_fence_data.target, e->target) && safe_str_eq(async_fence_data.action, e->action)) { async_fence_data.rc = e->result; g_main_loop_quit(mainloop); } } static void fence_callback(stonith_t * stonith, stonith_callback_data_t * data) { async_fence_data.rc = data->rc; g_main_loop_quit(mainloop); } static gboolean async_fence_helper(gpointer user_data) { stonith_t *st = async_fence_data.st; int call_id = 0; if (try_mainloop_connect()) { g_main_loop_quit(mainloop); return TRUE; } st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, notify_callback); call_id = st->cmds->fence(st, st_opt_allow_suicide, async_fence_data.target, async_fence_data.action, async_fence_data.timeout, async_fence_data.tolerance); if (call_id < 0) { g_main_loop_quit(mainloop); return TRUE; } st->cmds->register_callback(st, call_id, async_fence_data.timeout, st_opt_timeout_updates, NULL, "callback", fence_callback); return TRUE; } static int mainloop_fencing(stonith_t * st, const char *target, const char *action, int timeout, int tolerance) { crm_trigger_t *trig; async_fence_data.st = st; async_fence_data.target = target; async_fence_data.action = action; async_fence_data.timeout = timeout; async_fence_data.tolerance = tolerance; async_fence_data.rc = -1; trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL); mainloop_set_trigger(trig); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); return async_fence_data.rc; } static int handle_level(stonith_t *st, char *target, int fence_level, stonith_key_value_t *devices, bool added) { char *node = NULL; char *pattern = NULL; char *name = NULL; char *value = strchr(target, '='); /* Determine if targeting by attribute, node name pattern or node name */ if (value != NULL) { name = target; *value++ = '\0'; } else if (*target == '@') { pattern = target + 1; } else { node = target; } /* Register or unregister level as appropriate */ if (added) { return st->cmds->register_level_full(st, st_opts, node, pattern, name, value, fence_level, devices); } return st->cmds->remove_level_full(st, st_opts, node, pattern, name, value, fence_level); } static int handle_history(stonith_t *st, const char *target, int timeout, int quiet, int verbose, int cleanup, int broadcast, pcmk__output_t *out) { stonith_history_t *history = NULL, *hp, *latest = NULL; int rc = 0; if (!quiet) { if (cleanup) { out->info(out, "cleaning up fencing-history%s%s", target ? " for node " : "", target ? target : ""); } if (broadcast) { out->info(out, "gather fencing-history from all nodes"); } } rc = st->cmds->history(st, st_opts | (cleanup?st_opt_cleanup:0) | (broadcast?st_opt_broadcast:0), (safe_str_eq(target, "*")? NULL : target), &history, timeout); out->begin_list(out, "Fencing history", "event", "events"); for (hp = history; hp; hp = hp->next) { if (hp->state == st_done) { latest = hp; } if (quiet || !verbose) { continue; } out->message(out, "stonith-event", hp); } if (latest) { if (quiet && out->supports_quiet) { out->info(out, "%lld", (long long) latest->completed); } else if (!verbose) { // already printed if verbose out->message(out, "stonith-event", latest); } } out->end_list(out); stonith_history_free(history); return rc; } static int validate(stonith_t *st, const char *agent, const char *id, stonith_key_value_t *params, int timeout, int quiet, pcmk__output_t *out) { int rc = 1; char *output = NULL; char *error_output = NULL; rc = st->cmds->validate(st, st_opt_sync_call, id, NULL, agent, params, timeout, &output, &error_output); if (quiet) { return rc; } out->message(out, "validate", agent, id, output, error_output, rc); return rc; } int main(int argc, char **argv) { int flag; int rc = 0; int quiet = 0; int cleanup = 0; int broadcast = 0; int verbose = 0; int argerr = 0; int timeout = 120; int option_index = 0; int fence_level = 0; int no_connect = 0; int tolerance = 0; int as_nodeid = FALSE; bool required_agent = false; char *name = NULL; char *value = NULL; char *target = NULL; char *lists = NULL; const char *agent = NULL; const char *device = NULL; const char *longname = NULL; char action = 0; crm_exit_t exit_code = CRM_EX_OK; stonith_t *st = NULL; stonith_key_value_t *params = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; char *output_ty = NULL; char *output_dest = NULL; pcmk__output_t *out = NULL; crm_log_cli_init("stonith_admin"); crm_set_options(NULL, " []", long_options, "access the Pacemaker fencing API"); async_fence_data.name = strdup(crm_system_name); while (1) { flag = crm_get_option_long(argc, argv, &option_index, &longname); if (flag == -1) break; switch (flag) { case 'V': verbose = 1; crm_bump_log_level(argc, argv); break; case '$': case '?': crm_help(flag, CRM_EX_OK); break; case 'K': required_agent = true; /* fall through */ case 'I': no_connect = 1; /* fall through */ case 'L': action = flag; break; case 'q': quiet = 1; break; case 'c': cleanup = 1; break; case 'b': broadcast = 1; break; case 'R': required_agent = true; /* fall through */ case 'Q': case 'D': case 's': action = flag; device = optarg; break; case 'T': free(async_fence_data.name); async_fence_data.name = crm_strdup_printf("%s.%s", crm_system_name, optarg); break; case 'a': agent = optarg; break; case 'l': target = optarg; action = 'L'; break; case 'M': no_connect = 1; action = flag; required_agent = true; break; case 't': timeout = crm_atoi(optarg, NULL); break; case 'B': case 'F': case 'U': /* using mainloop here */ no_connect = 1; /* fall through */ case 'C': /* Always log the input arguments */ crm_log_args(argc, argv); target = optarg; action = flag; break; case 'n': as_nodeid = TRUE; break; case 'h': case 'H': case 'r': case 'd': target = optarg; action = flag; break; case 'i': fence_level = crm_atoi(optarg, NULL); break; case 'v': devices = stonith_key_value_add(devices, NULL, optarg); break; case 'o': crm_info("Scanning: -o %s", optarg); rc = pcmk_scan_nvpair(optarg, &name, &value); if (rc != 2) { crm_err("Invalid option: -o %s: %s", optarg, pcmk_strerror(rc)); ++argerr; } else { crm_info("Got: '%s'='%s'", name, value); params = stonith_key_value_add(params, name, value); } free(value); value = NULL; free(name); name = NULL; break; case 'e': { char *key = crm_concat("OCF_RESKEY", optarg, '_'); const char *env = getenv(key); if (env == NULL) { crm_err("Invalid option: -e %s", optarg); ++argerr; } else { crm_info("Got: '%s'='%s'", optarg, env); params = stonith_key_value_add(params, optarg, env); } free(key); } break; case 0: if (safe_str_eq("tolerance", longname)) { tolerance = crm_get_msec(optarg) / 1000; /* Send in seconds */ } else if (pcmk__parse_output_args(longname, optarg, &output_ty, &output_dest) == false) { fprintf(stderr, "Unknown long option used: %s\n", longname); ++argerr; } break; default: ++argerr; break; } } if (optind > argc || action == 0) { ++argerr; } if (required_agent && agent == NULL) { fprintf(stderr, "Please specify an agent to query using -a,--agent [value]\n"); ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } CRM_ASSERT(pcmk__register_format("text", pcmk__mk_text_output) == 0); CRM_ASSERT(pcmk__register_format("xml", pcmk__mk_xml_output) == 0); rc = pcmk__output_new(&out, output_ty, output_dest, argv); if (rc != 0) { fprintf(stderr, "Error creating output format %s: %s\n", output_ty, pcmk_strerror(rc)); exit_code = CRM_EX_ERROR; goto done; } stonith_register_messages(out); st = stonith_api_new(); if (!no_connect) { rc = st->cmds->connect(st, async_fence_data.name, NULL); if (rc < 0) { fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc)); exit_code = CRM_EX_DISCONNECT; goto done; } } switch (action) { case 'I': rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout); if (rc < 0) { fprintf(stderr, "Failed to list installed devices: %s\n", pcmk_strerror(rc)); break; } out->begin_list(out, "Installed fence devices", "fence device", "fence devices"); for (dIter = devices; dIter; dIter = dIter->next) { out->list_item(out, "device", dIter->value); } out->end_list(out); rc = 0; stonith_key_value_freeall(devices, 1, 1); break; case 'L': rc = st->cmds->query(st, st_opts, target, &devices, timeout); if (rc < 0) { fprintf(stderr, "Failed to list registered devices: %s\n", pcmk_strerror(rc)); break; } out->begin_list(out, "Registered fence devices", "fence device", "fence devices"); for (dIter = devices; dIter; dIter = dIter->next) { out->list_item(out, "device", dIter->value); } out->end_list(out); rc = 0; stonith_key_value_freeall(devices, 1, 1); break; case 'Q': rc = st->cmds->monitor(st, st_opts, device, timeout); if (rc < 0) { rc = st->cmds->list(st, st_opts, device, NULL, timeout); } break; case 's': rc = st->cmds->list(st, st_opts, device, &lists, timeout); if (rc == 0 && lists) { char *head = lists; char *eol = NULL; out->begin_list(out, "Fence targets", "fence target", "fence targets"); do { char *line = NULL; char *elem = NULL; char *hostname = NULL; char *uuid = NULL; char *status = NULL; eol = strstr(head, "\\n"); line = strndup(head, eol-head); while ((elem = strsep(&line, " ")) != NULL) { if (strcmp(elem, "") == 0) { continue; } if (hostname == NULL) { hostname = elem; } else if (uuid == NULL) { uuid = elem; } else if (status == NULL) { char *end = NULL; status = elem; end = strchr(status, '\n'); if (end != NULL) { *end = '\0'; } } } if (hostname != NULL && uuid != NULL && status != NULL) { out->message(out, "fence-target", hostname, uuid, status); } free(line); head = eol+2; } while (eol != NULL); out->end_list(out); } else if (rc != 0) { fprintf(stderr, "List command returned error. rc : %d\n", rc); } break; case 'R': rc = st->cmds->register_device(st, st_opts, device, NULL, agent, params); break; case 'D': rc = st->cmds->remove_device(st, st_opts, device); break; case 'd': case 'r': rc = handle_level(st, target, fence_level, devices, action == 'r'); break; case 'M': { char *buffer = NULL; rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, timeout); if (rc == pcmk_ok) { out->output_xml(out, "metadata", buffer); } free(buffer); } break; case 'C': rc = st->cmds->confirm(st, st_opts, target); break; case 'B': rc = mainloop_fencing(st, target, "reboot", timeout, tolerance); break; case 'F': rc = mainloop_fencing(st, target, "off", timeout, tolerance); break; case 'U': rc = mainloop_fencing(st, target, "on", timeout, tolerance); break; case 'h': { time_t when = 0; if(as_nodeid) { uint32_t nodeid = atol(target); when = stonith_api_time(nodeid, NULL, FALSE); } else { when = stonith_api_time(0, target, FALSE); } out->message(out, "last-fenced", target, when); } break; case 'H': rc = handle_history(st, target, timeout, quiet, verbose, cleanup, broadcast, out); break; case 'K': device = (devices? devices->key : NULL); rc = validate(st, agent, device, params, timeout, quiet, out); break; } crm_info("Command returned: %s (%d)", pcmk_strerror(rc), rc); exit_code = crm_errno2exit(rc); pcmk__output_free(out, exit_code); done: free(async_fence_data.name); stonith_key_value_freeall(params, 1, 1); if (st != NULL) { st->cmds->disconnect(st); stonith_api_delete(st); } return exit_code; }