diff --git a/cts/cli/regression.crm_verify.exp b/cts/cli/regression.crm_verify.exp index 483efabfb2..a0d95730ce 100644 --- a/cts/cli/regression.crm_verify.exp +++ b/cts/cli/regression.crm_verify.exp @@ -1,112 +1,111 @@ =#=#=#= Begin test: Verbosely verify a file-specified configuration with an unallowed fencing level ID =#=#=#= warning: Ignoring topology registration with invalid level 10 -Warnings found during check: config not valid +Configuration invalid (with warnings) =#=#=#= End test: Verbosely verify a file-specified configuration with an unallowed fencing level ID - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verbosely verify a file-specified configuration with an unallowed fencing level ID =#=#=#= Begin test: Verify a file-specified invalid configuration =#=#=#= -Errors found during check: config not valid --V may provide more details +Configuration invalid (with errors) (-V may provide more detail) =#=#=#= End test: Verify a file-specified invalid configuration - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration =#=#=#= Begin test: Verify a file-specified invalid configuration (XML) =#=#=#= error: Resource test2:0 is of type systemd and therefore cannot be used as a promotable clone resource error: Ignoring <clone> resource 'test2-clone' because configuration is invalid error: CIB did not pass schema validation - Errors found during check: config not valid + Configuration invalid (with errors) =#=#=#= End test: Verify a file-specified invalid configuration (XML) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (XML) =#=#=#= Begin test: Verify a file-specified invalid configuration (verbose) =#=#=#= unpack_config warning: Blind faith: not fencing unseen nodes error: Resource test2:0 is of type systemd and therefore cannot be used as a promotable clone resource error: Ignoring resource 'test2-clone' because configuration is invalid error: CIB did not pass schema validation -Errors found during check: config not valid +Configuration invalid (with errors) =#=#=#= End test: Verify a file-specified invalid configuration (verbose) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (verbose) =#=#=#= Begin test: Verify a file-specified invalid configuration (verbose) (XML) =#=#=#= unpack_config warning: Blind faith: not fencing unseen nodes error: Resource test2:0 is of type systemd and therefore cannot be used as a promotable clone resource error: Ignoring <clone> resource 'test2-clone' because configuration is invalid error: CIB did not pass schema validation - Errors found during check: config not valid + Configuration invalid (with errors) =#=#=#= End test: Verify a file-specified invalid configuration (verbose) (XML) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (verbose) (XML) =#=#=#= Begin test: Verify a file-specified invalid configuration (quiet) =#=#=#= =#=#=#= End test: Verify a file-specified invalid configuration (quiet) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (quiet) =#=#=#= Begin test: Verify a file-specified invalid configuration (quiet) (XML) =#=#=#= error: Resource test2:0 is of type systemd and therefore cannot be used as a promotable clone resource error: Ignoring <clone> resource 'test2-clone' because configuration is invalid error: CIB did not pass schema validation 
=#=#=#= End test: Verify a file-specified invalid configuration (quiet) (XML) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (quiet) (XML) =#=#=#= Begin test: Verify another file-specified invalid configuration (XML) =#=#=#= error: Resource start-up disabled since no STONITH resources have been defined error: Either configure some or disable STONITH with the stonith-enabled option error: NOTE: Clusters with shared data need STONITH to ensure data integrity warning: Node pcmk-1 is unclean but cannot be fenced warning: Node pcmk-2 is unclean but cannot be fenced error: CIB did not pass schema validation - Errors found during check: config not valid + Configuration invalid (with errors) =#=#=#= End test: Verify another file-specified invalid configuration (XML) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify another file-specified invalid configuration (XML) =#=#=#= Begin test: Verify a file-specified valid configuration (XML) =#=#=#= =#=#=#= End test: Verify a file-specified valid configuration (XML) - OK (0) =#=#=#= * Passed: crm_verify - Verify a file-specified valid configuration (XML) =#=#=#= Begin test: Verify a piped-in valid configuration (XML) =#=#=#= =#=#=#= End test: Verify a piped-in valid configuration (XML) - OK (0) =#=#=#= * Passed: crm_verify - Verify a piped-in valid configuration (XML) =#=#=#= Begin test: Verbosely verify a file-specified valid configuration (XML) =#=#=#= =#=#=#= End test: Verbosely verify a file-specified valid configuration (XML) - OK (0) =#=#=#= * Passed: crm_verify - Verbosely verify a file-specified valid configuration (XML) =#=#=#= Begin test: Verbosely verify a piped-in valid configuration (XML) =#=#=#= =#=#=#= End test: Verbosely verify a piped-in valid configuration (XML) - OK (0) =#=#=#= * Passed: crm_verify - Verbosely verify a piped-in valid configuration (XML) =#=#=#= Begin test: Verify a string-supplied valid configuration (XML) =#=#=#= =#=#=#= End test: Verify a string-supplied valid configuration (XML) - OK (0) =#=#=#= * Passed: crm_verify - Verify a string-supplied valid configuration (XML) =#=#=#= Begin test: Verbosely verify a string-supplied valid configuration (XML) =#=#=#= =#=#=#= End test: Verbosely verify a string-supplied valid configuration (XML) - OK (0) =#=#=#= * Passed: crm_verify - Verbosely verify a string-supplied valid configuration (XML) diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in index 8ce354e035..e104701411 100644 --- a/cts/cts-fencing.in +++ b/cts/cts-fencing.in @@ -1,1102 +1,953 @@ #!@PYTHON@ """ Regression tests for Pacemaker's fencer """ -__copyright__ = "Copyright 2012-2023 the Pacemaker project contributors" +__copyright__ = "Copyright 2012-2024 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import argparse import os import sys import subprocess import tempfile # These imports allow running from a source checkout after running `make`. # Note that while this doesn't necessarily mean it will successfully run tests, # but being able to see --help output can be useful. 
if os.path.exists("@abs_top_srcdir@/python"): sys.path.insert(0, "@abs_top_srcdir@/python") if os.path.exists("@abs_top_builddir@/python") and "@abs_top_builddir@" != "@abs_top_srcdir@": sys.path.insert(0, "@abs_top_builddir@/python") from pacemaker.buildoptions import BuildOptions from pacemaker.exitstatus import ExitStatus from pacemaker._cts.corosync import Corosync, localname from pacemaker._cts.errors import ExitCodeError, OutputFoundError, OutputNotFoundError, XmlValidationError from pacemaker._cts.process import killall, exit_if_proc_running from pacemaker._cts.test import Test, Tests TEST_DIR = sys.path[0] def update_path(): """ Set the PATH environment variable appropriately for the tests """ new_path = os.environ['PATH'] if os.path.exists("%s/cts-fencing.in" % TEST_DIR): print("Running tests from the source tree: %s (%s)" % (BuildOptions._BUILD_DIR, TEST_DIR)) # For pacemaker-fenced and cts-fence-helper new_path = "%s/daemons/fenced:%s" % (BuildOptions._BUILD_DIR, new_path) new_path = "%s/tools:%s" % (BuildOptions._BUILD_DIR, new_path) # For stonith_admin new_path = "%s/cts/support:%s" % (BuildOptions._BUILD_DIR, new_path) # For cts-support else: print("Running tests from the install tree: %s (not %s)" % (BuildOptions.DAEMON_DIR, TEST_DIR)) # For pacemaker-fenced, cts-fence-helper, and cts-support new_path = "%s:%s" % (BuildOptions.DAEMON_DIR, new_path) print('Using PATH="%s"' % new_path) os.environ['PATH'] = new_path class FenceTest(Test): """ Executor for a single test """ def __init__(self, name, description, **kwargs): Test.__init__(self, name, description, **kwargs) - if kwargs.get("with_cpg", False): - self._enable_corosync = True - self._daemon_options = ["-c"] - else: - self._enable_corosync = False - self._daemon_options = ["-s"] - self._daemon_location = "pacemaker-fenced" def _kill_daemons(self): killall(["pacemakerd", "pacemaker-fenced"]) def _start_daemons(self): + cmd = ["pacemaker-fenced", "--stand-alone", "--logfile", self.logpath] if self.verbose: - self._daemon_options += ["-V"] - print("Starting %s with %s" % (self._daemon_location, self._daemon_options)) + cmd += ["-V"] + print("Starting %s" % " ".join(cmd)) - cmd = ["pacemaker-fenced", "-l", self.logpath] + self._daemon_options self._daemon_process = subprocess.Popen(cmd) class FenceTests(Tests): """ Collection of all fencing regression tests """ def __init__(self, **kwargs): Tests.__init__(self, **kwargs) self._corosync = Corosync(self.verbose, self.logdir, "cts-fencing") - def new_test(self, name, description, with_cpg=False): + def new_test(self, name, description): """ Create a named test """ - test = FenceTest(name, description, verbose=self.verbose, with_cpg=with_cpg, + test = FenceTest(name, description, verbose=self.verbose, timeout=self.timeout, force_wait=self.force_wait, logdir=self.logdir) self._tests.append(test) return test - def run_cpg_only(self): - """ Run all corosync-enabled tests """ - - for test in self._tests: - if test._enable_corosync: - test.run() - - def run_no_cpg(self): - """ Run all standalone tests """ - - for test in self._tests: - if not test._enable_corosync: - test.run() - def build_api_sanity_tests(self): """ Register tests to verify basic API usage """ verbose_arg = "" if self.verbose: verbose_arg = "-V" - test = self.new_test("standalone_low_level_api_test", "Sanity test client api in standalone mode.") + test = self.new_test("low_level_api_test", "Sanity-test client API") test.add_cmd("cts-fence-helper", args="-t %s" % verbose_arg, validate=False) - test = 
self.new_test("cpg_low_level_api_test", "Sanity test client api using mainloop and cpg.", True) + test = self.new_test("low_level_api_mainloop_test", + "Sanity-test client API using mainloop") test.add_cmd("cts-fence-helper", args="-m %s" % verbose_arg, validate=False) def build_custom_timeout_tests(self): """ Register tests to verify custom timeout usage """ # custom timeout without topology - test = self.new_test("cpg_custom_timeout_1", - "Verify per device timeouts work as expected without using topology.", True) + test = self.new_test("custom_timeout_1", + "Verify per device timeouts work as expected without using topology") test.add_cmd('stonith_admin', args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 -o pcmk_off_timeout=1') test.add_cmd('stonith_admin', args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 -o pcmk_off_timeout=4') test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") # timeout is 5+1+4 = 10 test.add_log_pattern("Total timeout set to 12s") # custom timeout _WITH_ topology - test = self.new_test("cpg_custom_timeout_2", - "Verify per device timeouts work as expected _WITH_ topology.", True) + test = self.new_test("custom_timeout_2", + "Verify per device timeouts work as expected _WITH_ topology") test.add_cmd('stonith_admin', args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 -o pcmk_off_timeout=1000ms') test.add_cmd('stonith_admin', args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 -o pcmk_off_timeout=4000s') test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") # timeout is 5+1+4000 = 4006 test.add_log_pattern("Total timeout set to 4807s") def build_fence_merge_tests(self): """ Register tests to verify when fence operations should be merged """ ### Simple test that overlapping fencing operations get merged - test = self.new_test("cpg_custom_merge_single", - "Verify overlapping identical fencing operations are merged, no fencing levels used.", True) + test = self.new_test("custom_merge_single", + "Verify overlapping identical fencing operations are merged, no fencing levels used") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") ### one merger will happen test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") ### Test that multiple mergers occur - 
test = self.new_test("cpg_custom_merge_multiple", - "Verify multiple overlapping identical fencing operations are merged", True) + test = self.new_test("custom_merge_multiple", + "Verify multiple overlapping identical fencing operations are merged") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o delay=2 -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") ### 4 mergers should occur test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") ### Test that multiple mergers occur with topologies used - test = self.new_test("cpg_custom_merge_with_topology", - "Verify multiple overlapping identical fencing operations are merged with fencing levels.", - True) + test = self.new_test("custom_merge_with_topology", + "Verify multiple overlapping identical fencing operations are merged with fencing levels") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") ### 4 mergers should occur test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client") ### the 
pattern below signifies that both the original and duplicate operation completed test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") test.add_log_pattern("Operation 'off' targeting node3 by ") def build_fence_no_merge_tests(self): """ Register tests to verify when fence operations should not be merged """ - test = self.new_test("cpg_custom_no_merge", - "Verify differing fencing operations are not merged", True) + test = self.new_test("custom_no_merge", + "Verify differing fencing operations are not merged") test.add_cmd("stonith_admin", args="--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node3 node2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false2") test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -F node2 -t 10", no_wait=True) test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 10") test.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client", negative=True) def build_standalone_tests(self): - """ Register a grab bag of tests that can be executed in standalone or corosync mode """ - - test_types = [ - { - "prefix" : "standalone", - "use_cpg" : False, - }, - { - "prefix" : "cpg", - "use_cpg" : True, - }, - ] + """ Register a grab bag of tests """ # test what happens when all devices timeout - for test_type in test_types: - test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"], - "Verify that all devices timeout, a fencing failure is returned.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R false3 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - if test_type["use_cpg"]: - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 2", expected_exitcode=ExitStatus.TIMEOUT) - test.add_log_pattern("Total timeout set to 7s") - else: - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 2", expected_exitcode=ExitStatus.ERROR) - - test.add_log_pattern("targeting node3 using false1 returned ") - test.add_log_pattern("targeting node3 using false2 returned ") - test.add_log_pattern("targeting node3 using false3 returned ") - - # test what happens when multiple devices can fence a node, but the first device fails. 
- for test_type in test_types: - test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"], - "Verify that when one fence device fails for a node, the others are tried.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") - - if test_type["use_cpg"]: - test.add_log_pattern("Total timeout set to 18s") - - # test what happens when we try to use a missing fence-agent. - for test_type in test_types: - test = self.new_test("%s_fence_missing_agent" % test_type["prefix"], - "Verify proper error-handling when using a non-existent fence-agent.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args="--output-as=xml -R true1 -a fence_missing -o mode=pass -o pcmk_host_list=node3") - test.add_cmd("stonith_admin", - args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node2") - - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5", expected_exitcode=ExitStatus.NOSUCH) - test.add_cmd("stonith_admin", args="--output-as=xml -F node2 -t 5") + test = self.new_test("fence_multi_device_failure", + "Verify that all devices timeout, a fencing failure is returned") + test.add_cmd("stonith_admin", + args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false3 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 2", expected_exitcode=ExitStatus.TIMEOUT) + test.add_log_pattern("Total timeout set to 7s") + test.add_log_pattern("targeting node3 using false1 returned ") + test.add_log_pattern("targeting node3 using false2 returned ") + test.add_log_pattern("targeting node3 using false3 returned ") + + # test what happens when multiple devices can fence a node, but the first device fails + test = self.new_test("fence_device_failure_rollover", + "Verify that when one fence device fails for a node, the others are tried") + test.add_cmd("stonith_admin", + args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") + test.add_log_pattern("Total timeout set to 18s") - # simple topology test for one device - for test_type in test_types: - if not test_type["use_cpg"]: - continue + # test what happens when we try to use a missing fence-agent + test = self.new_test("fence_missing_agent", + "Verify proper error-handling when using a non-existent fence-agent") + test.add_cmd("stonith_admin", + args="--output-as=xml -R true1 -a fence_missing -o mode=pass -o pcmk_host_list=node3") + test.add_cmd("stonith_admin", + args="--output-as=xml -R true2 
-a fence_dummy -o mode=pass -o pcmk_host_list=node2") - test = self.new_test("%s_topology_simple" % test_type["prefix"], - "Verify all fencing devices at a level are used.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true") - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5", expected_exitcode=ExitStatus.NOSUCH) + test.add_cmd("stonith_admin", args="--output-as=xml -F node2 -t 5") - test.add_log_pattern("Total timeout set to 6s") - test.add_log_pattern("targeting node3 using true returned 0") + # simple topology test for one device + test = self.new_test("topology_simple", + "Verify all fencing devices at a level are used") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true") + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") + test.add_log_pattern("Total timeout set to 6s") + test.add_log_pattern("targeting node3 using true returned 0") # add topology, delete topology, verify fencing still works - for test_type in test_types: - if not test_type["use_cpg"]: - continue - - test = self.new_test("%s_topology_add_remove" % test_type["prefix"], - "Verify fencing occurrs after all topology levels are removed", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true") - test.add_cmd("stonith_admin", args="--output-as=xml -d node3 -i 1") - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") - - test.add_log_pattern("Total timeout set to 6s") - test.add_log_pattern("targeting node3 using true returned 0") - - # test what happens when the first fencing level has multiple devices. - for test_type in test_types: - if not test_type["use_cpg"]: - continue - - test = self.new_test("%s_topology_device_fails" % test_type["prefix"], - "Verify if one device in a level fails, the other is tried.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R false -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true") - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 20") - - test.add_log_pattern("Total timeout set to 48s") - test.add_log_pattern("targeting node3 using false returned 1") - test.add_log_pattern("targeting node3 using true returned 0") - - # test what happens when the first fencing level fails. 
-        for test_type in test_types:
-            if not test_type["use_cpg"]:
-                continue
-
-            test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"],
-                                 "Verify if one level fails, the next leve is tried.",
-                                 test_type["use_cpg"])
-            test.add_cmd("stonith_admin",
-                         args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"')
-            test.add_cmd("stonith_admin",
-                         args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"')
-            test.add_cmd("stonith_admin",
-                         args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"')
-            test.add_cmd("stonith_admin",
-                         args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"')
-            test.add_cmd("stonith_admin",
-                         args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"')
-            test.add_cmd("stonith_admin",
-                         args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"')
-
-            test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1")
-            test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1")
-            test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2")
-            test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2")
-            test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3")
-            test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4")
-
-            test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 3")
-
-            test.add_log_pattern("Total timeout set to 21s")
-            test.add_log_pattern("targeting node3 using false1 returned 1")
-            test.add_log_pattern("targeting node3 using false2 returned 1")
-            test.add_log_pattern("targeting node3 using true3 returned 0")
-            test.add_log_pattern("targeting node3 using true4 returned 0")
+        test = self.new_test("topology_add_remove",
+                             "Verify fencing occurs after all topology levels are removed")
+        test.add_cmd("stonith_admin",
+                     args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"')
+        test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true")
+        test.add_cmd("stonith_admin", args="--output-as=xml -d node3 -i 1")
+        test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5")
+
+        test.add_log_pattern("Total timeout set to 6s")
+        test.add_log_pattern("targeting node3 using true returned 0")
+
+        # test what happens when the first fencing level has multiple devices
+        test = self.new_test("topology_device_fails",
+                             "Verify if one device in a level fails, the other is tried")
+        test.add_cmd("stonith_admin",
+                     args='--output-as=xml -R false -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"')
+        test.add_cmd("stonith_admin",
+                     args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"')
+        test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false")
+        test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true")
+        test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 20")
+        test.add_log_pattern("Total timeout set to 48s")
+        test.add_log_pattern("targeting node3 using false returned 1")
+        test.add_log_pattern("targeting node3 using true returned 0")
+
+        # test what happens when the first fencing level fails
+        test = self.new_test("topology_multi_level_fails",
+                             "Verify if one level fails, the next level is tried")
+        test.add_cmd("stonith_admin",
+
args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") + + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 3") + + test.add_log_pattern("Total timeout set to 21s") + test.add_log_pattern("targeting node3 using false1 returned 1") + test.add_log_pattern("targeting node3 using false2 returned 1") + test.add_log_pattern("targeting node3 using true3 returned 0") + test.add_log_pattern("targeting node3 using true4 returned 0") # test what happens when the first fencing level had devices that no one has registered - for test_type in test_types: - if not test_type["use_cpg"]: - continue - - test = self.new_test("%s_topology_missing_devices" % test_type["prefix"], - "Verify topology can continue with missing devices.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") - - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") - - # Test what happens if multiple fencing levels are defined, and then the first one is removed. 
- for test_type in test_types: - if not test_type["use_cpg"]: - continue - - test = self.new_test("%s_topology_level_removal" % test_type["prefix"], - "Verify level removal works.", test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') - - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") - - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") - - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") - - # Now remove level 2, verify none of the devices in level two are hit. - test.add_cmd("stonith_admin", args="--output-as=xml -d node3 -i 2") - - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 20") - - test.add_log_pattern("Total timeout set to 96s") - test.add_log_pattern("targeting node3 using false1 returned 1") - test.add_log_pattern("targeting node3 using false2 returned ", - negative=True) - test.add_log_pattern("targeting node3 using true3 returned 0") - test.add_log_pattern("targeting node3 using true4 returned 0") - - # Test targeting a topology level by node name pattern. 
- for test_type in test_types: - if not test_type["use_cpg"]: - continue - - test = self.new_test("%s_topology_level_pattern" % test_type["prefix"], - "Verify targeting topology by node name pattern works.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -r '@node.*' -i 1 -v true") - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") - test.add_log_pattern("targeting node3 using true returned 0") + test = self.new_test("topology_missing_devices", + "Verify topology can continue with missing devices") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") + + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") + + # Test what happens if multiple fencing levels are defined, and then the first one is removed + test = self.new_test("topology_level_removal", + "Verify level removal works") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true4 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3"') + + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") + + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v false2") + + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true3") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 3 -v true4") + + # Now remove level 2, verify none of the devices in level two are hit + test.add_cmd("stonith_admin", args="--output-as=xml -d node3 -i 2") + + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 20") + + test.add_log_pattern("Total timeout set to 96s") + test.add_log_pattern("targeting node3 using false1 
returned 1") + test.add_log_pattern("targeting node3 using false2 returned ", + negative=True) + test.add_log_pattern("targeting node3 using true3 returned 0") + test.add_log_pattern("targeting node3 using true4 returned 0") + + # Test targeting a topology level by node name pattern + test = self.new_test("topology_level_pattern", + "Verify targeting topology by node name pattern works") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", args="--output-as=xml -r '@node.*' -i 1 -v true") + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5") + test.add_log_pattern("targeting node3 using true returned 0") # test allowing commas and semicolons as delimiters in pcmk_host_list - for test_type in test_types: - test = self.new_test("%s_host_list_delimiters" % test_type["prefix"], - "Verify commas and semicolons can be used as pcmk_host_list delimiters", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1,node2,node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=pcmk1;pcmk2;pcmk3"') - test.add_cmd("stonith_admin", args="stonith_admin --output-as=xml -F node2 -t 5") - test.add_cmd("stonith_admin", args="stonith_admin --output-as=xml -F pcmk3 -t 5") - test.add_log_pattern("targeting node2 using true1 returned 0") - test.add_log_pattern("targeting pcmk3 using true2 returned 0") - - # test the stonith builds the correct list of devices that can fence a node. - for test_type in test_types: - test = self.new_test("%s_list_devices" % test_type["prefix"], - "Verify list of devices that can fence a node is correct", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -l node1 -V", - stdout_match="true2", stdout_no_match="true1") - test.add_cmd("stonith_admin", args="--output-as=xml -l node1 -V", - stdout_match="true3", stdout_no_match="true1") + test = self.new_test("host_list_delimiters", + "Verify commas and semicolons can be used as pcmk_host_list delimiters") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1,node2,node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=pcmk1;pcmk2;pcmk3"') + test.add_cmd("stonith_admin", args="stonith_admin --output-as=xml -F node2 -t 5") + test.add_cmd("stonith_admin", args="stonith_admin --output-as=xml -F pcmk3 -t 5") + test.add_log_pattern("targeting node2 using true1 returned 0") + test.add_log_pattern("targeting pcmk3 using true2 returned 0") + + # test the stonith builds the correct list of devices that can fence a node + test = self.new_test("list_devices", + "Verify list of devices that can fence a node is correct") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o 
"pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", args="--output-as=xml -l node1 -V", + stdout_match="true2", stdout_no_match="true1") + test.add_cmd("stonith_admin", args="--output-as=xml -l node1 -V", + stdout_match="true3", stdout_no_match="true1") # simple test of device monitor - for test_type in test_types: - test = self.new_test("%s_monitor" % test_type["prefix"], - "Verify device is reachable", test_type["use_cpg"]) - test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node3"') - test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node3"') + test = self.new_test("monitor", "Verify device is reachable") + test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node3"') + test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -Q true1") - test.add_cmd("stonith_admin", args="--output-as=xml -Q false1") - test.add_cmd("stonith_admin", args="--output-as=xml -Q true2", expected_exitcode=ExitStatus.NOSUCH) + test.add_cmd("stonith_admin", args="--output-as=xml -Q true1") + test.add_cmd("stonith_admin", args="--output-as=xml -Q false1") + test.add_cmd("stonith_admin", args="--output-as=xml -Q true2", expected_exitcode=ExitStatus.NOSUCH) # Verify monitor occurs for duration of timeout period on failure - for test_type in test_types: - test = self.new_test("%s_monitor_timeout" % test_type["prefix"], - "Verify monitor uses duration of timeout period given.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=fail -o monitor_mode=fail -o pcmk_host_list=node3') - test.add_cmd("stonith_admin", args="--output-as=xml -Q true1 -t 5", expected_exitcode=ExitStatus.ERROR) - test.add_log_pattern("Attempt 2 to execute") + test = self.new_test("monitor_timeout", + "Verify monitor uses duration of timeout period given") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy -o mode=fail -o monitor_mode=fail -o pcmk_host_list=node3') + test.add_cmd("stonith_admin", args="--output-as=xml -Q true1 -t 5", expected_exitcode=ExitStatus.ERROR) + test.add_log_pattern("Attempt 2 to execute") # Verify monitor occurs for duration of timeout period on failure, but stops at max retries - for test_type in test_types: - test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"], - "Verify monitor retries until max retry value or timeout is hit.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=fail -o monitor_mode=fail -o pcmk_host_list=node3') - test.add_cmd("stonith_admin", args="--output-as=xml -Q true1 -t 15", expected_exitcode=ExitStatus.ERROR) - test.add_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") + test = self.new_test("monitor_timeout_max_retries", + "Verify monitor retries until max retry value or timeout is hit") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy -o mode=fail -o monitor_mode=fail -o pcmk_host_list=node3') + test.add_cmd("stonith_admin", args="--output-as=xml -Q true1 -t 15", expected_exitcode=ExitStatus.ERROR) + 
test.add_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times") # simple register test - for test_type in test_types: - test = self.new_test("%s_register" % test_type["prefix"], - "Verify devices can be registered and un-registered", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') - test.add_cmd("stonith_admin", args="--output-as=xml -Q true1") - test.add_cmd("stonith_admin", args="--output-as=xml -D true1") - test.add_cmd("stonith_admin", args="--output-as=xml -Q true1", expected_exitcode=ExitStatus.NOSUCH) + test = self.new_test("register", + "Verify devices can be registered and un-registered") + test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') + test.add_cmd("stonith_admin", args="--output-as=xml -Q true1") + test.add_cmd("stonith_admin", args="--output-as=xml -D true1") + test.add_cmd("stonith_admin", args="--output-as=xml -Q true1", expected_exitcode=ExitStatus.NOSUCH) # simple reboot test - for test_type in test_types: - test = self.new_test("%s_reboot" % test_type["prefix"], - "Verify devices can be rebooted", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') - test.add_cmd("stonith_admin", args="--output-as=xml -B node3 -t 5") - test.add_cmd("stonith_admin", args="--output-as=xml -D true1") - test.add_cmd("stonith_admin", args="--output-as=xml -Q true1", expected_exitcode=ExitStatus.NOSUCH) - - # test fencing history. - for test_type in test_types: - if not test_type["use_cpg"]: - continue - test = self.new_test("%s_fence_history" % test_type["prefix"], - "Verify last fencing operation is returned.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5 -V") - test.add_cmd("stonith_admin", args="--output-as=xml -H node3", - stdout_match='action="off" target="node3" .* status="success"') + test = self.new_test("reboot", "Verify devices can be rebooted") + test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') + test.add_cmd("stonith_admin", args="--output-as=xml -B node3 -t 5") + test.add_cmd("stonith_admin", args="--output-as=xml -D true1") + test.add_cmd("stonith_admin", args="--output-as=xml -Q true1", expected_exitcode=ExitStatus.NOSUCH) + + # test fencing history + test = self.new_test("fence_history", + "Verify last fencing operation is returned") + test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node3') + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 -t 5 -V") + test.add_cmd("stonith_admin", args="--output-as=xml -H node3", + stdout_match='action="off" target="node3" .* status="success"') # simple test of dynamic list query - for test_type in test_types: - test = self.new_test("%s_dynamic_list_query" % test_type["prefix"], - "Verify dynamic list of fencing devices can be retrieved.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") - test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1") - test.add_cmd("stonith_admin", args="--output-as=xml -R 
true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
+        test = self.new_test("dynamic_list_query",
+                             "Verify dynamic list of fencing devices can be retrieved")
+        test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
+        test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
+        test.add_cmd("stonith_admin", args="--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
-            test.add_cmd("stonith_admin", args="--output-as=xml -l fake_port_1",
-                         stdout_match='count="3"')
+        test.add_cmd("stonith_admin", args="--output-as=xml -l fake_port_1",
+                     stdout_match='count="3"')

         # fence using dynamic list query
-        for test_type in test_types:
-            test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"],
-                                 "Verify dynamic list of fencing devices can be retrieved.",
-                                 test_type["use_cpg"])
-            test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
-            test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
-            test.add_cmd("stonith_admin", args="--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
+        test = self.new_test("fence_dynamic_list_query",
+                             "Verify a target can be fenced using a dynamic list query")
+        test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
+        test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
+        test.add_cmd("stonith_admin", args="--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
-            test.add_cmd("stonith_admin", args="--output-as=xml -F fake_port_1 -t 5 -V")
+        test.add_cmd("stonith_admin", args="--output-as=xml -F fake_port_1 -t 5 -V")

         # simple test of query using status action
-        for test_type in test_types:
-            test = self.new_test("%s_status_query" % test_type["prefix"],
-                                 "Verify dynamic list of fencing devices can be retrieved.",
-                                 test_type["use_cpg"])
-            test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_check=status')
-            test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_check=status')
-            test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o pcmk_host_check=status')
+        test = self.new_test("status_query",
+                             "Verify list of fencing devices can be retrieved using the status action")
+        test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_check=status')
+        test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_check=status')
+        test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o pcmk_host_check=status')
-            test.add_cmd("stonith_admin", args="--output-as=xml -l fake_port_1",
-                         stdout_match='count="3"')
+        test.add_cmd("stonith_admin", args="--output-as=xml -l fake_port_1",
+                     stdout_match='count="3"')

         # test what happens when no reboot action is advertised
-        for test_type in test_types:
-            test = self.new_test("%s_no_reboot_support" % test_type["prefix"],
-                                 "Verify reboot action defaults to off when no reboot action is advertised by agent.",
-                                 test_type["use_cpg"])
-            test.add_cmd("stonith_admin",
-
args='--output-as=xml -R true1 -a fence_dummy_no_reboot -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -B node1 -t 5 -V") - test.add_log_pattern("does not support reboot") - test.add_log_pattern("using true1 returned 0") + test = self.new_test("no_reboot_support", + "Verify reboot action defaults to off when no reboot action is advertised by agent") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy_no_reboot -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", args="--output-as=xml -B node1 -t 5 -V") + test.add_log_pattern("does not support reboot") + test.add_log_pattern("using true1 returned 0") # make sure reboot is used when reboot action is advertised - for test_type in test_types: - test = self.new_test("%s_with_reboot_support" % test_type["prefix"], - "Verify reboot action can be used when metadata advertises it.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - test.add_cmd("stonith_admin", args="--output-as=xml -B node1 -t 5 -V") - test.add_log_pattern("does not advertise support for 'reboot', performing 'off'", - negative=True) - test.add_log_pattern("using true1 returned 0") + test = self.new_test("with_reboot_support", + "Verify reboot action can be used when metadata advertises it") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + test.add_cmd("stonith_admin", args="--output-as=xml -B node1 -t 5 -V") + test.add_log_pattern("does not advertise support for 'reboot', performing 'off'", + negative=True) + test.add_log_pattern("using true1 returned 0") # make sure all fencing delays are applied correctly and taken into account by fencing timeouts with topology - for test_type in test_types: - if not test_type["use_cpg"]: - continue - - test = self.new_test("%s_topology_delays" % test_type["prefix"], - "Verify all fencing delays are applied correctly and taken into account by fencing timeouts with topology.", - test_type["use_cpg"]) - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1') - test.add_cmd("stonith_admin", - args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1') - # Resulting "random" delay will always be 1 since (rand() % (delay_max - delay_base)) is always 0 here. - test.add_cmd("stonith_admin", - args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1 -o pcmk_delay_max=2') - test.add_cmd("stonith_admin", - args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') - - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") - test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true3") - - test.add_cmd("stonith_admin", args="--output-as=xml -F node3 --delay 1") - - # Total fencing timeout takes all fencing delays into account. - test.add_log_pattern("Total timeout set to 582s") - - # Fencing timeout for the first device takes the requested fencing delay into account. 
- # Fencing timeout also takes pcmk_delay_base into account. - test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true1 .*146s.*", - regex=True) - # Requested fencing delay is applied only for the first device in the first level. - # Static delay from pcmk_delay_base is added. - test.add_log_pattern("Delaying 'off' action targeting node3 using true1 for 2s | timeout=120s requested_delay=1s base=1s max=1s") - - # Fencing timeout no longer takes the requested fencing delay into account for further devices. - test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using false1 .*145s.*", - regex=True) - # Requested fencing delay is no longer applied for further devices. - test.add_log_pattern("Delaying 'off' action targeting node3 using false1 for 1s | timeout=120s requested_delay=0s base=1s max=1s") - - # Fencing timeout takes pcmk_delay_max into account. - test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true2 .*146s.*", - regex=True) - test.add_log_pattern("Delaying 'off' action targeting node3 using true2 for 1s | timeout=120s requested_delay=0s base=1s max=2s") - - test.add_log_pattern("Delaying 'off' action targeting node3 using true3", - negative=True) + test = self.new_test("topology_delays", + "Verify all fencing delays are applied correctly and taken into account by fencing timeouts with topology") + test.add_cmd("stonith_admin", + args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1') + test.add_cmd("stonith_admin", + args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1') + # Resulting "random" delay will always be 1 since (rand() % (delay_max - delay_base)) is always 0 here + test.add_cmd("stonith_admin", + args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3" -o pcmk_delay_base=1 -o pcmk_delay_max=2') + test.add_cmd("stonith_admin", + args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o "pcmk_host_list=node1 node2 node3"') + + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v true1") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 1 -v false1") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true2") + test.add_cmd("stonith_admin", args="--output-as=xml -r node3 -i 2 -v true3") + + test.add_cmd("stonith_admin", args="--output-as=xml -F node3 --delay 1") + + # Total fencing timeout takes all fencing delays into account + test.add_log_pattern("Total timeout set to 582s") + + # Fencing timeout for the first device takes the requested fencing delay + # and pcmk_delay_base into account + test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true1 .*146s.*", + regex=True) + # Requested fencing delay is applied only for the first device in the + # first level, with the static delay from pcmk_delay_base added + test.add_log_pattern("Delaying 'off' action targeting node3 using true1 for 2s | timeout=120s requested_delay=1s base=1s max=1s") + + # Fencing timeout no longer takes the requested fencing delay into account for further devices + test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using false1 .*145s.*", + regex=True) + # Requested fencing delay is no longer applied for further devices + test.add_log_pattern("Delaying 'off' action targeting node3 using false1 for 1s | timeout=120s 
requested_delay=0s base=1s max=1s") + + # Fencing timeout takes pcmk_delay_max into account + test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true2 .*146s.*", + regex=True) + test.add_log_pattern("Delaying 'off' action targeting node3 using true2 for 1s | timeout=120s requested_delay=0s base=1s max=2s") + + test.add_log_pattern("Delaying 'off' action targeting node3 using true3", + negative=True) def build_nodeid_tests(self): """ Register tests that use a corosync node id """ our_uname = localname() ### verify nodeid is supplied when nodeid is in the metadata parameters - test = self.new_test("cpg_supply_nodeid", - "Verify nodeid is given when fence agent has nodeid as parameter", True) + test = self.new_test("supply_nodeid", + "Verify nodeid is given when fence agent has nodeid as parameter") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -F %s -t 3" % our_uname) test.add_log_pattern("as nodeid with fence action 'off' targeting %s" % (our_uname)) ### verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters - test = self.new_test("cpg_do_not_supply_nodeid", - "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter", - True) + test = self.new_test("do_not_supply_nodeid", + "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter") # use a host name that won't be in corosync.conf test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=regr-test') + args='--output-as=xml -R true1 -a fence_dummy_no_nodeid ' + '-o mode=pass -o pcmk_host_list="regr-test %s"' + % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -F regr-test -t 3") test.add_log_pattern("as nodeid with fence action 'off' targeting regr-test", negative=True) - - ### verify nodeid use doesn't explode standalone mode - test = self.new_test("standalone_do_not_supply_nodeid", - "Verify nodeid in metadata parameter list doesn't kill standalone mode", - False) - - test.add_cmd("stonith_admin", - args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -F %s -t 3" % our_uname) test.add_log_pattern("as nodeid with fence action 'off' targeting %s" % our_uname, negative=True) def build_unfence_tests(self): """ Register tests that verify unfencing """ our_uname = localname() ### verify unfencing using automatic unfencing - test = self.new_test("cpg_unfence_required_1", - "Verify require unfencing on all devices when automatic=true in agent's metadata", - True) + test = self.new_test("unfence_required_1", + "Verify require unfencing on all devices when automatic=true in agent's metadata") test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) # both devices should be executed test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") ### verify unfencing using automatic unfencing fails if any of the required agents fail - test = self.new_test("cpg_unfence_required_2", - "Verify require unfencing on all devices when 
automatic=true in agent's metadata", - True) + test = self.new_test("unfence_required_2", + "Verify require unfencing on all devices when automatic=true in agent's metadata") test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=fail -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 6" % our_uname, expected_exitcode=ExitStatus.ERROR) ### verify unfencing using automatic devices with topology - test = self.new_test("cpg_unfence_required_3", - "Verify require unfencing on all devices even when at different topology levels", - True) + test = self.new_test("unfence_required_3", + "Verify require unfencing on all devices even when at different topology levels") test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 1 -v true1" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v true2" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") ### verify unfencing using automatic devices with topology - test = self.new_test("cpg_unfence_required_4", - "Verify all required devices are executed even with topology levels fail.", - True) + test = self.new_test("unfence_required_4", + "Verify all required devices are executed even with topology levels fail") test.add_cmd('stonith_admin', args='--output-as=xml -R true1 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true2 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true3 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R true4 -a fence_dummy_auto_unfence -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R false3 -a fence_dummy -o mode=fail -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd('stonith_admin', args='--output-as=xml -R false4 -a fence_dummy -o mode=fail -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 1 -v true1" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 1 -v false1" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v false2" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v true2" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v false3" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v true3" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 3 -v false4" % 
our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 4 -v true4" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) test.add_log_pattern("using true1 returned 0") test.add_log_pattern("using true2 returned 0") test.add_log_pattern("using true3 returned 0") test.add_log_pattern("using true4 returned 0") def build_unfence_on_target_tests(self): """ Register tests that verify unfencing that runs on the target """ our_uname = localname() ### verify unfencing using on_target device - test = self.new_test("cpg_unfence_on_target_1", - "Verify unfencing with on_target = true", True) + test = self.new_test("unfence_on_target_1", + "Verify unfencing with on_target = true") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) test.add_log_pattern("(on) to be executed on target") ### verify failure of unfencing using on_target device - test = self.new_test("cpg_unfence_on_target_2", - "Verify failure unfencing with on_target = true", - True) + test = self.new_test("unfence_on_target_2", + "Verify failure unfencing with on_target = true") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node_fake_1234"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U node_fake_1234 -t 3", expected_exitcode=ExitStatus.NOSUCH) test.add_log_pattern("(on) to be executed on target") ### verify unfencing using on_target device with topology - test = self.new_test("cpg_unfence_on_target_3", - "Verify unfencing with on_target = true using topology", - True) + test = self.new_test("unfence_on_target_3", + "Verify unfencing with on_target = true using topology") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node3"' % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 1 -v true1" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -r %s -i 2 -v true2" % our_uname) test.add_cmd("stonith_admin", args="--output-as=xml -U %s -t 3" % our_uname) test.add_log_pattern("(on) to be executed on target") ### verify unfencing using on_target device with topology fails when target node doesn't exist - test = self.new_test("cpg_unfence_on_target_4", - "Verify unfencing failure with on_target = true using topology", - True) - + test = self.new_test("unfence_on_target_4", + "Verify unfencing failure with on_target = true using topology") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node_fake"' % our_uname) test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o "pcmk_host_list=%s node_fake"' % our_uname) - test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true2") - test.add_cmd("stonith_admin", args="--output-as=xml -U node_fake -t 3", expected_exitcode=ExitStatus.NOSUCH) test.add_log_pattern("(on) to be executed on target") def build_remap_tests(self): """ Register tests that verify remapping of reboots to off-on """ - test = self.new_test("cpg_remap_simple", - "Verify sequential topology reboot is remapped to all-off-then-all-on", 
True) + test = self.new_test("remap_simple", + "Verify sequential topology reboot is remapped to all-off-then-all-on") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake ' '-o pcmk_off_timeout=1 -o pcmk_reboot_timeout=10') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake ' '-o pcmk_off_timeout=2 -o pcmk_reboot_timeout=20') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_log_pattern("Total timeout set to 3s for peer's fencing targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # fence_dummy sets "on" as an on_target action test.add_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Undoing remap of reboot targeting node_fake") - test = self.new_test("cpg_remap_simple_off", + test = self.new_test("remap_simple_off", "Verify sequential topology reboot skips 'on' if " "pcmk_reboot_action=off or agent doesn't support " - "'on'", True) + "'on'") test.add_cmd("stonith_admin", args="--output-as=xml -R true1 -a fence_dummy -o mode=pass " "-o pcmk_host_list=node_fake -o pcmk_off_timeout=1 " "-o pcmk_reboot_timeout=10 -o pcmk_reboot_action=off") test.add_cmd("stonith_admin", args="--output-as=xml -R true2 -a fence_dummy_no_on " "-o mode=pass -o pcmk_host_list=node_fake " "-o pcmk_off_timeout=2 -o pcmk_reboot_timeout=20") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30) test.add_log_pattern("Total timeout set to 3s for peer's fencing targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") # "on" should be skipped test.add_log_pattern("Not turning node_fake back on using " "true1 because the device is configured " "to stay off") test.add_log_pattern("Not turning node_fake back on using true2" " because the agent doesn't support 'on'") test.add_log_pattern("Undoing remap of reboot targeting node_fake") - test = self.new_test("cpg_remap_automatic", - "Verify remapped topology reboot skips automatic 'on'", True) + test = self.new_test("remap_automatic", + "Verify remapped topology reboot skips automatic 'on'") test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy_auto_unfence ' '-o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy_auto_unfence ' '-o "mode=pass" -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v true1 -v true2") 
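One detail worth restating from the remapping tests above: when a multiple-device reboot is remapped to off-then-on, the total timeout the tests expect is the sum of the devices' pcmk_off_timeout values, not their pcmk_reboot_timeout values. The snippet below only illustrates that expectation with the values used here (the data structure is hypothetical, not Pacemaker code).

    # Illustration only: expected total timeout for a remapped reboot,
    # per the "Total timeout set to 3s" log pattern checked above.
    devices = [
        {"pcmk_off_timeout": 1, "pcmk_reboot_timeout": 10},  # true1
        {"pcmk_off_timeout": 2, "pcmk_reboot_timeout": 20},  # true2
    ]
    assert sum(d["pcmk_off_timeout"] for d in devices) == 3      # expected total
    assert sum(d["pcmk_reboot_timeout"] for d in devices) == 30  # deliberately not used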
test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test.add_log_pattern("perform 'on' action targeting node_fake using", negative=True) test.add_log_pattern("'on' failure", negative=True) - test = self.new_test("cpg_remap_complex_1", - "Verify remapped topology reboot in second level works if non-remapped first level fails", - True) + test = self.new_test("remap_complex_1", + "Verify remapped topology reboot in second level works if non-remapped first level fails") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true1 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using true1") test.add_log_pattern("perform 'off' action targeting node_fake using true2") test.add_log_pattern("Remapped 'off' targeting node_fake complete, remapping to 'on'") test.add_log_pattern("Ignoring true1 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Ignoring true2 'on' failure (no capable peers) targeting node_fake") test.add_log_pattern("Undoing remap of reboot targeting node_fake") - test = self.new_test("cpg_remap_complex_2", - "Verify remapped topology reboot failure in second level proceeds to third level", - True) + test = self.new_test("remap_complex_2", + "Verify remapped topology reboot failure in second level proceeds to third level") test.add_cmd("stonith_admin", args='--output-as=xml -R false1 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R false2 -a fence_dummy -o mode=fail -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true1 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true2 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args='--output-as=xml -R true3 -a fence_dummy -o mode=pass -o pcmk_host_list=node_fake') test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 1 -v false1") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 2 -v true1 -v false2 -v true3") test.add_cmd("stonith_admin", args="--output-as=xml -r node_fake -i 3 -v true2") test.add_cmd("stonith_admin", args="--output-as=xml -B node_fake -t 5") test.add_log_pattern("perform 'reboot' action targeting node_fake using false1") test.add_log_pattern("Remapping multiple-device reboot targeting node_fake") test.add_log_pattern("perform 'off' action targeting node_fake using 
true1") test.add_log_pattern("perform 'off' action targeting node_fake using false2") test.add_log_pattern("Attempted to execute agent fence_dummy (off) the maximum number of times") test.add_log_pattern("Undoing remap of reboot targeting node_fake") test.add_log_pattern("perform 'reboot' action targeting node_fake using true2") test.add_log_pattern("node_fake with true3", negative=True) def build_query_tests(self): """ run stonith_admin --metadata for the fence_dummy agent and check command output """ test = self.new_test("get_metadata", - "Run stonith_admin --metadata for the fence_dummy agent", True) + "Run stonith_admin --metadata for the fence_dummy agent") test.add_cmd("stonith_admin", args="--output-as=xml -a fence_dummy --metadata", stdout_match='= offset: logfile.seek(offset) else: print("%sFile truncated from %d to %d" % (prefix, offset, newsize)) if (newsize * 1.05) < offset: logfile.seek(0) # Don't block when we reach EOF fcntl.fcntl(logfile.fileno(), fcntl.F_SETFL, os.O_NONBLOCK) count = 0 while True: if logfile.tell() >= newsize: break if limit and count >= limit: break line = logfile.readline() if not line: break print(line.strip()) count += 1 print("%sLast read: %d, limit=%d, count=%d" % (prefix, logfile.tell(), limit, count)) def build_options(): """Handle command line arguments.""" # Create the top-level parser parser = argparse.ArgumentParser(description="Support tool for CTS") subparsers = parser.add_subparsers(dest="subparser_name") # Create the parser for the "install" command subparsers.add_parser("install", help="Install support files") # Create the parser for the "uninstall" command subparsers.add_parser("uninstall", help="Remove support files") # Create the parser for the "watch" command watch_parser = subparsers.add_parser("watch", help="Remote log watcher") watch_parser.add_argument("-f", "--filename", default="/var/log/messages", help="File to watch") watch_parser.add_argument("-l", "--limit", type=int, default=0, help="Maximum number of lines to read") watch_parser.add_argument("-o", "--offset", default=0, help="Which line number to start reading from") watch_parser.add_argument("-p", "--prefix", default="", help="String to add to the beginning of each line") args = parser.parse_args() return args if __name__ == "__main__": opts = build_options() if os.geteuid() != 0: print("This command must be run as root") sys.exit(ExitStatus.ERROR) # If the install directory doesn't exist, assume we're in a build directory. 
data_dir = "%s/pacemaker/tests/cts" % BuildOptions.DATA_DIR if not os.path.exists(data_dir): data_dir = "%s/pacemaker/tests/cts" % BuildOptions._BUILD_DIR if opts.subparser_name == "install": cmd_install(data_dir) if opts.subparser_name == "uninstall": cmd_uninstall() if opts.subparser_name == "watch": cmd_watch(opts.filename, opts.limit, opts.offset, opts.prefix) diff --git a/cts/support/fence_dummy.in b/cts/support/fence_dummy.in index 6404000dd5..42c9a54eb9 100644 --- a/cts/support/fence_dummy.in +++ b/cts/support/fence_dummy.in @@ -1,517 +1,519 @@ #!@PYTHON@ """Dummy fence agent for testing.""" __copyright__ = "Copyright 2012-2024 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import io import os import re import sys import time import random import atexit import getopt import contextlib AGENT_VERSION = "4.1.0" OCF_VERSION = "1.0" SHORT_DESC = "Dummy fence agent" LONG_DESC = """fence_dummy is a fake fencing agent which reports success based on its mode (pass|fail|random) without doing anything.""" # Short options used: difhmnoqsvBDHMRUV ALL_OPT = { "quiet": { "getopt": "q", "help": "", "order": 50 }, "verbose": { "getopt": "v", "longopt": "verbose", "help": "-v, --verbose Verbose mode", "required": "0", "shortdesc": "Verbose mode", "order": 51 }, "debug": { "getopt": "D:", "longopt": "debug-file", "help": "-D, --debug-file=[debugfile] Debugging to output file", "required": "0", "shortdesc": "Write debug information to given file", "order": 52 }, "version": { "getopt": "V", "longopt": "version", "help": "-V, --version Display version information and exit", "required": "0", "shortdesc": "Display version information and exit", "order": 53 }, "help": { "getopt": "h", "longopt": "help", "help": "-h, --help Display this help and exit", "required": "0", "shortdesc": "Display help and exit", "order": 54 }, "action": { "getopt": "o:", "longopt": "action", "help": "-o, --action=[action] Action: validate-all, status, list, reboot (default), off or on", "required": "1", "shortdesc": "Fencing Action", "default": "reboot", "order": 1 }, "nodename": { "getopt": "N:", "longopt": "nodename", "help": "-N, --nodename Node name of fence target (ignored)", "required": "0", "shortdesc": "The node name of fence target (ignored)", "order": 2 }, "mode": { "getopt": "M:", "longopt": "mode", "required": "0", "help": "-M, --mode=(pass|fail|random) Exit status to return for non-monitor operations", "shortdesc": "Whether fence operations should always pass, always fail, or fail at random", "order": 3 }, "monitor_mode": { "getopt": "m:", "longopt": "monitor_mode", "help": "-m, --monitor_mode=(pass|fail|random) Exit status to return for monitor operations", "required": "0", "shortdesc": "Whether monitor operations should always pass, always fail, or fail at random", "order": 3 }, "random_sleep_range": { "getopt": "R:", "required": "0", "longopt": "random_sleep_range", "help": "-R, --random_sleep_range=[seconds] Sleep between 1 and [seconds] before returning", "shortdesc": "Wait randomly between 1 and [seconds]", "order": 3 }, "mock_dynamic_hosts": { "getopt": "H:", "longopt": "mock_dynamic_hosts", "help": "-H, --mock_dynamic_hosts=[list] What to return when dynamically queried for possible targets", "required": "0", "shortdesc": "A list of hosts we can fence", "order": 3 }, "delay": { "getopt": "f:", "longopt": "delay", "help": "-f, --delay [seconds] Wait X seconds before fencing is started", "required": "0", "shortdesc": "Wait X seconds 
before fencing is started", "default": "0", "order": 3 }, "monitor_delay": { "getopt": "d:", "longopt": "monitor_delay", "help": "-d, --monitor_delay [seconds] Wait X seconds before monitor completes", "required": "0", "shortdesc": "Wait X seconds before monitor completes", "default": "0", "order": 3 }, "off_delay": { "getopt": "F:", "longopt": "off_delay", "help": "-F, --off_delay [seconds] Wait additional X seconds before off action", "required": "0", "shortdesc": "Wait additional X seconds before off action", "default": "0", "order": 3 }, "plug": { "getopt": "n:", "longopt": "plug", "help": "-n, --plug=[id] Physical plug number on device (ignored)", "required": "1", "shortdesc": "Ignored", "order": 4 }, "port": { "getopt": "n:", "longopt": "plug", "help": "-n, --plug=[id] Physical plug number on device (ignored)", "required": "1", "shortdesc": "Ignored", "order": 4 }, "switch": { "getopt": "s:", "longopt": "switch", "help": "-s, --switch=[id] Physical switch number on device (ignored)", "required": "0", "shortdesc": "Ignored", "order": 4 }, "nodeid": { "getopt": "i:", "longopt": "nodeid", "help": "-i, --nodeid Corosync id of fence target (ignored)", "required": "0", "shortdesc": "Ignored", "order": 4 }, "uuid": { "getopt": "U:", "longopt": "uuid", "help": "-U, --uuid UUID of the VM to fence (ignored)", "required": "0", "shortdesc": "Ignored", "order": 4 } } def agent(): """Return name this file was run as.""" return os.path.basename(sys.argv[0]) def fail_usage(message): """Print a usage message and exit.""" sys.exit("%s\nPlease use '-h' for usage" % message) def show_docs(options, auto_unfence, no_reboot, no_on): """Handle informational options (display info and exit).""" device_opt = options["device_opt"] if "-h" in options: usage(device_opt) sys.exit(0) if "-o" in options and options["-o"].lower() == "metadata": f = "%s.fail" % __file__ if not os.path.exists(f): metadata(device_opt, options, auto_unfence, no_reboot, no_on) else: os.remove(f) sys.exit(0) if "-V" in options: print(AGENT_VERSION) sys.exit(0) def sorted_options(avail_opt): """Return a list of all options, in their internally specified order.""" sorted_list = [(key, ALL_OPT[key]) for key in avail_opt] sorted_list.sort(key=lambda x: x[1]["order"]) return sorted_list def usage(avail_opt): """Print a usage message.""" print("Usage:") print("\t%s [options]" % agent()) print("Options:") for (_, value) in sorted_options(avail_opt): if len(value["help"]) != 0: print(" %s" % value["help"]) def metadata(avail_opt, options, auto_unfence, no_reboot, no_on): """Print agent metadata.""" # This log is just for testing handling of stderr output print("asked for fence_dummy metadata", file=sys.stderr) print(""" %s %s """ % (agent(), SHORT_DESC, AGENT_VERSION, OCF_VERSION, LONG_DESC)) for (option, _) in sorted_options(avail_opt): if "shortdesc" not in ALL_OPT[option]: continue print(' ' % (option, ALL_OPT[option]["required"])) default = "" default_name_arg = "-%s" % ALL_OPT[option]["getopt"][:-1] default_name_no_arg = "-%s" % ALL_OPT[option]["getopt"] if "default" in ALL_OPT[option]: default = 'default="%s"' % ALL_OPT[option]["default"] elif options.get(default_name_arg) is not None: try: default = 'default="%s"' % options[default_name_arg] except TypeError: # @todo/@note: Currently there is no clean way how to handle lists # we can create a string from it but we can't set it on command line default = 'default="%s"' % str(options[default_name_arg]) elif default_name_no_arg in options: default = 'default="true"' mixed = 
ALL_OPT[option]["help"] # split it between option and help text res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) if res is not None: mixed = res.group(1) mixed = mixed.replace("<", "<").replace(">", ">") print(' ' % mixed) if ALL_OPT[option]["getopt"].count(":") > 0: print(' ' % default) else: print(' ' % default) print(' %s' % ALL_OPT[option]["shortdesc"]) print(' ') print(' \n ') if not no_on: if auto_unfence: attr_name = 'automatic' else: attr_name = 'on_target' print(' ' % attr_name) print(' ') if not no_reboot: print(' ') print(' ') print(' ') print(' ') print(' ') print(' ') print('') def option_longopt(option): """Return the getopt-compatible long-option name of the given option.""" if ALL_OPT[option]["getopt"].endswith(":"): return ALL_OPT[option]["longopt"] + "=" return ALL_OPT[option]["longopt"] def opts_from_command_line(argv, avail_opt): """Read options from command-line arguments.""" # Prepare list of options for getopt getopt_string = "" longopt_list = [] for k in avail_opt: if k in ALL_OPT: getopt_string += ALL_OPT[k]["getopt"] else: fail_usage("Parse error: unknown option '%s'" % k) if k in ALL_OPT and "longopt" in ALL_OPT[k]: longopt_list.append(option_longopt(k)) try: (opt, _) = getopt.gnu_getopt(argv, getopt_string, longopt_list) except getopt.GetoptError as error: fail_usage("Parse error: %s" % error.msg) # Transform longopt to short one which are used in fencing agents old_opt = opt opt = {} for old_option in dict(old_opt): if old_option.startswith("--"): for rec in ALL_OPT.values(): if rec.get("longopt") is None: continue long = "--%s" % rec["longopt"] if long == old_option: short = "-%s" % rec["getopt"][0] opt[short] = dict(old_opt)[old_option] else: opt[old_option] = dict(old_opt)[old_option] # Compatibility Layer (with what? probably not needed for fence_dummy) new_opt = dict(opt) if "-T" in new_opt: new_opt["-o"] = "status" if "-n" in new_opt: new_opt["-m"] = new_opt["-n"] opt = new_opt return opt def opts_from_stdin(avail_opt): """Read options from standard input.""" opt = {} name = "" for line in sys.stdin.readlines(): line = line.strip() if line.startswith("#") or (len(line) == 0): continue (name, value) = (line + "=").split("=", 1) value = value[:-1] # Compatibility Layer (with what? 
probably not needed for fence_dummy) if name == "option": name = "action" if name not in avail_opt: print("Parse error: Ignoring unknown option '%s'" % line, file=sys.stderr) continue if ALL_OPT[name]["getopt"].endswith(":"): short = "-%s" % ALL_OPT[name]["getopt"][0] opt[short] = value elif value.lower() in ["1", "yes", "on", "true"]: short = "-%s" % ALL_OPT[name]["getopt"] opt[short] = "1" return opt def process_input(avail_opt): """Set standard environment variables, and parse all options.""" # Set standard environment os.putenv("LANG", "C") os.putenv("LC_ALL", "C") # Read options from command line or standard input if len(sys.argv) > 1: return opts_from_command_line(sys.argv[1:], avail_opt) return opts_from_stdin(avail_opt) def atexit_handler(): """Close stdout on exit.""" try: sys.stdout.close() os.close(1) except IOError: sys.exit("%s failed to close standard output" % agent()) def success_mode(options, option, default_value): """Return exit code specified by option.""" if option in options: test_value = options[option] else: test_value = default_value if test_value == "pass": exitcode = 0 elif test_value == "fail": exitcode = 1 else: exitcode = random.randint(0, 1) return exitcode def write_options(options): """Write out all options to debug file.""" with contextlib.suppress(IOError): with io.open(options["-D"], "at", encoding="utf-8") as debugfile: debugfile.write("### %s ###\n" % time.strftime("%Y-%m-%d %H:%M:%S")) for option in sorted(options): debugfile.write("%s=%s\n" % (option, options[option])) debugfile.write("###\n") def main(): """Run the dummy fencing agent.""" auto_unfence = False no_reboot = False no_on = False # Meta-data can't take parameters, so we simulate different meta-data # behavior based on the executable name (which can be a symbolic link). if sys.argv[0].endswith("_auto_unfence"): auto_unfence = True elif sys.argv[0].endswith("_no_reboot"): no_reboot = True elif sys.argv[0].endswith("_no_on"): no_on = True + elif sys.argv[0].endswith("_no_nodeid"): + del ALL_OPT["nodeid"] device_opt = ALL_OPT.keys() # Defaults for fence agent atexit.register(atexit_handler) options = process_input(device_opt) options["device_opt"] = device_opt show_docs(options, auto_unfence, no_reboot, no_on) action = options.get("-o", "reboot") # dump input to file if "-D" in options and action != "validate-all": write_options(options) if "-f" in options and action != "validate-all": val = int(options["-f"]) print("delay sleep for %d seconds" % val, file=sys.stderr) time.sleep(val) # random sleep for testing if "-R" in options and action != "validate-all": val = int(options["-R"]) ran = random.randint(1, val) print("random sleep for %d seconds" % ran, file=sys.stderr) time.sleep(ran) if action == "monitor": if "-d" in options: time.sleep(int(options["-d"])) exitcode = success_mode(options, "-m", "pass") elif action == "list": print("fence_dummy action (list) called", file=sys.stderr) if "-H" in options: print(options["-H"]) exitcode = 0 else: print("dynamic hostlist requires mock_dynamic_hosts to be set", file=sys.stderr) exitcode = 1 elif action == "validate-all": if "-f" in options: val = int(options["-f"]) if val > 10: exitcode = 1 else: exitcode = 0 else: exitcode = 1 elif action == "off": if "-F" in options: time.sleep(int(options["-F"])) exitcode = success_mode(options, "-M", "random") else: exitcode = success_mode(options, "-M", "random") # Ensure we generate some error output on failure exit. 
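The `_no_nodeid` branch added to main() above relies on the agent being reachable under several names, since metadata queries cannot take parameters. As a hedged illustration only (the helper and path are assumptions, not the CTS installer's actual code), the name variants can simply be symbolic links to the one script:

    # Illustration only: hypothetical helper creating the fence_dummy name
    # variants that main() switches on via sys.argv[0].endswith(...).
    import os

    VARIANT_SUFFIXES = ("_auto_unfence", "_no_reboot", "_no_on", "_no_nodeid")

    def install_variants(agent_path):
        for suffix in VARIANT_SUFFIXES:
            link = agent_path + suffix
            if not os.path.lexists(link):
                os.symlink(os.path.basename(agent_path), link)

    # Example invocation (path is an assumption):
    # install_variants("/usr/sbin/fence_dummy")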
if exitcode == 1: print("simulated %s failure" % action, file=sys.stderr) sys.exit(exitcode) if __name__ == "__main__": main() diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c index edde8ca820..1424a31623 100644 --- a/daemons/fenced/cts-fence-helper.c +++ b/daemons/fenced/cts-fence-helper.c @@ -1,695 +1,695 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "cts-fence-helper - inject commands into the Pacemaker fencer and watch for events" static GMainLoop *mainloop = NULL; static crm_trigger_t *trig = NULL; static int mainloop_iter = 0; static pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; typedef void (*mainloop_test_iteration_cb) (int check_event); #define MAINLOOP_DEFAULT_TIMEOUT 2 enum test_modes { test_standard = 0, // test using a specific developer environment test_passive, // watch notifications only test_api_sanity, // sanity-test stonith client API using fence_dummy test_api_mainloop, // sanity-test mainloop code with async responses }; struct { enum test_modes mode; } options = { .mode = test_standard }; static gboolean mode_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "--mainloop_api_test", "-m", NULL)) { options.mode = test_api_mainloop; } else if (pcmk__str_any_of(option_name, "--api_test", "-t", NULL)) { options.mode = test_api_sanity; } else if (pcmk__str_any_of(option_name, "--passive", "-p", NULL)) { options.mode = test_passive; } return TRUE; } static GOptionEntry entries[] = { { "mainloop_api_test", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, NULL, NULL, }, { "api_test", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, NULL, NULL, }, { "passive", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, NULL, NULL, }, { NULL } }; static stonith_t *st = NULL; static struct pollfd pollfd; static const int st_opts = st_opt_sync_call; static int expected_notifications = 0; static int verbose = 0; static void mainloop_test_done(const char *origin, bool pass) { if (pass) { crm_info("SUCCESS - %s", origin); mainloop_iter++; mainloop_set_trigger(trig); result.execution_status = PCMK_EXEC_DONE; result.exit_status = CRM_EX_OK; } else { crm_err("FAILURE - %s (%d: %s)", origin, result.exit_status, pcmk_exec_status_str(result.execution_status)); crm_exit(CRM_EX_ERROR); } } static void dispatch_helper(int timeout) { int rc; crm_debug("Looking for notification"); pollfd.events = POLLIN; while (true) { rc = poll(&pollfd, 1, timeout); /* wait 10 minutes, -1 forever */ if (rc > 0) { if (!stonith_dispatch(st)) { break; } } else { break; } } } static void st_callback(stonith_t * st, stonith_event_t * e) { char *desc = NULL; if (st->state == stonith_disconnected) { crm_exit(CRM_EX_DISCONNECT); } desc = stonith__event_description(e); crm_notice("%s", desc); free(desc); if (expected_notifications) { expected_notifications--; } } static void st_global_callback(stonith_t * stonith, stonith_callback_data_t * data) { crm_notice("Call %d exited %d: %s (%s)", data->call_id, stonith__exit_status(data), stonith__execution_status(data), pcmk__s(stonith__exit_reason(data), "unspecified reason")); } static void passive_test(void) { int rc = 
0; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); if (rc != pcmk_ok) { stonith_api_delete(st); crm_exit(CRM_EX_DISCONNECT); } st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT, st_callback); st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback", st_global_callback); dispatch_helper(600 * 1000); } #define single_test(cmd, str, num_notifications, expected_rc) \ { \ int rc = 0; \ rc = cmd; \ expected_notifications = 0; \ if (num_notifications) { \ expected_notifications = num_notifications; \ dispatch_helper(500); \ } \ if (rc != expected_rc) { \ crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s", expected_rc, rc, pcmk_strerror(rc), str); \ crm_exit(CRM_EX_ERROR); \ } else if (expected_notifications) { \ crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s", \ num_notifications, num_notifications - expected_notifications, str); \ crm_exit(CRM_EX_ERROR); \ } else { \ if (verbose) { \ crm_info("SUCCESS - %s: %d", str, rc); \ } else { \ crm_debug("SUCCESS - %s: %d", str, rc); \ } \ } \ }\ static void run_fence_failure_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "fail"); single_test(st-> cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), "Register device1 for failure test", 1, 0); single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF, 3, 0), "Fence failure results off", 1, -ENODATA); single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_REBOOT, 3, 0), "Fence failure results reboot", 1, -ENODATA); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), "Remove device1 for failure test", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void run_fence_failure_rollover_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "fail"); single_test(st-> cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), "Register device1 for rollover test", 1, 0); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "pass"); single_test(st-> cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params), "Register device2 for rollover test", 1, 0); single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF, 3, 0), "Fence rollover results off", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_ON, 3, 0), "Fence rollover results on", 1, -ENODEV); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), "Remove device1 for rollover tests", 1, 0); single_test(st->cmds->remove_device(st, st_opts, "test-id2"), "Remove device2 for rollover tests", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void run_standard_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, 
PCMK_STONITH_HOST_MAP, "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "pass"); params = stonith_key_value_add(params, "mock_dynamic_hosts", "false_1_node1 false_1_node2"); single_test(st-> cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_dummy", params), "Register", 1, 0); stonith_key_value_freeall(params, 1, 1); params = NULL; single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), - PCMK_ACTION_LIST, 1, 0); + PCMK_ACTION_LIST, 0, 0); - single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 1, 0); + single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 0, 0); single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node2", 1), - "Status false_1_node2", 1, 0); + "Status false_1_node2", 0, 0); single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1), - "Status false_1_node1", 1, 0); + "Status false_1_node1", 0, 0); single_test(st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, 1, 0), "Fence unknown-host (expected failure)", 0, -ENODEV); single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, 1, 0), "Fence false_1_node1", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 1, 0), "Unfence false_1_node1", 1, -ENODEV); /* Confirm that an invalid level index is rejected */ single_test(st->cmds->register_level(st, st_opts, "node1", 999, params), "Attempt to register an invalid level index", 0, -EINVAL); single_test(st->cmds->remove_device(st, st_opts, "test-id"), "Remove test-id", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void sanity_tests(void) { int rc = 0; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); if (rc != pcmk_ok) { stonith_api_delete(st); crm_exit(CRM_EX_DISCONNECT); } st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT, st_callback); st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback", st_global_callback); crm_info("Starting API Sanity Tests"); run_standard_test(); run_fence_failure_test(); run_fence_failure_rollover_test(); crm_info("Sanity Tests Passed"); } static void standard_dev_test(void) { int rc = 0; char *tmp = NULL; stonith_key_value_t *params = NULL; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); if (rc != pcmk_ok) { stonith_api_delete(st); crm_exit(CRM_EX_DISCONNECT); } params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "some-host=pcmk-7 true_1_node1=3,4"); rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_xvm", params); crm_debug("Register: %d", rc); rc = st->cmds->list(st, st_opts, "test-id", &tmp, 10); crm_debug("List: %d output: %s", rc, tmp ? 
tmp : ""); rc = st->cmds->monitor(st, st_opts, "test-id", 10); crm_debug("Monitor: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node2", 10); crm_debug("Status false_1_node2: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, 60, 0); crm_debug("Fence unknown-host: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, 60, 0); crm_debug("Fence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "some-host", PCMK_ACTION_OFF, 10, 0); crm_debug("Fence alias: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10); crm_debug("Status alias: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->remove_device(st, st_opts, "test-id"); crm_debug("Remove test-id: %d", rc); stonith_key_value_freeall(params, 1, 1); } static void iterate_mainloop_tests(gboolean event_ready); static void mainloop_callback(stonith_t * stonith, stonith_callback_data_t * data) { pcmk__set_result(&result, stonith__exit_status(data), stonith__execution_status(data), stonith__exit_reason(data)); iterate_mainloop_tests(TRUE); } static int register_callback_helper(int callid) { return st->cmds->register_callback(st, callid, MAINLOOP_DEFAULT_TIMEOUT, st_opt_timeout_updates, NULL, "callback", mainloop_callback); } static void test_async_fence_pass(int check_event) { int rc = 0; if (check_event) { mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK)); return; } rc = st->cmds->fence(st, 0, "true_1_node1", PCMK_ACTION_OFF, MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); } register_callback_helper(rc); /* wait for event */ } #define CUSTOM_TIMEOUT_ADDITION 10 static void test_async_fence_custom_timeout(int check_event) { int rc = 0; static time_t begin = 0; if (check_event) { uint32_t diff = (time(NULL) - begin); if (result.execution_status != PCMK_EXEC_TIMEOUT) { mainloop_test_done(__func__, false); } else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) { crm_err ("Custom timeout test failed, callback expiration should be updated to %d, actual timeout was %d", CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT, diff); mainloop_test_done(__func__, false); } else { mainloop_test_done(__func__, true); } return; } begin = time(NULL); rc = st->cmds->fence(st, 0, "custom_timeout_node1", PCMK_ACTION_OFF, MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); } register_callback_helper(rc); /* wait for event */ } static void test_async_fence_timeout(int check_event) { int rc = 0; if (check_event) { mainloop_test_done(__func__, (result.execution_status == PCMK_EXEC_NO_FENCE_DEVICE)); return; } rc = st->cmds->fence(st, 0, "false_1_node2", PCMK_ACTION_OFF, MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); 
mainloop_test_done(__func__, false); } register_callback_helper(rc); /* wait for event */ } static void test_async_monitor(int check_event) { int rc = 0; if (check_event) { mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK)); return; } rc = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT); if (rc < 0) { crm_err("monitor failed with rc %d", rc); mainloop_test_done(__func__, false); } register_callback_helper(rc); /* wait for event */ } static void test_register_async_devices(int check_event) { char buf[16] = { 0, }; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2"); params = stonith_key_value_add(params, "mode", "fail"); st->cmds->register_device(st, st_opts, "false_1", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "true_1_node1=1,2"); params = stonith_key_value_add(params, "mode", "pass"); st->cmds->register_device(st, st_opts, "true_1", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "custom_timeout_node1=1,2"); params = stonith_key_value_add(params, "mode", "fail"); params = stonith_key_value_add(params, "delay", "1000"); snprintf(buf, sizeof(buf) - 1, "%d", MAINLOOP_DEFAULT_TIMEOUT + CUSTOM_TIMEOUT_ADDITION); params = stonith_key_value_add(params, "pcmk_off_timeout", buf); st->cmds->register_device(st, st_opts, "false_custom_timeout", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); mainloop_test_done(__func__, true); } static void try_mainloop_connect(int check_event) { int rc = stonith_api_connect_retry(st, crm_system_name, 10); if (rc == pcmk_ok) { mainloop_test_done(__func__, true); return; } crm_err("API CONNECTION FAILURE"); mainloop_test_done(__func__, false); } static void iterate_mainloop_tests(gboolean event_ready) { static mainloop_test_iteration_cb callbacks[] = { try_mainloop_connect, test_register_async_devices, test_async_monitor, test_async_fence_pass, test_async_fence_timeout, test_async_fence_custom_timeout, }; if (mainloop_iter == (sizeof(callbacks) / sizeof(mainloop_test_iteration_cb))) { /* all tests ran, everything passed */ crm_info("ALL MAINLOOP TESTS PASSED!"); crm_exit(CRM_EX_OK); } callbacks[mainloop_iter] (event_ready); } static gboolean trigger_iterate_mainloop_tests(gpointer user_data) { iterate_mainloop_tests(FALSE); return TRUE; } static void test_shutdown(int nsig) { int rc = 0; if (st) { rc = st->cmds->disconnect(st); crm_info("Disconnect: %d", rc); crm_debug("Destroy"); stonith_api_delete(st); } if (rc) { crm_exit(CRM_EX_ERROR); } } static void mainloop_tests(void) { trig = mainloop_add_trigger(G_PRIORITY_HIGH, trigger_iterate_mainloop_tests, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, NULL, group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { GError *error = NULL; crm_exit_t exit_code = CRM_EX_OK; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, NULL); if 
(!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } /* We have to use crm_log_init here to set up the logging because there's * different handling for daemons vs. command line programs, and * pcmk__cli_init_logging is set up to only handle the latter. */ crm_log_init(NULL, LOG_INFO, TRUE, (verbose? TRUE : FALSE), argc, argv, FALSE); for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } st = stonith_api_new(); if (st == NULL) { exit_code = CRM_EX_DISCONNECT; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not connect to fencer: API memory allocation failed"); goto done; } switch (options.mode) { case test_standard: standard_dev_test(); break; case test_passive: passive_test(); break; case test_api_sanity: sanity_tests(); break; case test_api_mainloop: mainloop_tests(); break; } test_shutdown(0); done: g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, NULL); crm_exit(exit_code); } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 884699554a..e9f2086c33 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3641 +1,3619 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; uint32_t target_nodeid; char *action; char *device; GList *device_list; GList *next_device_iter; // device_list entry for 
next device to execute void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const stonith_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const stonith_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = pcmk__str_copy(hash_value); char *valptr = value; if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; } free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. 
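 *
 * A minimal illustration (hypothetical value, not part of this patch): a
 * device registered with the parameter
 *
 *     pcmk_reboot_timeout="120s"
 *
 * would cause get_action_timeout(device, "reboot", 60) to return 120
 * rather than the 60-second default, because the per-action key
 * "pcmk_reboot_timeout" is looked up in device->params below.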
*/ static int get_action_timeout(const stonith_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { long long timeout_ms = crm_get_msec(value); return (int) QB_MIN(timeout_ms / 1000, INT_MAX); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static stonith_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { return NULL; } return g_hash_table_lookup(device_list, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if (device != NULL) { return pcmk_is_set(device->flags, st_device_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. 
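 *
 * (Illustrative note, inferred from the checks below: the request must
 * carry the PCMK__XA_ST_DEVICE_ACTION, PCMK__XA_ST_OP, and
 * PCMK__XA_ST_CLIENTID attributes; if any of them is missing, the
 * partially built command is freed and NULL is returned.)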
*/ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; if (msg == NULL) { return NULL; } op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); if (op == NULL) { return NULL; } cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); // All messages must include these cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP); cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id)); crm_element_value_int(msg, PCMK__XA_ST_CALLOPT, &(cmd->options)); crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC); cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP); cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME); cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! 
* \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(fenced_get_local_node())) { pcmk__panic("Watchdog self-fencing required"); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if PCMK__ENABLE_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? 
"" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->target_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && (cmd->target != NULL)) { pcmk__node_status_t *node = pcmk__get_node(0, cmd->target, NULL, pcmk__node_search_cluster_member); cmd->target_nodeid = node->cluster_layer_id; } cmd->device = pcmk__str_copy(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " QB_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? 
"" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); pcmk__xml_free(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, pcmk__str_copy(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, 
st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer); } *metadata = pcmk__xml_parse(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; - if (stand_alone) { - return FALSE; - } - if (!xml) { return FALSE; } xpath = xpath_search(xml, "//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, PCMK_XA_NAME); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it * joins. * * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for * PCMK_XA_AUTOMATIC. 
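 *
 * A hypothetical metadata fragment that would trigger this check
 * (element and attribute spellings are illustrative; the code relies
 * only on PCMK_XA_NAME, PCMK_XA_AUTOMATIC, and PCMK__XA_REQUIRED):
 *
 *     <actions>
 *       <action name="on" automatic="1"/>
 *     </actions>
 *
 * An agent advertising this would get automatic_unfencing set in
 * addition to the st_device_supports_on flag.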
*/ if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC) || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) { device->automatic_unfencing = TRUE; } stonith__set_device_flags(device->flags, device->id, st_device_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) { pcmk__add_word(&(device->on_target_actions), 64, action); } } freeXpathObject(xpath); } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = PCMK_VALUE_DYNAMIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = PCMK_VALUE_STATUS; } else { check_type = PCMK_VALUE_NONE; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode *dev) { const char *value; stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT); CRM_CHECK(agent != NULL, return device); device = pcmk__assert_alloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, PCMK_XA_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE); device->params = xml2list(dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei) && (device->targets != NULL)) { // device->targets is necessary only with PCMK_VALUE_STATIC_LIST g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES); if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = 
pcmk__assert_alloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = pcmk__str_copy(action); cmd->target = pcmk__str_copy(target); cmd->device = pcmk__str_copy(device->id); cmd->origin = pcmk__str_copy(origin); cmd->client = pcmk__str_copy(crm_system_name); cmd->client_name = pcmk__str_copy(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? 
"" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't * explicitly specify PCMK_VALUE_DYNAMIC_LIST */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, PCMK_VALUE_STATUS); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(const stonith_device_t *device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } // Use pcmk__digest_operation() here? if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode *dev, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(dev); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? 
*/ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { const char *local_node_name = fenced_get_local_node(); if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(local_node_name)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(local_node_name); pcmk__insert_dup(device->params, PCMK_STONITH_HOST_LIST, local_node_name); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. 
* Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } void stonith_device_remove(const char *id, bool from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, PCMK_XA_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE), crm_element_value(level, PCMK_XA_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", pcmk__xe_id(level)); } } /*! 
* \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, PCMK_XA_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } - if (!stand_alone /* if standalone, there's no attribute manager */ - && (crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) + if ((crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * \param[out] desc If not NULL, where to store log-friendly level description * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target and \p *desc if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id, char **desc) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) { xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING); } if (xml == NULL) { if (desc != NULL) { *desc = crm_strdup_printf("missing"); } } else { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, PCMK_XA_INDEX, &local_id); if (desc != NULL) { *desc = crm_strdup_printf("%s[%d]", local_target, local_id); } } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. 
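 *
 * A hypothetical request payload illustrating the fields consumed here
 * (IDs and device names are invented for the example):
 *
 *     <fencing-level id="fl-node1-1" target="node1" index="1"
 *                    devices="fence_ipmi_node1,fence_sbd_node1"/>
 *
 * This would register both listed devices at level 1 for target node1,
 * creating node1's topology table entry if it does not already exist.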
* * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of registration */ void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); level = unpack_level_request(msg, &mode, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(pcmk__xe_id(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", pcmk__xe_id(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", pcmk__xe_id(level)); return; } // Ensure level ID is in allowed range if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), pcmk__xe_id(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, PCMK_XA_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. 
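 *
 * For illustration (names invented for the example), a request wrapping
 *
 *     <fencing-level target="node1" index="1"/>
 *
 * would remove only level 1 for node1, while index="0" would remove every
 * level registered for that target, per the handling below.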
* * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of unregistration */ void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; CRM_CHECK(result != NULL, return); level = unpack_level_request(msg, NULL, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST__LEVEL_COUNT)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(pcmk__xe_id(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; char *pos = NULL; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = pcmk__assert_alloc(alloc_size, sizeof(char)); pos = rv; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). 
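 *
 * (Illustrative note, per the handling below: requests naming the
 * watchdog pseudo-device are answered internally. A "list" request
 * returns the configured watchdog targets and a "monitor" request
 * succeeds immediately, without executing any fence agent.)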
*/ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); fenced_set_protocol_error(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_list, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!device->api_registered && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != st_device_supports_none) { stonith_device_t *dev = g_hash_table_lookup(device_list, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, pcmk__str_copy(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! 
* \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? 
NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_suicide)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST, pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? 
NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = pcmk__assert_alloc(1, sizeof(struct device_search_s)); search->host = pcmk__str_copy(host); search->action = pcmk__str_copy(action); search->per_device_timeout = timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); // PCMK__XA_ST_REQUIRED is currently used only for unfencing if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1); } // pcmk__timeout if configured action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %ds using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! 
* \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION); crm_xml_add(child, PCMK_XA_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { const pcmk__node_status_t *node = pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster_member); pcmk__cluster_send_message(node, pcmk_ipc_fenced, reply); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *wrapper = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } // Pack the results into XML wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA); list = pcmk__xe_create(wrapper, __func__); crm_xml_add(list, PCMK__XA_ST_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; xmlNode *dev = NULL; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID); crm_xml_add(dev, PCMK_XA_ID, device->id); crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace); crm_xml_add(dev, PCMK_XA_AGENT, device->agent); // Has had successful monitor, list, or status on this node crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified); crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * 
capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } crm_log_xml_trace(list, "query-result"); stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: pcmk__xml_free(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? 
LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " QB_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true); } - if (!stand_alone && pcmk__is_fencing_action(cmd->action) + if (pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). 
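/*
 * Illustrative sketch (editorial, not from the Pacemaker tree):
 * log_async_result() above assembles its one-line summary in a GString so that
 * optional pieces (agent PID, target, device, exit reason, devices remaining)
 * are appended only when present.  A stripped-down version of that pattern,
 * with an invented helper name (log_op_summary):
 */
#include <glib.h>
#include <stdio.h>

static void
log_op_summary(const char *action, int pid, const char *target, int exit_status)
{
    GString *msg = g_string_sized_new(80);      // reasonable starting size

    g_string_append_printf(msg, "Operation '%s' ", action);
    if (pid != 0) {
        g_string_append_printf(msg, "[%d] ", pid);          // optional piece
    }
    if (target != NULL) {
        g_string_append_printf(msg, "targeting %s ", target);
    }
    g_string_append_printf(msg, "returned %d", exit_status);

    printf("%s\n", msg->str);   // the daemon hands this to do_crm_log() instead
    g_string_free(msg, TRUE);   // TRUE frees the character data as well
}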
*/ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, reply); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); pcmk__xml_free(reply); - - if (stand_alone) { - /* Do notification with a clean data object */ - xmlNode *notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_NOTIFY_FENCE); - - stonith__xe_set_result(notify_data, result); - crm_xml_add(notify_data, PCMK__XA_ST_TARGET, cmd->target); - crm_xml_add(notify_data, PCMK__XA_ST_OP, cmd->op); - crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, "localhost"); - crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, cmd->device); - crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); - crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, cmd->client); - - fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, result, - notify_data); - fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); - } } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? 
"" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static stonith_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { stonith_device_t *next_device = g_hash_table_lookup(device_list, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!device->verified && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. 
*/ next_device = g_hash_table_lookup(device_list, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (device_id != NULL) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { host = node->name; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! 
* \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { PCMK__XA_ST_OP, PCMK__XA_ST_CALLID, PCMK__XA_ST_CLIENTID, PCMK__XA_ST_CLIENTNAME, PCMK__XA_ST_REMOTE_OP, PCMK__XA_ST_CALLOPT, }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client); crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name); crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin); crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id); crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(pcmk__node_status_t *peer) { return (peer != NULL) && (peer->name != NULL) && pcmk_is_set(peer->processes, crm_get_cluster_proc()); } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! 
* \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { GHashTableIter gIter; pcmk__node_status_t *entry = NULL; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->name, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->name); return entry->name; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, PCMK__XA_ST_TARGET); } relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY); op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); /* Delete RELAY operation. */ if ((relay_op_id != NULL) && (target != NULL) && pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. 
* * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = pcmk__xe_create(NULL, "reply"); CRM_ASSERT(request->ipc_client != NULL); crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID); const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); int op_timeout = 0; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. 
*/ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml, LOG_NEVER); if (dev != NULL) { const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = crm_element_value(dev, PCMK__XA_ST_TARGET); action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = pcmk__assert_alloc(1, sizeof(struct st_query_data)); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); query->remote_peer = pcmk__str_copy(request->peer); query->client_id = pcmk__str_copy(client_id); query->target = pcmk__str_copy(target); query->action = pcmk__str_copy(action); query->call_options = request->call_options; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb, st_device_supports_none); return NULL; } // STONITH_OP_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; CRM_ASSERT(request->ipc_client != NULL); flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml, LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION), crm_element_value(dev, PCMK__XA_ST_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { - if ((request->peer != NULL) || stand_alone) { + if (request->peer != NULL) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: fenced_set_protocol_error(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml, LOG_TRACE); const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET); 
const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; pcmk__node_status_t *node = pcmk__get_node(0, alternate_host, NULL, pcmk__node_search_cluster_member); if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY); crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } pcmk__xml_free(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = stonith_device_register(dev, FALSE); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_config_notification(op, &request->result, (dev == NULL)? 
NULL : pcmk__xe_id(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); const char *device_id = crm_element_value(dev, PCMK_XA_ID); const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_config_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, PCMK_XA_ID, &node_id); name = crm_element_value(request->xml, PCMK_XA_UNAME); pcmk__cluster_forget_cluster_node(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { STONITH_OP_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, 
handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } pcmk__xml_free(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { int call_options = st_opt_none; bool is_reply = false; CRM_CHECK(message != NULL, return); if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) { is_reply = true; } crm_element_value_int(message, PCMK__XA_ST_CALLOPT, &call_options); crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, PCMK__XA_ST_OP), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? 
remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 9dc36d62ce..ac90f6f8a6 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,677 +1,658 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" long long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; -gboolean stand_alone = FALSE; gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static struct { - bool no_cib_connect; + gboolean stand_alone; gchar **log_files; } options; crm_exit_t exit_code = CRM_EX_OK; static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } /* Exit code means? 
*/ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, PCMK__XA_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(request, PCMK__XA_ST_OP, op); crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, request); pcmk__xml_free(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid); } crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options); crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); pcmk__xml_free(request); return 0; } /* Error code means? 
*/ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC); const char *op = crm_element_value(msg, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); stonith_peer_callback(xml, NULL); pcmk__xml_free(xml); free(data); } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk_is_set(call_options, st_opt_sync_call)? 
"S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, pcmk__str_none)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, PCMK__XA_SUBT); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id); crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } pcmk__xml_free(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); CRM_LOG_ASSERT(type != NULL); crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(update_msg, PCMK__XA_SUBT, type); crm_xml_add(update_msg, PCMK__XA_ST_OP, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); pcmk__xml_free(update_msg); crm_trace("Notify complete"); } /*! 
* \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD, * \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or * \c STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed (either device ID or string * representation of level * ([])) */ void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { xmlNode *notify_data = pcmk__xe_create(NULL, op); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc); fenced_send_notification(op, result, notify_data); pcmk__xml_free(notify_data); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } } static void stonith_cleanup(void) { fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } pcmk__cluster_destroy_node_caches(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); } -static gboolean -stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, - GError **error) -{ - stand_alone = FALSE; - options.no_cib_connect = true; - return TRUE; -} - struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) { if ((type != pcmk__node_update_processes) && !pcmk_is_set(node->flags, pcmk__node_status_remote)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE); crm_debug("Broadcasting our uname because of node %" PRIu32, node->cluster_layer_id); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, query); pcmk__xml_free(query); } } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or * crm_resource --list-options=fencing instead of querying daemon metadata. 
* * NOTE: pcs (as of at least 0.11.8) uses this */ static int fencer_metadata(void) { const char *name = PCMK__SERVER_FENCED; const char *desc_short = N_("Instance attributes available for all " "\"stonith\"-class resources"); const char *desc_long = N_("Instance attributes available for all " "\"stonith\"-class resources and used by " "Pacemaker's fence daemon"); return pcmk__daemon_metadata(out, name, desc_short, desc_long, pcmk__opt_fencing); } static GOptionEntry entries[] = { - { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, - N_("Deprecated (will be removed in a future release)"), NULL }, - - { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, - stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL }, + { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, + &options.stand_alone, N_("Intended for use in regression testing only"), + NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; pcmk_cluster_t *cluster = NULL; crm_ipc_t *old_instance = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = fencer_metadata(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } // Open additional log files pcmk__add_logfiles(options.log_files, out); crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, (args->verbosity > 0), argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_crit("Aborting start-up because another fencer instance is " "already active"); goto done; } else { // Not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); pcmk__cluster_init_node_caches(); rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto 
done;
     }
     cluster = pcmk_cluster_new();
-    if (!stand_alone) {
 #if SUPPORT_COROSYNC
-        if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
-            pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy);
-            pcmk_cpg_set_deliver_fn(cluster, stonith_peer_ais_callback);
-            pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb);
-        }
+    if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
+        pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy);
+        pcmk_cpg_set_deliver_fn(cluster, stonith_peer_ais_callback);
+        pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb);
+    }
 #endif // SUPPORT_COROSYNC
-        pcmk__cluster_set_status_callback(&st_peer_update_callback);
-
-        if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
-            exit_code = CRM_EX_FATAL;
-            crm_crit("Cannot sign in to the cluster... terminating");
-            goto done;
-        }
-        fenced_set_local_node(cluster->priv->node_name);
+    pcmk__cluster_set_status_callback(&st_peer_update_callback);
-        if (!options.no_cib_connect) {
-            setup_cib();
-        }
+    if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
+        exit_code = CRM_EX_FATAL;
+        crm_crit("Cannot sign in to the cluster... terminating");
+        goto done;
+    }
+    fenced_set_local_node(cluster->priv->node_name);
-    } else {
-        fenced_set_local_node("localhost");
-        crm_warn("Stand-alone mode is deprecated and will be removed "
-                 "in a future release");
+    if (!options.stand_alone) {
+        setup_cib();
     }
     init_device_list();
     init_topology_list();
     pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
     // Create the mainloop and run it...
     mainloop = g_main_loop_new(NULL, FALSE);
     crm_notice("Pacemaker fencer successfully started and accepting connections");
     g_main_loop_run(mainloop);
   done:
     g_strfreev(processed_args);
     pcmk__free_arg_context(context);
     g_strfreev(options.log_files);
     stonith_cleanup();
     pcmk_cluster_free(cluster);
     fenced_scheduler_cleanup();
     pcmk__output_and_clear_error(&error, out);
     if (out != NULL) {
         out->finish(out, exit_code, true, NULL);
         pcmk__output_free(out);
     }
     pcmk__unregister_formats();
     crm_exit(exit_code);
 }
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 9c61c200b8..c4ea58cd74 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -1,331 +1,330 @@
 /*
  * Copyright 2009-2024 the Pacemaker project contributors
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
  */
 #include   // uint32_t, uint64_t
 #include   // xmlNode
 #include
 #include
 #include
 #include
 /*!
  * \internal
  * \brief Check whether target has already been fenced recently
  *
  * \param[in] tolerance  Number of seconds to look back in time
  * \param[in] target     Name of node to search for
  * \param[in] action     Action we want to match
  *
  * \return TRUE if an equivalent fencing operation took place in the last
  *         \p tolerance seconds, FALSE otherwise
  */
 gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
 typedef struct stonith_device_s {
     char *id;
     char *agent;
     char *namespace;
     /*! list of actions that must execute on the target node.
Used for unfencing */ GString *on_target_actions; GList *targets; time_t targets_age; gboolean has_attr_map; // Whether target's nodeid should be passed as a parameter to the agent gboolean include_nodeid; /* whether the cluster should automatically unfence nodes with the device */ gboolean automatic_unfencing; guint priority; uint32_t flags; // Group of enum st_device_flags GHashTable *params; GHashTable *aliases; GList *pending_ops; mainloop_timer_t *timer; crm_trigger_t *work; xmlNode *agent_metadata; /*! A verified device is one that has contacted the * agent successfully to perform a monitor operation */ gboolean verified; gboolean cib_registered; gboolean api_registered; gboolean dirty; } stonith_device_t; /* These values are used to index certain arrays by "phase". Usually an * operation has only one "phase", so phase is always zero. However, some * reboots are remapped to "off" then "on", in which case "reboot" will be * phase 0, "off" will be phase 1 and "on" will be phase 2. */ enum st_remap_phase { st_phase_requested = 0, st_phase_off = 1, st_phase_on = 2, st_phase_max = 3 }; typedef struct remote_fencing_op_s { /* The unique id associated with this operation */ char *id; /*! The node this operation will fence */ char *target; /*! The fencing action to perform on the target. (reboot, on, off) */ char *action; /*! When was the fencing action recorded (seconds since epoch) */ time_t created; /*! Marks if the final notifications have been sent to local stonith clients. */ gboolean notify_sent; /*! The number of query replies received */ guint replies; /*! The number of query replies expected */ guint replies_expected; /*! Does this node own control of this operation */ gboolean owner; /*! After query is complete, This the high level timer that expires the entire operation */ guint op_timer_total; /*! This timer expires the current fencing request. Many fencing * requests may exist in a single operation */ guint op_timer_one; /*! This timer expires the query request sent out to determine * what nodes are contain what devices, and who those devices can fence */ guint query_timer; /*! This is the default timeout to use for each fencing device if no * custom timeout is received in the query. */ gint base_timeout; /*! This is the calculated total timeout an operation can take before * expiring. This is calculated by adding together all the timeout * values associated with the devices this fencing operation may call */ gint total_timeout; /*! * Fencing delay (in seconds) requested by API client (used by controller to * implement \c PCMK_OPT_PRIORITY_FENCING_DELAY). A value of -1 means * disable all configured delays. */ int client_delay; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations * will involve multiple delegates. This value represents the final delegate * that is used. */ char *delegate; /*! The point at which the remote operation completed */ time_t completed; //! Group of enum stonith_call_options associated with this operation uint32_t call_options; /*! The current state of the remote operation. This indicates * what stage the op is in, query, exec, done, duplicate, failed. */ enum op_state state; /*! The node that owns the remote operation */ char *originator; /*! The local client id that initiated the fencing request */ char *client_id; /*! The client's call_id that initiated the fencing request */ int client_callid; /*! 
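
Because a requested reboot may be remapped to an "off" phase followed by an "on" phase, some per-operation bookkeeping is kept per phase and indexed by enum st_remap_phase. Illustrative only; the array below is hypothetical and not a field of remote_fencing_op_t:

    // One slot per phase; st_phase_max gives the array size
    static const char *const phase_action[st_phase_max] = {
        [st_phase_requested] = "reboot",    // the action as originally requested
        [st_phase_off]       = "off",       // first half of a remapped reboot
        [st_phase_on]        = "on",        // second half of a remapped reboot
    };
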
The name of client that initiated the fencing request */ char *client_name; /*! List of the received query results for all the nodes in the cpg group */ GList *query_results; /*! The original request that initiated the remote stonith operation */ xmlNode *request; /*! The current topology level being executed */ guint level; /*! The current operation phase being executed */ enum st_remap_phase phase; /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */ GList *automatic_list; /*! List of all devices at the currently executing topology level */ GList *devices_list; /*! Current entry in the topology device list */ GList *devices; /*! List of duplicate operations attached to this operation. Once this operation * completes, the duplicate operations will be closed out as well. */ GList *duplicates; /*! The point at which the remote operation completed(nsec) */ long long completed_nsec; /*! The (potentially intermediate) result of the operation */ pcmk__action_result_t result; } remote_fencing_op_t; void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged); // Fencer-specific client flags enum st_client_flags { st_callback_unknown = UINT64_C(0), st_callback_notify_fence = (UINT64_C(1) << 0), st_callback_device_add = (UINT64_C(1) << 2), st_callback_device_del = (UINT64_C(1) << 4), st_callback_notify_history = (UINT64_C(1) << 5), st_callback_notify_history_synced = (UINT64_C(1) << 6) }; // How the user specified the target of a topology level enum fenced_target_by { fenced_target_by_unknown = -1, // Invalid or not yet parsed fenced_target_by_name, // By target name fenced_target_by_pattern, // By a pattern matching target names fenced_target_by_attribute, // By a node attribute/value on target }; /* * Complex fencing requirements are specified via fencing topologies. * A topology consists of levels; each level is a list of fencing devices. * Topologies are stored in a hash table by node name. When a node needs to be * fenced, if it has an entry in the topology table, the levels are tried * sequentially, and the devices in each level are tried sequentially. * Fencing is considered successful as soon as any level succeeds; * a level is considered successful if all its devices succeed. * Essentially, all devices at a given level are "and-ed" and the * levels are "or-ed". * * This structure is used for the topology table entries. * Topology levels start from 1, so levels[0] is unused and always NULL. */ typedef struct stonith_topology_s { enum fenced_target_by kind; // How target was specified /*! Node name regex or attribute name=value for which topology applies */ char *target; char *target_value; char *target_pattern; char *target_attribute; /*! 
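
The comment above states the semantics exactly: within a level every device must succeed, and the first level that fully succeeds ends the search. A sketch of that rule, assuming a hypothetical device_succeeds() helper and the ST__LEVEL_COUNT bound used by this structure:

    static bool
    topology_would_succeed(const stonith_topology_t *tp)
    {
        // Levels start at 1; levels[0] is always NULL and skipped
        for (int lvl = 1; lvl < ST__LEVEL_COUNT; lvl++) {
            GList *devices = tp->levels[lvl];
            bool all_ok = (devices != NULL);    // an empty level cannot succeed

            for (GList *iter = devices; all_ok && (iter != NULL);
                 iter = iter->next) {
                // "and" within a level: every device must succeed
                all_ok = device_succeeds((const char *) iter->data);
            }
            if (all_ok) {
                return true;    // "or" across levels: first full success wins
            }
        }
        return false;
    }
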
Names of fencing devices at each topology level */ GList *levels[ST__LEVEL_COUNT]; } stonith_topology_t; void stonith_shutdown(int nsig); void init_device_list(void); void free_device_list(void); void init_topology_list(void); void free_topology_list(void); void free_stonith_remote_op_list(void); void init_stonith_remote_op_hash_table(GHashTable **table); void free_metadata_cache(void); void fenced_unregister_handlers(void); uint64_t get_stonith_flag(const char *name); void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *op_request, const char *remote_peer); int stonith_device_register(xmlNode *msg, gboolean from_cib); void stonith_device_remove(const char *id, bool from_cib); char *stonith_level_key(const xmlNode *msg, enum fenced_target_by); void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); stonith_topology_t *find_topology_for_host(const char *host); void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options); xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result); void do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data); void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc); remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack); void fenced_process_fencing_reply(xmlNode *msg); int process_remote_stonith_query(xmlNode * msg); void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); void stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options); void stonith_fence_history_trim(void); bool fencing_peer_active(pcmk__node_status_t *peer); void set_fencing_completed(remote_fencing_op_t * op); int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg); const char *fenced_device_reboot_action(const char *device_id); bool fenced_device_supports_on(const char *device_id); gboolean node_has_attr(const char *node, const char *name, const char *value); gboolean node_does_watchdog_fencing(const char *node); void fencing_topology_init(void); void setup_cib(void); void fenced_cib_cleanup(void); int fenced_scheduler_init(void); void fenced_set_local_node(const char *node_name); const char *fenced_get_local_node(void); void fenced_scheduler_cleanup(void); void fenced_scheduler_run(xmlNode *cib); static inline void fenced_set_protocol_error(pcmk__action_result_t *result) { pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Fencer API request missing required information (bug?)"); } /*! 
* \internal * \brief Get the device flag to use with a given action when searching devices * * \param[in] action Action to check * * \return st_device_supports_on if \p action is "on", otherwise * st_device_supports_none */ static inline uint32_t fenced_support_flag(const char *action) { if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { return st_device_supports_on; } return st_device_supports_none; } -extern gboolean stand_alone; extern GHashTable *device_list; extern GHashTable *topology; extern long long stonith_watchdog_timeout_ms; extern GList *stonith_watchdog_targets; extern GHashTable *stonith_remote_op_list; extern crm_exit_t exit_code; extern gboolean stonith_shutdown_flag; diff --git a/doc/sphinx/Pacemaker_Administration/alerts.rst b/doc/sphinx/Pacemaker_Administration/alerts.rst index 42efc8d266..ea2b0f9ed2 100644 --- a/doc/sphinx/Pacemaker_Administration/alerts.rst +++ b/doc/sphinx/Pacemaker_Administration/alerts.rst @@ -1,311 +1,301 @@ .. index:: single: alert; agents Alert Agents ------------ .. index:: single: alert; sample agents Using the Sample Alert Agents ############################# Pacemaker provides several sample alert agents, installed in ``/usr/share/pacemaker/alerts`` by default. While these sample scripts may be copied and used as-is, they are provided mainly as templates to be edited to suit your purposes. See their source code for the full set of instance attributes they support. .. topic:: Sending cluster events as SNMP v2c traps .. code-block:: xml .. note:: **SNMP alert agent attributes** The ``timestamp-format`` meta-attribute should always be set to ``%Y-%m-%d,%H:%M:%S.%01N`` when using the SNMP agent, to match the SNMP standard. The SNMP agent provides a number of instance attributes in addition to the one used in the example above. The most useful are ``trap_version``, which defaults to ``2c``, and ``trap_community``, which defaults to ``public``. See the source code for more details. .. topic:: Sending cluster events as SNMP v3 traps .. code-block:: xml .. note:: **SNMP v3 trap configuration** To use SNMP v3, ``trap_version`` must be set to ``3``. ``trap_community`` will be ignored. The example above uses the ``trap_options`` instance attribute to override the security level, authentication protocol, authentication user, and authentication password from snmp.conf. These will be passed to the snmptrap command. Passing the password on the command line is considered insecure; specify authentication and privacy options suitable for your environment. .. topic:: Sending cluster events as e-mails .. code-block:: xml .. index:: single: alert; agent development Writing an Alert Agent ###################### .. index:: single: alert; environment variables single: environment variable; alert agents .. table:: **Environment variables passed to alert agents** :class: longtable :widths: 1 3 +---------------------------+----------------------------------------------------------------+ | Environment Variable | Description | +===========================+================================================================+ | CRM_alert_kind | .. index:: | | | single:environment variable; CRM_alert_kind | | | single:CRM_alert_kind | | | | | | The type of alert (``node``, ``fencing``, ``resource``, or | | | ``attribute``) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_node | .. 
index:: | | | single:environment variable; CRM_alert_node | | | single:CRM_alert_node | | | | | | Name of affected node | +---------------------------+----------------------------------------------------------------+ | CRM_alert_node_sequence | .. index:: | | | single:environment variable; CRM_alert_sequence | | | single:CRM_alert_sequence | | | | | | A sequence number increased whenever an alert is being issued | | | on the local node, which can be used to reference the order in | | | which alerts have been issued by Pacemaker. An alert for an | | | event that happened later in time reliably has a higher | | | sequence number than alerts for earlier events. | | | | | | Be aware that this number has no cluster-wide meaning. | +---------------------------+----------------------------------------------------------------+ | CRM_alert_recipient | .. index:: | | | single:environment variable; CRM_alert_recipient | | | single:CRM_alert_recipient | | | | | | The configured recipient | +---------------------------+----------------------------------------------------------------+ | CRM_alert_timestamp | .. index:: | | | single:environment variable; CRM_alert_timestamp | | | single:CRM_alert_timestamp | | | | | | A timestamp created prior to executing the agent, in the | | | format specified by the ``timestamp-format`` meta-attribute. | | | This allows the agent to have a reliable, high-precision time | | | of when the event occurred, regardless of when the agent | | | itself was invoked (which could potentially be delayed due to | | | system load, etc.). | +---------------------------+----------------------------------------------------------------+ | CRM_alert_timestamp_epoch | .. index:: | | | single:environment variable; CRM_alert_timestamp_epoch | | | single:CRM_alert_timestamp_epoch | | | | | | The same time as ``CRM_alert_timestamp``, expressed as the | | | integer number of seconds since January 1, 1970. This (along | | | with ``CRM_alert_timestamp_usec``) can be useful for alert | | | agents that need to format time in a specific way rather than | | | let the user configure it. | +---------------------------+----------------------------------------------------------------+ | CRM_alert_timestamp_usec | .. index:: | | | single:environment variable; CRM_alert_timestamp_usec | | | single:CRM_alert_timestamp_usec | | | | | | The same time as ``CRM_alert_timestamp``, expressed as the | | | integer number of microseconds since | | | ``CRM_alert_timestamp_epoch``. | +---------------------------+----------------------------------------------------------------+ | CRM_alert_version | .. index:: | | | single:environment variable; CRM_alert_version | | | single:CRM_alert_version | | | | | | The version of Pacemaker sending the alert | +---------------------------+----------------------------------------------------------------+ | CRM_alert_desc | .. index:: | | | single:environment variable; CRM_alert_desc | | | single:CRM_alert_desc | | | | | | Detail about event. For ``node`` alerts, this is the node's | | | current state (``member`` or ``lost``). For ``fencing`` | | | alerts, this is a summary of the requested fencing operation, | | | including origin, target, and fencing operation error code, if | | | any. For ``resource`` alerts, this is a readable string | | | equivalent of ``CRM_alert_status``. | +---------------------------+----------------------------------------------------------------+ | CRM_alert_nodeid | .. 
index:: | | | single:environment variable; CRM_alert_nodeid | | | single:CRM_alert_nodeid | | | | | | ID of node whose status changed (provided with ``node`` alerts | | | only) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_rc | .. index:: | | | single:environment variable; CRM_alert_rc | | | single:CRM_alert_rc | | | | | | The numerical return code of the fencing or resource operation | | | (provided with ``fencing`` and ``resource`` alerts only) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_task | .. index:: | | | single:environment variable; CRM_alert_task | | | single:CRM_alert_task | | | | | | The requested fencing or resource operation (provided with | | | ``fencing`` and ``resource`` alerts only) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_exec_time | .. index:: | | | single:environment variable; CRM_alert_exec_time | | | single:CRM_alert_exec_time | | | | | | The (wall-clock) time, in milliseconds, that it took to | | | execute the action. If the action timed out, | | | ``CRM_alert_status`` will be 2, ``CRM_alert_desc`` will be | | | "Timed Out", and this value will be the action timeout. May | | | not be supported on all platforms. (``resource`` alerts only) | | | *(since 2.0.1)* | +---------------------------+----------------------------------------------------------------+ | CRM_alert_interval | .. index:: | | | single:environment variable; CRM_alert_interval | | | single:CRM_alert_interval | | | | | | The interval of the resource operation (``resource`` alerts | | | only) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_rsc | .. index:: | | | single:environment variable; CRM_alert_rsc | | | single:CRM_alert_rsc | | | | | | The name of the affected resource (``resource`` alerts only) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_status | .. index:: | | | single:environment variable; CRM_alert_status | | | single:CRM_alert_status | | | | | | A numerical code used by Pacemaker to represent the operation | | | result (``resource`` alerts only) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_target_rc | .. index:: | | | single:environment variable; CRM_alert_target_rc | | | single:CRM_alert_target_rc | | | | | | The expected numerical return code of the operation | | | (``resource`` alerts only) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_attribute_name | .. index:: | | | single:environment variable; CRM_alert_attribute_name | | | single:CRM_alert_attribute_name | | | | | | The name of the node attribute that changed (``attribute`` | | | alerts only) | +---------------------------+----------------------------------------------------------------+ | CRM_alert_attribute_value | .. 
index:: | | | single:environment variable; CRM_alert_attribute_value | | | single:CRM_alert_attribute_value | | | | | | The new value of the node attribute that changed | | | (``attribute`` alerts only) | +---------------------------+----------------------------------------------------------------+ Special concerns when writing alert agents: * Alert agents may be called with no recipient (if none is configured), so the agent must be able to handle this situation, even if it only exits in that case. (Users may modify the configuration in stages, and add a recipient later.) * If more than one recipient is configured for an alert, the alert agent will be called once per recipient. If an agent is not able to run concurrently, it should be configured with only a single recipient. The agent is free, however, to interpret the recipient as a list. * When a cluster event occurs, all alerts are fired off at the same time as separate processes. Depending on how many alerts and recipients are configured, and on what is done within the alert agents, a significant load burst may occur. The agent could be written to take this into consideration, for example by queueing resource-intensive actions into some other instance, instead of directly executing them. * Alert agents are run as the |CRM_DAEMON_USER| user, which has a minimal set of permissions. If an agent requires additional privileges, it is recommended to configure ``sudo`` to allow the agent to run the necessary commands as another user with the appropriate privileges. * As always, take care to validate and sanitize user-configured parameters, such as ``CRM_alert_timestamp`` (whose content is specified by the user-configured ``timestamp-format``), ``CRM_alert_recipient,`` and all instance attributes. Mostly this is needed simply to protect against configuration errors, but if some user can modify the CIB without having |CRM_DAEMON_USER| access to the cluster nodes, it is a potential security concern as well, to avoid the possibility of code injection. - -.. note:: **ocf:pacemaker:ClusterMon compatibility** - - The alerts interface is designed to be backward compatible with the external - scripts interface used by the ``ocf:pacemaker:ClusterMon`` resource, which - is now deprecated. To preserve this compatibility, the environment variables - passed to alert agents are available prepended with ``CRM_notify_`` - as well as ``CRM_alert_``. One break in compatibility is that ``ClusterMon`` - ran external scripts as the ``root`` user, while alert agents are run as the - |CRM_DAEMON_USER| user. diff --git a/include/crm/common/actions_internal.h b/include/crm/common/actions_internal.h index 09acabc517..95809ca058 100644 --- a/include/crm/common/actions_internal.h +++ b/include/crm/common/actions_internal.h @@ -1,283 +1,275 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_ACTIONS_INTERNAL__H #define PCMK__CRM_COMMON_ACTIONS_INTERNAL__H #include // bool #include // uint32_t, UINT32_C() #include // guint, GList, GHashTable #include // xmlNode #include // PCMK_ACTION_MONITOR #include // enum rsc_role_e #include // pcmk_resource_t, pcmk_node_t #include // pcmk__str_eq() #ifdef __cplusplus extern "C" { #endif // Action names as strings // @COMPAT Deprecated since 2.0.0 #define PCMK__ACTION_POWEROFF "poweroff" //! 
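
The table of CRM_alert_* variables and the cautions listed above are the whole contract between Pacemaker and an alert agent. Agents are normally shell scripts; purely as an illustration, the same contract in C, reading a few of the variables and tolerating a missing recipient:

    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
        const char *kind = getenv("CRM_alert_kind");    // node, fencing, resource, or attribute
        const char *node = getenv("CRM_alert_node");    // affected node
        const char *desc = getenv("CRM_alert_desc");    // human-readable detail
        const char *recipient = getenv("CRM_alert_recipient");  // may be unset

        if (kind == NULL) {
            return 0;   // not invoked by Pacemaker; nothing to do
        }
        fprintf(stderr, "%s alert for %s: %s (recipient: %s)\n",
                kind, (node? node : "unknown"), (desc? desc : ""),
                (recipient? recipient : "none"));
        return 0;
    }
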
printf-style format to create operation key from resource, action, interval #define PCMK__OP_FMT "%s_%s_%u" /*! * \internal * \brief Set action flags for an action * * \param[in,out] action Action to set flags for * \param[in] flags_to_set Group of enum pcmk__action_flags to set */ #define pcmk__set_action_flags(action, flags_to_set) do { \ (action)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) /*! * \internal * \brief Clear action flags for an action * * \param[in,out] action Action to clear flags for * \param[in] flags_to_clear Group of enum pcmk__action_flags to clear */ #define pcmk__clear_action_flags(action, flags_to_clear) do { \ (action)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Action", (action)->uuid, \ (action)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) /*! * \internal * \brief Set action flags for a flag group * * \param[in,out] action_flags Flag group to set flags for * \param[in] action_name Name of action being modified (for logging) * \param[in] to_set Group of enum pcmk__action_flags to set */ #define pcmk__set_raw_action_flags(action_flags, action_name, to_set) do { \ action_flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Action", action_name, \ (action_flags), \ (to_set), #to_set); \ } while (0) /*! * \internal * \brief Clear action flags for a flag group * * \param[in,out] action_flags Flag group to clear flags for * \param[in] action_name Name of action being modified (for logging) * \param[in] to_clear Group of enum pcmk__action_flags to clear */ #define pcmk__clear_raw_action_flags(action_flags, action_name, to_clear) \ do { \ action_flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Action", action_name, \ (action_flags), \ (to_clear), #to_clear); \ } while (0) // Possible actions (including some pseudo-actions) enum pcmk__action_type { pcmk__action_unspecified = 0, // Unspecified or unknown action pcmk__action_monitor, // Monitor // Each "completed" action must be the regular action plus 1 pcmk__action_stop, // Stop pcmk__action_stopped, // Stop completed pcmk__action_start, // Start pcmk__action_started, // Start completed pcmk__action_notify, // Notify pcmk__action_notified, // Notify completed pcmk__action_promote, // Promote pcmk__action_promoted, // Promoted pcmk__action_demote, // Demote pcmk__action_demoted, // Demoted pcmk__action_shutdown, // Shut down node pcmk__action_fence, // Fence node }; // Action scheduling flags enum pcmk__action_flags { // No action flags set (compare with equality rather than bit set) pcmk__no_action_flags = 0, // Whether action does not require invoking an agent pcmk__action_pseudo = (UINT32_C(1) << 0), // Whether action is runnable pcmk__action_runnable = (UINT32_C(1) << 1), // Whether action should not be executed pcmk__action_optional = (UINT32_C(1) << 2), // Whether action should be added to transition graph even if optional pcmk__action_always_in_graph = (UINT32_C(1) << 3), // Whether operation-specific instance attributes have been unpacked yet pcmk__action_attrs_evaluated = (UINT32_C(1) << 4), // Whether action is allowed to be part of a live migration pcmk__action_migratable = (UINT32_C(1) << 7), // Whether action has been added to transition graph pcmk__action_added_to_graph = (UINT32_C(1) << 8), // Whether action is a stop to abort a dangling migration pcmk__action_migration_abort = (UINT32_C(1) << 11), - /* - * Whether action is an ordering 
point for minimum required instances - * (used to implement ordering after clones with \c PCMK_META_CLONE_MIN - * configured, and ordered sets with \c PCMK_XA_REQUIRE_ALL set to - * \c PCMK_VALUE_FALSE). - */ - pcmk__action_min_runnable = (UINT32_C(1) << 12), - // Whether action is recurring monitor that must be rescheduled if active pcmk__action_reschedule = (UINT32_C(1) << 13), // Whether action has already been processed by a recursive procedure pcmk__action_detect_loop = (UINT32_C(1) << 14), // Whether action's inputs have been de-duplicated yet pcmk__action_inputs_deduplicated = (UINT32_C(1) << 15), // Whether action can be executed on DC rather than own node pcmk__action_on_dc = (UINT32_C(1) << 16), }; /* Possible responses to a resource action failure * * The order is significant; the values are in order of increasing severity so * that they can be compared with less than and greater than. */ enum pcmk__on_fail { pcmk__on_fail_ignore, // Act as if failure didn't happen pcmk__on_fail_demote, // Demote if promotable, else stop pcmk__on_fail_restart, // Restart resource /* Fence the remote node created by the resource if fencing is enabled, * otherwise attempt to restart the resource (used internally for some * remote connection failures). */ pcmk__on_fail_reset_remote, pcmk__on_fail_restart_container, // Restart resource's container pcmk__on_fail_ban, // Ban resource from current node pcmk__on_fail_block, // Treat resource as unmanaged pcmk__on_fail_stop, // Stop resource and leave stopped pcmk__on_fail_standby_node, // Put resource's node in standby pcmk__on_fail_fence_node, // Fence resource's node }; // What resource needs before it can be recovered from a failed node enum pcmk__requires { pcmk__requires_nothing = 0, // Resource can be recovered immediately pcmk__requires_quorum = 1, // Resource can be recovered if quorate pcmk__requires_fencing = 2, // Resource can be recovered after fencing }; // Implementation of pcmk_action_t struct pcmk__action { int id; // Counter to identify action /* * When the controller aborts a transition graph, it sets an abort priority. * If this priority is higher, the action will still be executed anyway. * Pseudo-actions are always allowed, so this is irrelevant for them. */ int priority; pcmk_resource_t *rsc; // Resource to apply action to, if any pcmk_node_t *node; // Node to execute action on, if any xmlNode *op_entry; // Action XML configuration, if any char *task; // Action name char *uuid; // Action key char *cancel_task; // If task is "cancel", the action being cancelled char *reason; // Readable description of why action is needed uint32_t flags; // Group of enum pcmk__action_flags enum pcmk__requires needs; // Prerequisite for recovery enum pcmk__on_fail on_fail; // Response to failure enum rsc_role_e fail_role; // Resource role if action fails GHashTable *meta; // Meta-attributes relevant to action GHashTable *extra; // Action-specific instance attributes pcmk_scheduler_t *scheduler; // Scheduler data this action is part of /* Current count of runnable instance actions for "first" action in an * ordering dependency with pcmk__ar_min_runnable set. */ int runnable_before; /* * Number of instance actions for "first" action in an ordering dependency * with pcmk__ar_min_runnable set that must be runnable before this action * can be runnable. 
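
As the comment on enum pcmk__on_fail notes, the enumerators are declared in order of increasing severity precisely so that two candidate responses can be combined with an ordinary comparison. A minimal sketch of that idea (the helper name is made up):

    static enum pcmk__on_fail
    pick_more_severe(enum pcmk__on_fail a, enum pcmk__on_fail b)
    {
        // Valid only because the enum is declared in severity order
        return (a > b)? a : b;
    }
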
*/ int required_runnable_before; // Actions in a relation with this one (as pcmk__related_action_t *) GList *actions_before; GList *actions_after; }; char *pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms); char *pcmk__notify_key(const char *rsc_id, const char *notify_type, const char *op_type); char *pcmk__transition_key(int transition_id, int action_id, int target_rc, const char *node); void pcmk__filter_op_for_digest(xmlNode *param_set); bool pcmk__is_fencing_action(const char *action); enum pcmk__action_type pcmk__parse_action(const char *action_name); const char *pcmk__action_text(enum pcmk__action_type action); const char *pcmk__on_fail_text(enum pcmk__on_fail on_fail); /*! * \internal * \brief Get a human-friendly action name * * \param[in] action_name Actual action name * \param[in] interval_ms Action interval (in milliseconds) * * \return Action name suitable for display */ static inline const char * pcmk__readable_action(const char *action_name, guint interval_ms) { if ((interval_ms == 0) && pcmk__str_eq(action_name, PCMK_ACTION_MONITOR, pcmk__str_none)) { return "probe"; } return action_name; } #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_ACTIONS_INTERNAL__H diff --git a/include/crm/common/alerts_internal.h b/include/crm/common/alerts_internal.h index 783bc883d0..e10af1ae1d 100644 --- a/include/crm/common/alerts_internal.h +++ b/include/crm/common/alerts_internal.h @@ -1,101 +1,101 @@ /* * Copyright 2015-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_ALERTS_INTERNAL__H #define PCMK__CRM_COMMON_ALERTS_INTERNAL__H #include #include #include #ifdef __cplusplus extern "C" { #endif /* Default-Timeout to use before killing a alerts script (in milliseconds) */ #define PCMK__ALERT_DEFAULT_TIMEOUT_MS (30000) /* Default-Format-String used to pass timestamps to the alerts scripts */ #define PCMK__ALERT_DEFAULT_TSTAMP_FORMAT "%H:%M:%S.%06N" enum pcmk__alert_flags { pcmk__alert_none = 0, pcmk__alert_node = (1 << 0), pcmk__alert_fencing = (1 << 1), pcmk__alert_resource = (1 << 2), pcmk__alert_attribute = (1 << 3), pcmk__alert_default = pcmk__alert_node|pcmk__alert_fencing| pcmk__alert_resource, }; typedef struct { char *id; char *path; char *tstamp_format; char *recipient; char **select_attribute_name; GHashTable *envvars; int timeout; uint32_t flags; } pcmk__alert_t; enum pcmk__alert_keys_e { PCMK__alert_key_recipient = 0, PCMK__alert_key_node, PCMK__alert_key_nodeid, PCMK__alert_key_rsc, PCMK__alert_key_task, PCMK__alert_key_interval, PCMK__alert_key_desc, PCMK__alert_key_status, PCMK__alert_key_target_rc, PCMK__alert_key_rc, PCMK__alert_key_kind, PCMK__alert_key_version, PCMK__alert_key_node_sequence, PCMK__alert_key_timestamp, PCMK__alert_key_attribute_name, PCMK__alert_key_attribute_value, PCMK__alert_key_timestamp_epoch, PCMK__alert_key_timestamp_usec, PCMK__alert_key_exec_time, PCMK__alert_key_select_kind, PCMK__alert_key_select_attribute_name }; #define PCMK__ALERT_INTERNAL_KEY_MAX 19 #define PCMK__ALERT_NODE_SEQUENCE "CRM_alert_node_sequence" -extern const char *pcmk__alert_keys[PCMK__ALERT_INTERNAL_KEY_MAX][3]; +extern const char *pcmk__alert_keys[PCMK__ALERT_INTERNAL_KEY_MAX]; pcmk__alert_t *pcmk__dup_alert(const pcmk__alert_t *entry); pcmk__alert_t *pcmk__alert_new(const char *id, const char *path); void 
pcmk__free_alert(pcmk__alert_t *entry); void pcmk__add_alert_key(GHashTable *table, enum pcmk__alert_keys_e name, const char *value); void pcmk__add_alert_key_int(GHashTable *table, enum pcmk__alert_keys_e name, int value); static inline const char * pcmk__alert_flag2text(enum pcmk__alert_flags flag) { switch (flag) { case pcmk__alert_node: return "node"; case pcmk__alert_fencing: return "fencing"; case pcmk__alert_resource: return "resource"; case pcmk__alert_attribute: return "attribute"; default: return "unknown"; } } #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_ALERTS_INTERNAL__H diff --git a/include/crm/common/nodes.h b/include/crm/common/nodes.h index ecf75aae58..c4acb91c4b 100644 --- a/include/crm/common/nodes.h +++ b/include/crm/common/nodes.h @@ -1,102 +1,105 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_NODES__H #define PCMK__CRM_COMMON_NODES__H #include // bool #include // gboolean, GList, GHashTable +#include // xmlNode #include // pcmk_resource_t, pcmk_scheduler_t #ifdef __cplusplus extern "C" { #endif /*! * \file * \brief Scheduler API for nodes * \ingroup core */ // Special node attributes #define PCMK_NODE_ATTR_MAINTENANCE "maintenance" #define PCMK_NODE_ATTR_STANDBY "standby" #define PCMK_NODE_ATTR_TERMINATE "terminate" //! \internal Do not use typedef struct pcmk__node_private pcmk__node_private_t; // Basic node information (all node objects for the same node share this) // @COMPAT Drop this struct once all members are moved to pcmk__node_private_t //!@{ //! \deprecated Do not use (public access will be removed in a future release) struct pcmk__node_details { /* @COMPAT Convert these gbooleans into new enum pcmk__node_flags values * when we no longer support versions of sbd that use them */ // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_node_is_online() instead gboolean online; // Whether online // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_node_is_pending() instead gboolean pending; // Whether controller membership is pending // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call !pcmk_node_is_clean() instead gboolean unclean; // Whether node requires fencing // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_node_is_shutting_down() instead gboolean shutdown; // Whether shutting down // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_node_is_in_maintenance() instead gboolean maintenance; // Whether in maintenance mode // NOTE: sbd (as of at least 1.5.2) uses this // \deprecated Call pcmk_foreach_active_resource() instead GList *running_rsc; // List of resources active on node }; //!@} // Implementation of pcmk_node_t // @COMPAT Make contents internal when we can break API backward compatibility //!@{ //! \deprecated Do not use (public access will be removed in a future release) struct pcmk__scored_node { struct pcmk__node_assignment *assign; // NOTE: sbd (as of at least 1.5.2) uses this struct pcmk__node_details *details; // Basic node information //! 
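
A short usage sketch of the alert-entry API declared above; the id and path are made up, and the default flags enable node, fencing, and resource alerts but not attribute alerts:

    pcmk__alert_t *entry = pcmk__alert_new("alert-1", "/usr/local/bin/my_alert.sh");

    // pcmk__alert_default does not include pcmk__alert_attribute
    if (!pcmk_is_set(entry->flags, pcmk__alert_attribute)) {
        crm_trace("%s alerts are filtered out by default",
                  pcmk__alert_flag2text(pcmk__alert_attribute));
    }
    pcmk__free_alert(entry);
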
\internal Do not use pcmk__node_private_t *priv; }; //!@} bool pcmk_node_is_online(const pcmk_node_t *node); bool pcmk_node_is_pending(const pcmk_node_t *node); bool pcmk_node_is_clean(const pcmk_node_t *node); bool pcmk_node_is_shutting_down(const pcmk_node_t *node); bool pcmk_node_is_in_maintenance(const pcmk_node_t *node); bool pcmk_foreach_active_resource(pcmk_node_t *node, bool (*fn)(pcmk_resource_t *, void *), void *user_data); +const char *pcmk_cib_node_shutdown(xmlNode *cib, const char *node); + #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_NODES__H diff --git a/lib/common/alerts.c b/lib/common/alerts.c index eac3e2e953..0c685f0a9f 100644 --- a/lib/common/alerts.c +++ b/lib/common/alerts.c @@ -1,170 +1,130 @@ /* * Copyright 2015-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include -/* - * to allow script compatibility we can have more than one - * set of environment variables - */ -const char *pcmk__alert_keys[PCMK__ALERT_INTERNAL_KEY_MAX][3] = -{ - [PCMK__alert_key_recipient] = { - "CRM_notify_recipient", "CRM_alert_recipient", NULL - }, - [PCMK__alert_key_node] = { - "CRM_notify_node", "CRM_alert_node", NULL - }, - [PCMK__alert_key_nodeid] = { - "CRM_notify_nodeid", "CRM_alert_nodeid", NULL - }, - [PCMK__alert_key_rsc] = { - "CRM_notify_rsc", "CRM_alert_rsc", NULL - }, - [PCMK__alert_key_task] = { - "CRM_notify_task", "CRM_alert_task", NULL - }, - [PCMK__alert_key_interval] = { - "CRM_notify_interval", "CRM_alert_interval", NULL - }, - [PCMK__alert_key_desc] = { - "CRM_notify_desc", "CRM_alert_desc", NULL - }, - [PCMK__alert_key_status] = { - "CRM_notify_status", "CRM_alert_status", NULL - }, - [PCMK__alert_key_target_rc] = { - "CRM_notify_target_rc", "CRM_alert_target_rc", NULL - }, - [PCMK__alert_key_rc] = { - "CRM_notify_rc", "CRM_alert_rc", NULL - }, - [PCMK__alert_key_kind] = { - "CRM_notify_kind", "CRM_alert_kind", NULL - }, - [PCMK__alert_key_version] = { - "CRM_notify_version", "CRM_alert_version", NULL - }, - [PCMK__alert_key_node_sequence] = { - "CRM_notify_node_sequence", PCMK__ALERT_NODE_SEQUENCE, NULL - }, - [PCMK__alert_key_timestamp] = { - "CRM_notify_timestamp", "CRM_alert_timestamp", NULL - }, - [PCMK__alert_key_attribute_name] = { - "CRM_notify_attribute_name", "CRM_alert_attribute_name", NULL - }, - [PCMK__alert_key_attribute_value] = { - "CRM_notify_attribute_value", "CRM_alert_attribute_value", NULL - }, - [PCMK__alert_key_timestamp_epoch] = { - "CRM_notify_timestamp_epoch", "CRM_alert_timestamp_epoch", NULL - }, - [PCMK__alert_key_timestamp_usec] = { - "CRM_notify_timestamp_usec", "CRM_alert_timestamp_usec", NULL - }, - [PCMK__alert_key_exec_time] = { - "CRM_notify_exec_time", "CRM_alert_exec_time", NULL - } +const char *pcmk__alert_keys[PCMK__ALERT_INTERNAL_KEY_MAX] = { + [PCMK__alert_key_recipient] = "CRM_alert_recipient", + [PCMK__alert_key_node] = "CRM_alert_node", + [PCMK__alert_key_nodeid] = "CRM_alert_nodeid", + [PCMK__alert_key_rsc] = "CRM_alert_rsc", + [PCMK__alert_key_task] = "CRM_alert_task", + [PCMK__alert_key_interval] = "CRM_alert_interval", + [PCMK__alert_key_desc] = "CRM_alert_desc", + [PCMK__alert_key_status] = "CRM_alert_status", + [PCMK__alert_key_target_rc] = "CRM_alert_target_rc", + [PCMK__alert_key_rc] = "CRM_alert_rc", + [PCMK__alert_key_kind] = "CRM_alert_kind", + 
[PCMK__alert_key_version] = "CRM_alert_version", + [PCMK__alert_key_node_sequence] = PCMK__ALERT_NODE_SEQUENCE, + [PCMK__alert_key_timestamp] = "CRM_alert_timestamp", + [PCMK__alert_key_attribute_name] = "CRM_alert_attribute_name", + [PCMK__alert_key_attribute_value] = "CRM_alert_attribute_value", + [PCMK__alert_key_timestamp_epoch] = "CRM_alert_timestamp_epoch", + [PCMK__alert_key_timestamp_usec] = "CRM_alert_timestamp_usec", + [PCMK__alert_key_exec_time] = "CRM_alert_exec_time", }; /*! * \brief Create a new alert entry structure * * \param[in] id ID to use * \param[in] path Path to alert agent executable * * \return Pointer to newly allocated alert entry * \note Non-string fields will be filled in with defaults. * It is the caller's responsibility to free the result, * using pcmk__free_alert(). */ pcmk__alert_t * pcmk__alert_new(const char *id, const char *path) { pcmk__alert_t *entry = pcmk__assert_alloc(1, sizeof(pcmk__alert_t)); CRM_ASSERT((id != NULL) && (path != NULL)); entry->id = pcmk__str_copy(id); entry->path = pcmk__str_copy(path); entry->timeout = PCMK__ALERT_DEFAULT_TIMEOUT_MS; entry->flags = pcmk__alert_default; return entry; } void pcmk__free_alert(pcmk__alert_t *entry) { if (entry) { free(entry->id); free(entry->path); free(entry->tstamp_format); free(entry->recipient); g_strfreev(entry->select_attribute_name); if (entry->envvars) { g_hash_table_destroy(entry->envvars); } free(entry); } } /*! * \internal * \brief Duplicate an alert entry * * \param[in] entry Alert entry to duplicate * * \return Duplicate of alert entry */ pcmk__alert_t * pcmk__dup_alert(const pcmk__alert_t *entry) { pcmk__alert_t *new_entry = pcmk__alert_new(entry->id, entry->path); new_entry->timeout = entry->timeout; new_entry->flags = entry->flags; new_entry->envvars = pcmk__str_table_dup(entry->envvars); new_entry->tstamp_format = pcmk__str_copy(entry->tstamp_format); new_entry->recipient = pcmk__str_copy(entry->recipient); if (entry->select_attribute_name) { new_entry->select_attribute_name = g_strdupv(entry->select_attribute_name); } return new_entry; } void pcmk__add_alert_key(GHashTable *table, enum pcmk__alert_keys_e name, const char *value) { - for (const char **key = pcmk__alert_keys[name]; *key; key++) { - crm_trace("Inserting alert key %s = '%s'", *key, value); - if (value) { - pcmk__insert_dup(table, *key, value); - } else { - g_hash_table_remove(table, *key); - } + CRM_ASSERT((table != NULL) && (name >= 0) + && (name < PCMK__ALERT_INTERNAL_KEY_MAX)); + if (value == NULL) { + crm_trace("Removing alert key %s", pcmk__alert_keys[name]); + g_hash_table_remove(table, pcmk__alert_keys[name]); + } else { + crm_trace("Inserting alert key %s = '%s'", + pcmk__alert_keys[name], value); + pcmk__insert_dup(table, pcmk__alert_keys[name], value); } } void pcmk__add_alert_key_int(GHashTable *table, enum pcmk__alert_keys_e name, int value) { - for (const char **key = pcmk__alert_keys[name]; *key; key++) { - crm_trace("Inserting alert key %s = %d", *key, value); - g_hash_table_insert(table, pcmk__str_copy(*key), pcmk__itoa(value)); - } + CRM_ASSERT((table != NULL) && (name >= 0) + && (name < PCMK__ALERT_INTERNAL_KEY_MAX)); + crm_trace("Inserting alert key %s = %d", pcmk__alert_keys[name], value); + g_hash_table_insert(table, pcmk__str_copy(pcmk__alert_keys[name]), + pcmk__itoa(value)); } diff --git a/lib/common/nodes.c b/lib/common/nodes.c index 1809814941..13b97d4c99 100644 --- a/lib/common/nodes.c +++ b/lib/common/nodes.c @@ -1,162 +1,192 @@ /* * Copyright 2022-2024 the Pacemaker project contributors * * 
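
With the CRM_notify_* compatibility names gone, each enum pcmk__alert_keys_e value now maps to exactly one environment variable, so callers set or clear each key once. A small usage sketch (table setup uses plain GLib; the values are made up):

    GHashTable *env = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);

    pcmk__add_alert_key(env, PCMK__alert_key_node, "node1");    // sets CRM_alert_node
    pcmk__add_alert_key_int(env, PCMK__alert_key_nodeid, 1);    // sets CRM_alert_nodeid
    pcmk__add_alert_key(env, PCMK__alert_key_node, NULL);       // NULL removes the key again
    g_hash_table_destroy(env);
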
The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // xmlNode #include /*! * \internal * \brief Check whether a node is online * * \param[in] node Node to check * * \return true if \p node is online, otherwise false */ bool pcmk_node_is_online(const pcmk_node_t *node) { return (node != NULL) && node->details->online; } /*! * \internal * \brief Check whether a node is pending * * Check whether a node is pending. A node is pending if it is a member of the * cluster but not the controller group, which means it is in the process of * either joining or leaving the cluster. * * \param[in] node Node to check * * \return true if \p node is pending, otherwise false */ bool pcmk_node_is_pending(const pcmk_node_t *node) { return (node != NULL) && node->details->pending; } /*! * \internal * \brief Check whether a node is clean * * Check whether a node is clean. A node is clean if it is a cluster node or * remote node that has been seen by the cluster at least once, or the * startup-fencing cluster option is false; and the node, and its host if a * guest or bundle node, are not scheduled to be fenced. * * \param[in] node Node to check * * \return true if \p node is clean, otherwise false */ bool pcmk_node_is_clean(const pcmk_node_t *node) { return (node != NULL) && !(node->details->unclean); } /*! * \internal * \brief Check whether a node is shutting down * * \param[in] node Node to check * * \return true if \p node is shutting down, otherwise false */ bool pcmk_node_is_shutting_down(const pcmk_node_t *node) { return (node != NULL) && node->details->shutdown; } /*! * \internal * \brief Check whether a node is in maintenance mode * * \param[in] node Node to check * * \return true if \p node is in maintenance mode, otherwise false */ bool pcmk_node_is_in_maintenance(const pcmk_node_t *node) { return (node != NULL) && node->details->maintenance; } /*! * \internal * \brief Call a function for each resource active on a node * * Call a caller-supplied function with a caller-supplied argument for each * resource that is active on a given node. If the function returns false, this * function will return immediately without processing any remaining resources. * * \param[in] node Node to check * * \return Result of last call of \p fn (or false if none) */ bool pcmk_foreach_active_resource(pcmk_node_t *node, bool (*fn)(pcmk_resource_t *, void *), void *user_data) { bool result = false; if ((node != NULL) && (fn != NULL)) { for (GList *item = node->details->running_rsc; item != NULL; item = item->next) { result = fn((pcmk_resource_t *) item->data, user_data); if (!result) { break; } } } return result; } void pcmk__xe_add_node(xmlNode *xml, const char *node, int nodeid) { CRM_ASSERT(xml != NULL); if (node != NULL) { crm_xml_add(xml, PCMK__XA_ATTR_HOST, node); } if (nodeid > 0) { crm_xml_add_int(xml, PCMK__XA_ATTR_HOST_ID, nodeid); } } /*! 
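
pcmk_foreach_active_resource() stops as soon as the callback returns false, so a callback meant to visit every resource must return true. A minimal sketch that counts the resources active on a node:

    static bool
    count_resource(pcmk_resource_t *rsc, void *user_data)
    {
        unsigned int *count = user_data;

        (void) rsc;     // only the count matters here
        ++(*count);
        return true;    // keep iterating
    }

    // Usage (node obtained elsewhere):
    //     unsigned int n = 0;
    //     pcmk_foreach_active_resource(node, count_resource, &n);
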
* \internal * \brief Find a node by name in a list of nodes * * \param[in] nodes List of nodes (as pcmk_node_t*) * \param[in] node_name Name of node to find * * \return Node from \p nodes that matches \p node_name if any, otherwise NULL */ pcmk_node_t * pcmk__find_node_in_list(const GList *nodes, const char *node_name) { if (node_name != NULL) { for (const GList *iter = nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; if (pcmk__str_eq(node->priv->name, node_name, pcmk__str_casei)) { return node; } } } return NULL; } + +#define XP_SHUTDOWN "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']/" \ + PCMK__XE_TRANSIENT_ATTRIBUTES "/" PCMK_XE_INSTANCE_ATTRIBUTES "/" \ + PCMK_XE_NVPAIR "[@" PCMK_XA_NAME "='" PCMK__NODE_ATTR_SHUTDOWN "']" + +/*! + * \brief Get value of a node's shutdown attribute from CIB, if present + * + * \param[in] cib CIB to check + * \param[in] node Name of node to check + * + * \return Value of shutdown attribute for \p node in \p cib if any, + * otherwise NULL + * \note The return value is a pointer into \p cib and so is valid only for the + * lifetime of that object. + */ +const char * +pcmk_cib_node_shutdown(xmlNode *cib, const char *node) +{ + if ((cib != NULL) && (node != NULL)) { + char *xpath = crm_strdup_printf(XP_SHUTDOWN, node); + xmlNode *match = get_xpath_object(xpath, cib, LOG_TRACE); + + free(xpath); + if (match != NULL) { + return crm_element_value(match, PCMK_XA_VALUE); + } + } + return NULL; +} diff --git a/lib/common/tests/nodes/Makefile.am b/lib/common/tests/nodes/Makefile.am index f52c615e4d..6c4964e1d0 100644 --- a/lib/common/tests/nodes/Makefile.am +++ b/lib/common/tests/nodes/Makefile.am @@ -1,23 +1,24 @@ # # Copyright 2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/tap.mk include $(top_srcdir)/mk/unittest.mk # Add "_test" to the end of all test program names to simplify .gitignore. check_PROGRAMS = pcmk__find_node_in_list_test \ + pcmk__xe_add_node_test \ + pcmk_cib_node_shutdown_test \ pcmk_foreach_active_resource_test \ pcmk_node_is_clean_test \ pcmk_node_is_in_maintenance_test \ pcmk_node_is_online_test \ pcmk_node_is_pending_test \ - pcmk_node_is_shutting_down_test \ - pcmk__xe_add_node_test + pcmk_node_is_shutting_down_test TESTS = $(check_PROGRAMS) diff --git a/lib/common/tests/nodes/pcmk_cib_node_shutdown_test.c b/lib/common/tests/nodes/pcmk_cib_node_shutdown_test.c new file mode 100644 index 0000000000..274279eb8e --- /dev/null +++ b/lib/common/tests/nodes/pcmk_cib_node_shutdown_test.c @@ -0,0 +1,70 @@ +/* + * Copyright 2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. 
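
A brief usage sketch of the new public helper; cib is assumed to be an already parsed CIB xmlNode and the node name is made up. The returned string points into the CIB, so it must not be used after the CIB is freed:

    const char *shutdown = pcmk_cib_node_shutdown(cib, "node1");

    if (shutdown != NULL) {
        crm_info("node1 requested shutdown (attribute value %s)", shutdown);
    }
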
+ */ + +#include + +#include // NULL +#include // xmlNode + +#include +#include + +// Minimum CIB structure needed for function's XPath search +#define CIB_XML \ + "<" PCMK_XE_CIB ">" \ + "<" PCMK_XE_STATUS ">" \ + "<" PCMK__XE_NODE_STATE " " PCMK_XA_UNAME "='node1'>" \ + "<" PCMK__XE_TRANSIENT_ATTRIBUTES ">" \ + "<" PCMK_XE_INSTANCE_ATTRIBUTES ">" \ + "<" PCMK_XE_NVPAIR " " \ + PCMK_XA_NAME "='" PCMK__NODE_ATTR_SHUTDOWN "' " \ + PCMK_XA_VALUE "='999'/>" \ + "" \ + "" \ + "" \ + "" \ + "" + +static void +null_args(void **state) +{ + xmlNode *xml = pcmk__xml_parse(CIB_XML); + + assert_non_null(xml); + assert_null(pcmk_cib_node_shutdown(NULL, NULL)); + assert_null(pcmk_cib_node_shutdown(xml, NULL)); + assert_null(pcmk_cib_node_shutdown(NULL, "node1")); + pcmk__xml_free(xml); +} + +static void +shutdown_absent(void **state) +{ + xmlNode *xml = pcmk__xml_parse(CIB_XML); + + assert_non_null(xml); + assert_null(pcmk_cib_node_shutdown(xml, "node")); + assert_null(pcmk_cib_node_shutdown(xml, "node10")); + pcmk__xml_free(xml); +} + +static void +shutdown_present(void **state) +{ + xmlNode *xml = pcmk__xml_parse(CIB_XML); + + assert_non_null(xml); + assert_string_equal(pcmk_cib_node_shutdown(xml, "node1"), "999"); + pcmk__xml_free(xml); +} + +PCMK__UNIT_TEST(NULL, NULL, + cmocka_unit_test(null_args), + cmocka_unit_test(shutdown_absent), + cmocka_unit_test(shutdown_present)) diff --git a/lib/lrmd/lrmd_alerts.c b/lib/lrmd/lrmd_alerts.c index b73ba90f0f..f04fc958c0 100644 --- a/lib/lrmd/lrmd_alerts.c +++ b/lib/lrmd/lrmd_alerts.c @@ -1,399 +1,394 @@ /* * Copyright 2015-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include static lrmd_key_value_t * alert_key2param(lrmd_key_value_t *head, enum pcmk__alert_keys_e name, const char *value) { - const char **key; - if (value == NULL) { value = ""; } - for (key = pcmk__alert_keys[name]; *key; key++) { - crm_trace("Setting alert key %s = '%s'", *key, value); - head = lrmd_key_value_add(head, *key, value); - } - return head; + crm_trace("Setting alert key %s = '%s'", pcmk__alert_keys[name], value); + return lrmd_key_value_add(head, pcmk__alert_keys[name], value); } static lrmd_key_value_t * alert_key2param_int(lrmd_key_value_t *head, enum pcmk__alert_keys_e name, int value) { char *value_s = pcmk__itoa(value); head = alert_key2param(head, name, value_s); free(value_s); return head; } static lrmd_key_value_t * alert_key2param_ms(lrmd_key_value_t *head, enum pcmk__alert_keys_e name, guint value) { char *value_s = crm_strdup_printf("%u", value); head = alert_key2param(head, name, value_s); free(value_s); return head; } static void set_ev_kv(gpointer key, gpointer value, gpointer user_data) { lrmd_key_value_t **head = (lrmd_key_value_t **) user_data; if (value) { crm_trace("Setting environment variable %s='%s'", (char*)key, (char*)value); *head = lrmd_key_value_add(*head, key, value); } } static lrmd_key_value_t * alert_envvar2params(lrmd_key_value_t *head, const pcmk__alert_t *entry) { if (entry->envvars) { g_hash_table_foreach(entry->envvars, set_ev_kv, &head); } return head; } /* * We could use g_strv_contains() instead of this function, * but that has only been available since glib 2.43.2. 
*/ static gboolean is_target_alert(char **list, const char *value) { int target_list_num = 0; gboolean rc = FALSE; CRM_CHECK(value != NULL, return FALSE); if (list == NULL) { return TRUE; } target_list_num = g_strv_length(list); for (int cnt = 0; cnt < target_list_num; cnt++) { if (strcmp(list[cnt], value) == 0) { rc = TRUE; break; } } return rc; } /*! * \internal * \brief Execute alert agents for an event * * \param[in,out] lrmd Executor connection to use * \param[in] alert_list Alerts to execute * \param[in] kind Type of event that is being alerted for * \param[in] attr_name If pcmk__alert_attribute, the attribute name * \param[in,out] params Environment variables to pass to agents * * \retval pcmk_ok on success * \retval -1 if some alerts failed * \retval -2 if all alerts failed */ static int exec_alert_list(lrmd_t *lrmd, const GList *alert_list, enum pcmk__alert_flags kind, const char *attr_name, lrmd_key_value_t *params) { bool any_success = FALSE, any_failure = FALSE; const char *kind_s = pcmk__alert_flag2text(kind); pcmk__time_hr_t *now = NULL; char timestamp_epoch[20]; char timestamp_usec[7]; time_t epoch = 0; params = alert_key2param(params, PCMK__alert_key_kind, kind_s); params = alert_key2param(params, PCMK__alert_key_version, PACEMAKER_VERSION); for (const GList *iter = alert_list; iter != NULL; iter = g_list_next(iter)) { const pcmk__alert_t *entry = (pcmk__alert_t *) (iter->data); lrmd_key_value_t *copy_params = NULL; lrmd_key_value_t *head = NULL; int rc; if (!pcmk_is_set(entry->flags, kind)) { crm_trace("Filtering unwanted %s alert to %s via %s", kind_s, entry->recipient, entry->id); continue; } if ((kind == pcmk__alert_attribute) && !is_target_alert(entry->select_attribute_name, attr_name)) { crm_trace("Filtering unwanted attribute '%s' alert to %s via %s", attr_name, entry->recipient, entry->id); continue; } if (now == NULL) { now = pcmk__time_hr_now(&epoch); } crm_info("Sending %s alert via %s to %s", kind_s, entry->id, entry->recipient); /* Make a copy of the parameters, because each alert will be unique */ for (head = params; head != NULL; head = head->next) { copy_params = lrmd_key_value_add(copy_params, head->key, head->value); } copy_params = alert_key2param(copy_params, PCMK__alert_key_recipient, entry->recipient); if (now) { char *timestamp = pcmk__time_format_hr(entry->tstamp_format, now); if (timestamp) { copy_params = alert_key2param(copy_params, PCMK__alert_key_timestamp, timestamp); free(timestamp); } snprintf(timestamp_epoch, sizeof(timestamp_epoch), "%lld", (long long) epoch); copy_params = alert_key2param(copy_params, PCMK__alert_key_timestamp_epoch, timestamp_epoch); snprintf(timestamp_usec, sizeof(timestamp_usec), "%06d", now->useconds); copy_params = alert_key2param(copy_params, PCMK__alert_key_timestamp_usec, timestamp_usec); } copy_params = alert_envvar2params(copy_params, entry); rc = lrmd->cmds->exec_alert(lrmd, entry->id, entry->path, entry->timeout, copy_params); if (rc < 0) { crm_err("Could not execute alert %s: %s " QB_XS " rc=%d", entry->id, pcmk_strerror(rc), rc); any_failure = TRUE; } else { any_success = TRUE; } } if (now) { free(now); } if (any_failure) { return (any_success? -1 : -2); } return pcmk_ok; } /*! 
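
For reference, the membership check done by is_target_alert() above could be a one-liner with g_strv_contains(), which the comment notes is only available from GLib 2.43.2 on; a NULL list still has to mean "match everything":

    gboolean matches = (list == NULL)
                       || g_strv_contains((const gchar *const *) list, value);
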
* \internal * \brief Send an alert for a node attribute change * * \param[in,out] lrmd Executor connection to use * \param[in] alert_list List of alert agents to execute * \param[in] node Name of node with attribute change * \param[in] nodeid Node ID of node with attribute change * \param[in] attr_name Name of attribute that changed * \param[in] attr_value New value of attribute that changed * * \retval pcmk_ok on success * \retval -1 if some alert agents failed * \retval -2 if all alert agents failed */ int lrmd_send_attribute_alert(lrmd_t *lrmd, const GList *alert_list, const char *node, uint32_t nodeid, const char *attr_name, const char *attr_value) { int rc = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { return -2; } params = alert_key2param(params, PCMK__alert_key_node, node); params = alert_key2param_int(params, PCMK__alert_key_nodeid, nodeid); params = alert_key2param(params, PCMK__alert_key_attribute_name, attr_name); params = alert_key2param(params, PCMK__alert_key_attribute_value, attr_value); rc = exec_alert_list(lrmd, alert_list, pcmk__alert_attribute, attr_name, params); lrmd_key_value_freeall(params); return rc; } /*! * \internal * \brief Send an alert for a node membership event * * \param[in,out] lrmd Executor connection to use * \param[in] alert_list List of alert agents to execute * \param[in] node Name of node with change * \param[in] nodeid Node ID of node with change * \param[in] state New state of node with change * * \retval pcmk_ok on success * \retval -1 if some alert agents failed * \retval -2 if all alert agents failed */ int lrmd_send_node_alert(lrmd_t *lrmd, const GList *alert_list, const char *node, uint32_t nodeid, const char *state) { int rc = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { return -2; } params = alert_key2param(params, PCMK__alert_key_node, node); params = alert_key2param(params, PCMK__alert_key_desc, state); params = alert_key2param_int(params, PCMK__alert_key_nodeid, nodeid); rc = exec_alert_list(lrmd, alert_list, pcmk__alert_node, NULL, params); lrmd_key_value_freeall(params); return rc; } /*! * \internal * \brief Send an alert for a fencing event * * \param[in,out] lrmd Executor connection to use * \param[in] alert_list List of alert agents to execute * \param[in] target Name of fence target node * \param[in] task Type of fencing event that occurred * \param[in] desc Readable description of event * \param[in] op_rc Result of fence action * * \retval pcmk_ok on success * \retval -1 if some alert agents failed * \retval -2 if all alert agents failed */ int lrmd_send_fencing_alert(lrmd_t *lrmd, const GList *alert_list, const char *target, const char *task, const char *desc, int op_rc) { int rc = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { return -2; } params = alert_key2param(params, PCMK__alert_key_node, target); params = alert_key2param(params, PCMK__alert_key_task, task); params = alert_key2param(params, PCMK__alert_key_desc, desc); params = alert_key2param_int(params, PCMK__alert_key_rc, op_rc); rc = exec_alert_list(lrmd, alert_list, pcmk__alert_fencing, NULL, params); lrmd_key_value_freeall(params); return rc; } /*! 
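
The lrmd_send_*_alert() functions above share a return convention: pcmk_ok when every agent ran, -1 when some agents failed, and -2 when all of them (or the connection) failed. A caller-side sketch, assuming lrmd and alert_list were set up elsewhere:

    int rc = lrmd_send_node_alert(lrmd, alert_list, "node1", 1, "member");

    switch (rc) {
        case pcmk_ok:
            break;                                              // all agents ran
        case -1:
            crm_warn("Some alert agents failed for node event");
            break;
        default:                                                // -2
            crm_err("Could not run any alert agents for node event");
            break;
    }
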
* \internal * \brief Send an alert for a resource operation * * \param[in,out] lrmd Executor connection to use * \param[in] alert_list List of alert agents to execute * \param[in] node Name of node that executed operation * \param[in] op Resource operation * * \retval pcmk_ok on success * \retval -1 if some alert agents failed * \retval -2 if all alert agents failed */ int lrmd_send_resource_alert(lrmd_t *lrmd, const GList *alert_list, const char *node, const lrmd_event_data_t *op) { int rc = pcmk_ok; int target_rc = pcmk_ok; lrmd_key_value_t *params = NULL; if (lrmd == NULL) { return -2; } target_rc = rsc_op_expected_rc(op); if ((op->interval_ms == 0) && (target_rc == op->rc) && pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR, pcmk__str_casei)) { /* Don't send alerts for probes with the expected result. Leave it up to * the agent whether to alert for 'failed' probes. (Even if we find a * resource running, it was probably because someone did a clean-up of * the status section.) */ return pcmk_ok; } params = alert_key2param(params, PCMK__alert_key_node, node); params = alert_key2param(params, PCMK__alert_key_rsc, op->rsc_id); params = alert_key2param(params, PCMK__alert_key_task, op->op_type); params = alert_key2param_ms(params, PCMK__alert_key_interval, op->interval_ms); params = alert_key2param_int(params, PCMK__alert_key_target_rc, target_rc); params = alert_key2param_int(params, PCMK__alert_key_status, op->op_status); params = alert_key2param_int(params, PCMK__alert_key_rc, op->rc); /* Reoccurring operations do not set exec_time, so on timeout, set it * to the operation timeout since that's closer to the actual value. */ if ((op->op_status == PCMK_EXEC_TIMEOUT) && (op->exec_time == 0)) { params = alert_key2param_int(params, PCMK__alert_key_exec_time, op->timeout); } else { params = alert_key2param_int(params, PCMK__alert_key_exec_time, op->exec_time); } if (op->op_status == PCMK_EXEC_DONE) { params = alert_key2param(params, PCMK__alert_key_desc, services_ocf_exitcode_str(op->rc)); } else { params = alert_key2param(params, PCMK__alert_key_desc, pcmk_exec_status_str(op->op_status)); } rc = exec_alert_list(lrmd, alert_list, pcmk__alert_resource, NULL, params); lrmd_key_value_freeall(params); return rc; } diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c index 526e66ce3e..4ff10d164c 100644 --- a/lib/pacemaker/pcmk_sched_actions.c +++ b/lib/pacemaker/pcmk_sched_actions.c @@ -1,1950 +1,1942 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Get the action flags relevant to ordering constraints * * \param[in,out] action Action to check * \param[in] node Node that *other* action in the ordering is on * (used only for clone resource actions) * * \return Action flags that should be used for orderings */ static uint32_t action_flags_for_ordering(pcmk_action_t *action, const pcmk_node_t *node) { bool runnable = false; uint32_t flags; // For non-resource actions, return the action flags if (action->rsc == NULL) { return action->flags; } /* For non-clone resources, or a clone action not assigned to a node, * return the flags as determined by the resource method without a node * specified. 
*/ flags = action->rsc->priv->cmds->action_flags(action, NULL); if ((node == NULL) || !pcmk__is_clone(action->rsc)) { return flags; } /* Otherwise (i.e., for clone resource actions on a specific node), first * remember whether the non-node-specific action is runnable. */ runnable = pcmk_is_set(flags, pcmk__action_runnable); // Then recheck the resource method with the node flags = action->rsc->priv->cmds->action_flags(action, node); /* For clones in ordering constraints, the node-specific "runnable" doesn't * matter, just the non-node-specific setting (i.e., is the action runnable * anywhere). * * This applies only to runnable, and only for ordering constraints. This * function shouldn't be used for other types of constraints without * changes. Not very satisfying, but it's logical and appears to work well. */ if (runnable && !pcmk_is_set(flags, pcmk__action_runnable)) { pcmk__set_raw_action_flags(flags, action->rsc->id, pcmk__action_runnable); } return flags; } /*! * \internal * \brief Get action UUID that should be used with a resource ordering * * When an action is ordered relative to an action for a collective resource * (clone, group, or bundle), it actually needs to be ordered after all * instances of the collective have completed the relevant action (for example, * given "start CLONE then start RSC", RSC must wait until all instances of * CLONE have started). Given the UUID and resource of the first action in an * ordering, this returns the UUID of the action that should actually be used * for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0"). * * \param[in] first_uuid UUID of first action in ordering * \param[in] first_rsc Resource of first action in ordering * * \return Newly allocated copy of UUID to use with ordering * \note It is the caller's responsibility to free the return value. */ static char * action_uuid_for_ordering(const char *first_uuid, const pcmk_resource_t *first_rsc) { guint interval_ms = 0; char *uuid = NULL; char *rid = NULL; char *first_task_str = NULL; enum pcmk__action_type first_task = pcmk__action_unspecified; enum pcmk__action_type remapped_task = pcmk__action_unspecified; // Only non-notify actions for collective resources need remapping if ((strstr(first_uuid, PCMK_ACTION_NOTIFY) != NULL) || (first_rsc->priv->variant < pcmk__rsc_variant_group)) { goto done; } // Only non-recurring actions need remapping CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms)); if (interval_ms > 0) { goto done; } first_task = pcmk__parse_action(first_task_str); switch (first_task) { case pcmk__action_stop: case pcmk__action_start: case pcmk__action_notify: case pcmk__action_promote: case pcmk__action_demote: remapped_task = first_task + 1; break; case pcmk__action_stopped: case pcmk__action_started: case pcmk__action_notified: case pcmk__action_promoted: case pcmk__action_demoted: remapped_task = first_task; break; case pcmk__action_monitor: case pcmk__action_shutdown: case pcmk__action_fence: break; default: crm_err("Unknown action '%s' in ordering", first_task_str); break; } if (remapped_task != pcmk__action_unspecified) { /* If a clone or bundle has notifications enabled, the ordering will be * relative to when notifications have been sent for the remapped task. 
 */
        if (pcmk_is_set(first_rsc->flags, pcmk__rsc_notify)
            && (pcmk__is_clone(first_rsc) || pcmk__is_bundled(first_rsc))) {
            uuid = pcmk__notify_key(rid, "confirmed-post",
                                    pcmk__action_text(remapped_task));
        } else {
            uuid = pcmk__op_key(rid, pcmk__action_text(remapped_task), 0);
        }
        pcmk__rsc_trace(first_rsc,
                        "Remapped action UUID %s to %s for ordering purposes",
                        first_uuid, uuid);
    }

done:
    free(first_task_str);
    free(rid);
    return (uuid != NULL)? uuid : pcmk__str_copy(first_uuid);
}

/*!
 * \internal
 * \brief Get actual action that should be used with an ordering
 *
 * When an action is ordered relative to an action for a collective resource
 * (clone, group, or bundle), it actually needs to be ordered after all
 * instances of the collective have completed the relevant action (for example,
 * given "start CLONE then start RSC", RSC must wait until all instances of
 * CLONE have started). Given the first action in an ordering, this returns
 * the action that should actually be used for ordering (for example, the
 * started action instead of the start action).
 *
 * \param[in] action  First action in an ordering
 *
 * \return Actual action that should be used for the ordering
 */
static pcmk_action_t *
action_for_ordering(pcmk_action_t *action)
{
    pcmk_action_t *result = action;
    pcmk_resource_t *rsc = action->rsc;

    if (rsc == NULL) {
        return result;
    }

    if ((rsc->priv->variant >= pcmk__rsc_variant_group)
        && (action->uuid != NULL)) {
        char *uuid = action_uuid_for_ordering(action->uuid, rsc);

        result = find_first_action(rsc->priv->actions, uuid, NULL, NULL);
        if (result == NULL) {
            crm_warn("Not remapping %s to %s because %s does not have "
                     "remapped action", action->uuid, uuid, rsc->id);
            result = action;
        }
        free(uuid);
    }
    return result;
}

/*!
 * \internal
 * \brief Wrapper for update_ordered_actions() method for readability
 *
 * \param[in,out] rsc        Resource to call method for
 * \param[in,out] first      'First' action in an ordering
 * \param[in,out] then       'Then' action in an ordering
 * \param[in]     node       If not NULL, limit scope of ordering to this
 *                           node (only used when interleaving instances)
 * \param[in]     flags      Action flags for \p first for ordering purposes
 * \param[in]     filter     Action flags to limit scope of certain updates
 *                           (may include pcmk__action_optional to affect only
 *                           mandatory actions, and pcmk__action_runnable to
 *                           affect only runnable actions)
 * \param[in]     type       Group of enum pcmk__action_relation_flags to apply
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Group of enum pcmk__updated flags indicating what was updated
 */
static inline uint32_t
update(pcmk_resource_t *rsc, pcmk_action_t *first, pcmk_action_t *then,
       const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type,
       pcmk_scheduler_t *scheduler)
{
    return rsc->priv->cmds->update_ordered_actions(first, then, node, flags,
                                                   filter, type, scheduler);
}

/*!
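/*
 * Illustrative sketch (not part of the patch) of the remapping implemented
 * above for collective resources: an ordering on "start" really waits on the
 * collective's "started" pseudo-action, and similarly for the other verbs,
 * exactly as in the documented example ("CLONE_started_0" instead of
 * "CLONE_start_0"). The helper below is hypothetical and only mirrors that
 * task table and the <resource>_<task>_<interval> key format.
 */
#include <stdio.h>
#include <string.h>

static const char *
demo_remap_task(const char *task)
{
    static const char *map[][2] = {
        { "start",   "started"  },
        { "stop",    "stopped"  },
        { "promote", "promoted" },
        { "demote",  "demoted"  },
        { "notify",  "notified" },
    };

    for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
        if (strcmp(task, map[i][0]) == 0) {
            return map[i][1];
        }
    }
    return task; // monitors, fencing, etc. are left alone
}

int
main(void)
{
    // A zero-interval operation key has the form <resource>_<task>_0
    printf("CLONE_%s_0\n", demo_remap_task("start")); // prints CLONE_started_0
    return 0;
}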
* \internal * \brief Update flags for ordering's actions appropriately for ordering's flags * * \param[in,out] first First action in an ordering * \param[in,out] then Then action in an ordering * \param[in] first_flags Action flags for \p first for ordering purposes * \param[in] then_flags Action flags for \p then for ordering purposes * \param[in,out] order Action wrapper for \p first in ordering * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags */ static uint32_t update_action_for_ordering_flags(pcmk_action_t *first, pcmk_action_t *then, uint32_t first_flags, uint32_t then_flags, pcmk__related_action_t *order, pcmk_scheduler_t *scheduler) { uint32_t changed = pcmk__updated_none; /* The node will only be used for clones. If interleaved, node will be NULL, * otherwise the ordering scope will be limited to the node. Normally, the * whole 'then' clone should restart if 'first' is restarted, so then->node * is needed. */ pcmk_node_t *node = then->node; if (pcmk_is_set(order->flags, pcmk__ar_first_implies_same_node_then)) { /* For unfencing, only instances of 'then' on the same node as 'first' * (the unfencing operation) should restart, so reset node to * first->node, at which point this case is handled like a normal * pcmk__ar_first_implies_then. */ pcmk__clear_relation_flags(order->flags, pcmk__ar_first_implies_same_node_then); pcmk__set_relation_flags(order->flags, pcmk__ar_first_implies_then); node = first->node; pcmk__rsc_trace(then->rsc, "%s then %s: mapped " "pcmk__ar_first_implies_same_node_then to " "pcmk__ar_first_implies_then on %s", first->uuid, then->uuid, pcmk__node_name(node)); } if (pcmk_is_set(order->flags, pcmk__ar_first_implies_then)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags & pcmk__action_optional, pcmk__action_optional, pcmk__ar_first_implies_then, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_optional) && pcmk_is_set(then->flags, pcmk__action_optional)) { pcmk__clear_action_flags(then, pcmk__action_optional); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_implies_then", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_intermediate_stop) && (then->rsc != NULL)) { enum pcmk__action_flags restart = pcmk__action_optional |pcmk__action_runnable; changed |= update(then->rsc, first, then, node, first_flags, restart, pcmk__ar_intermediate_stop, scheduler); pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_intermediate_stop", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_then_implies_first)) { if (first->rsc != NULL) { changed |= update(first->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_then_implies_first, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_optional) && pcmk_is_set(first->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(first, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_first); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_then_implies_first", first->uuid, then->uuid, (changed? 
"changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_promoted_then_implies_first)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags & pcmk__action_optional, pcmk__action_optional, pcmk__ar_promoted_then_implies_first, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after " "pcmk__ar_promoted_then_implies_first", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_min_runnable)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_min_runnable, scheduler); } else if (pcmk_is_set(first_flags, pcmk__action_runnable)) { // We have another runnable instance of "first" then->runnable_before++; /* Mark "then" as runnable if it requires a certain number of * "before" instances to be runnable, and they now are. */ if ((then->runnable_before >= then->required_runnable_before) && !pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__set_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_min_runnable", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_nested_remote_probe) && (then->rsc != NULL)) { if (!pcmk_is_set(first_flags, pcmk__action_runnable) && (first->rsc != NULL) && (first->rsc->priv->active_nodes != NULL)) { pcmk__rsc_trace(then->rsc, "%s then %s: ignoring because first is stopping", first->uuid, then->uuid); order->flags = pcmk__ar_none; } else { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_unrunnable_first_blocks, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_nested_remote_probe", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_unrunnable_first_blocks)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_unrunnable_first_blocks, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_runnable) && pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_unrunnable_first_blocks", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_unmigratable_then_blocks)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_unmigratable_then_blocks, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after " "pcmk__ar_unmigratable_then_blocks", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_first_else_then)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_first_else_then, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_else_then", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_ordered)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_ordered, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_ordered", first->uuid, then->uuid, (changed? 
"changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_asymmetric)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_asymmetric, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_asymmetric", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(first->flags, pcmk__action_runnable) && pcmk_is_set(order->flags, pcmk__ar_first_implies_then_graphed) && !pcmk_is_set(first_flags, pcmk__action_optional)) { pcmk__rsc_trace(then->rsc, "%s will be in graph because %s is required", then->uuid, first->uuid); pcmk__set_action_flags(then, pcmk__action_always_in_graph); // Don't bother marking 'then' as changed just for this } if (pcmk_is_set(order->flags, pcmk__ar_then_implies_first_graphed) && !pcmk_is_set(then_flags, pcmk__action_optional)) { pcmk__rsc_trace(then->rsc, "%s will be in graph because %s is required", first->uuid, then->uuid); pcmk__set_action_flags(first, pcmk__action_always_in_graph); // Don't bother marking 'first' as changed just for this } if (pcmk_any_flags_set(order->flags, pcmk__ar_first_implies_then |pcmk__ar_then_implies_first |pcmk__ar_intermediate_stop) && (first->rsc != NULL) && !pcmk_is_set(first->rsc->flags, pcmk__rsc_managed) && pcmk_is_set(first->rsc->flags, pcmk__rsc_blocked) && !pcmk_is_set(first->flags, pcmk__action_runnable) && pcmk__str_eq(first->task, PCMK_ACTION_STOP, pcmk__str_none)) { if (pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after checking whether first " "is blocked, unmanaged, unrunnable stop", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } return changed; } // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pcmk__action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pcmk__action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pcmk__action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->priv->name) /*! * \internal * \brief Update an action's flags for all orderings where it is "then" * * \param[in,out] then Action to update * \param[in,out] scheduler Scheduler data */ void pcmk__update_action_for_orderings(pcmk_action_t *then, pcmk_scheduler_t *scheduler) { GList *lpc = NULL; uint32_t changed = pcmk__updated_none; int last_flags = then->flags; pcmk__rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s", action_type_str(then->flags), then->uuid, action_optional_str(then->flags), action_runnable_str(then->flags), action_node_str(then)); - if (pcmk_is_set(then->flags, pcmk__action_min_runnable)) { + if (then->required_runnable_before > 0) { /* Initialize current known "runnable before" actions. As * update_action_for_ordering_flags() is called for each of then's * before actions, this number will increment as runnable 'first' * actions are encountered. */ then->runnable_before = 0; - if (then->required_runnable_before == 0) { - /* @COMPAT This ordering constraint uses the deprecated - * PCMK_XA_REQUIRE_ALL=PCMK_VALUE_FALSE attribute. Treat it like - * PCMK_META_CLONE_MIN=1. 
- */ - then->required_runnable_before = 1; - } - /* The pcmk__ar_min_runnable clause of * update_action_for_ordering_flags() (called below) * will reset runnable if appropriate. */ pcmk__clear_action_flags(then, pcmk__action_runnable); } for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *other = lpc->data; pcmk_action_t *first = other->action; pcmk_node_t *then_node = then->node; pcmk_node_t *first_node = first->node; const uint32_t target = pcmk__rsc_node_assigned; if ((first->rsc != NULL) && pcmk__is_group(first->rsc) && pcmk__str_eq(first->task, PCMK_ACTION_START, pcmk__str_none)) { first_node = first->rsc->priv->fns->location(first->rsc, NULL, target); if (first_node != NULL) { pcmk__rsc_trace(first->rsc, "Found %s for 'first' %s", pcmk__node_name(first_node), first->uuid); } } if (pcmk__is_group(then->rsc) && pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none)) { then_node = then->rsc->priv->fns->location(then->rsc, NULL, target); if (then_node != NULL) { pcmk__rsc_trace(then->rsc, "Found %s for 'then' %s", pcmk__node_name(then_node), then->uuid); } } // Disable constraint if it only applies when on same node, but isn't if (pcmk_is_set(other->flags, pcmk__ar_if_on_same_node) && (first_node != NULL) && (then_node != NULL) && !pcmk__same_node(first_node, then_node)) { pcmk__rsc_trace(then->rsc, "Disabled ordering %s on %s then %s on %s: " "not same node", other->action->uuid, pcmk__node_name(first_node), then->uuid, pcmk__node_name(then_node)); other->flags = pcmk__ar_none; continue; } pcmk__clear_updated_flags(changed, then, pcmk__updated_first); if ((first->rsc != NULL) && pcmk_is_set(other->flags, pcmk__ar_then_cancels_first) && !pcmk_is_set(then->flags, pcmk__action_optional)) { /* 'then' is required, so we must abandon 'first' * (e.g. a required stop cancels any agent reload). */ pcmk__set_action_flags(other->action, pcmk__action_optional); if (!strcmp(first->task, PCMK_ACTION_RELOAD_AGENT)) { pcmk__clear_rsc_flags(first->rsc, pcmk__rsc_reload); } } if ((first->rsc != NULL) && (then->rsc != NULL) && (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) { first = action_for_ordering(first); } if (first != other->action) { pcmk__rsc_trace(then->rsc, "Ordering %s after %s instead of %s", then->uuid, first->uuid, other->action->uuid); } pcmk__rsc_trace(then->rsc, "%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s", first->uuid, first->flags, then->uuid, then->flags, other->flags, action_node_str(first)); if (first == other->action) { /* 'first' was not remapped (e.g. from 'start' to 'running'), which * could mean it is a non-resource action, a primitive resource * action, or already expanded. 
*/ uint32_t first_flags, then_flags; first_flags = action_flags_for_ordering(first, then_node); then_flags = action_flags_for_ordering(then, first_node); changed |= update_action_for_ordering_flags(first, then, first_flags, then_flags, other, scheduler); /* 'first' was for a complex resource (clone, group, etc), * create a new dependency if necessary */ } else if (order_actions(first, then, other->flags)) { /* This was the first time 'first' and 'then' were associated, * start again to get the new actions_before list */ pcmk__set_updated_flags(changed, then, pcmk__updated_then); pcmk__rsc_trace(then->rsc, "Disabled ordering %s then %s in favor of %s " "then %s", other->action->uuid, then->uuid, first->uuid, then->uuid); other->flags = pcmk__ar_none; } if (pcmk_is_set(changed, pcmk__updated_first)) { crm_trace("Re-processing %s and its 'after' actions " "because it changed", first->uuid); for (GList *lpc2 = first->actions_after; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__related_action_t *other = lpc2->data; pcmk__update_action_for_orderings(other->action, scheduler); } pcmk__update_action_for_orderings(first, scheduler); } } - if (pcmk_is_set(then->flags, pcmk__action_min_runnable)) { + if (then->required_runnable_before > 0) { if (last_flags == then->flags) { pcmk__clear_updated_flags(changed, then, pcmk__updated_then); } else { pcmk__set_updated_flags(changed, then, pcmk__updated_then); } } if (pcmk_is_set(changed, pcmk__updated_then)) { crm_trace("Re-processing %s and its 'after' actions because it changed", then->uuid); if (pcmk_is_set(last_flags, pcmk__action_runnable) && !pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__block_colocation_dependents(then); } pcmk__update_action_for_orderings(then, scheduler); for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *other = lpc->data; pcmk__update_action_for_orderings(other->action, scheduler); } } } static inline bool is_primitive_action(const pcmk_action_t *action) { return (action != NULL) && pcmk__is_primitive(action->rsc); } /*! * \internal * \brief Clear a single action flag and set reason text * * \param[in,out] action Action whose flag should be cleared * \param[in] flag Action flag that should be cleared * \param[in] reason Action that is the reason why flag is being cleared */ #define clear_action_flag_because(action, flag, reason) do { \ if (pcmk_is_set((action)->flags, (flag))) { \ pcmk__clear_action_flags(action, flag); \ if ((action)->rsc != (reason)->rsc) { \ char *reason_text = pe__action2reason((reason), (flag)); \ pe_action_set_reason((action), reason_text, false); \ free(reason_text); \ } \ } \ } while (0) /*! * \internal * \brief Update actions in an asymmetric ordering * * If the "first" action in an asymmetric ordering is unrunnable, make the * "second" action unrunnable as well, if appropriate. * * \param[in] first 'First' action in an asymmetric ordering * \param[in,out] then 'Then' action in an asymmetric ordering */ static void handle_asymmetric_ordering(const pcmk_action_t *first, pcmk_action_t *then) { /* Only resource actions after an unrunnable 'first' action need updates for * asymmetric ordering. 
*/ if ((then->rsc == NULL) || pcmk_is_set(first->flags, pcmk__action_runnable)) { return; } // Certain optional 'then' actions are unaffected by unrunnable 'first' if (pcmk_is_set(then->flags, pcmk__action_optional)) { enum rsc_role_e then_rsc_role; then_rsc_role = then->rsc->priv->fns->state(then->rsc, TRUE); if ((then_rsc_role == pcmk_role_stopped) && pcmk__str_eq(then->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* If 'then' should stop after 'first' but is already stopped, the * ordering is irrelevant. */ return; } else if ((then_rsc_role >= pcmk_role_started) && pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none) && pe__rsc_running_on_only(then->rsc, then->node)) { /* Similarly if 'then' should start after 'first' but is already * started on a single node. */ return; } } // 'First' can't run, so 'then' can't either clear_action_flag_because(then, pcmk__action_optional, first); clear_action_flag_because(then, pcmk__action_runnable, first); } /*! * \internal * \brief Set action bits appropriately when pcmk__ar_intermediate_stop is used * * \param[in,out] first 'First' action in ordering * \param[in,out] then 'Then' action in ordering * \param[in] filter What action flags to care about * * \note pcmk__ar_intermediate_stop is set for "stop resource before starting * it" and "stop later group member before stopping earlier group member" */ static void handle_restart_ordering(pcmk_action_t *first, pcmk_action_t *then, uint32_t filter) { const char *reason = NULL; CRM_ASSERT(is_primitive_action(first)); CRM_ASSERT(is_primitive_action(then)); // We need to update the action in two cases: // ... if 'then' is required if (pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(then->flags, pcmk__action_optional)) { reason = "restart"; } /* ... if 'then' is unrunnable action on same resource (if a resource * should restart but can't start, we still want to stop) */ if (pcmk_is_set(filter, pcmk__action_runnable) && !pcmk_is_set(then->flags, pcmk__action_runnable) && pcmk_is_set(then->rsc->flags, pcmk__rsc_managed) && (first->rsc == then->rsc)) { reason = "stop"; } if (reason == NULL) { return; } pcmk__rsc_trace(first->rsc, "Handling %s -> %s for %s", first->uuid, then->uuid, reason); // Make 'first' required if it is runnable if (pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(first, pcmk__action_optional, then); } // Make 'first' required if 'then' is required if (!pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } // Make 'first' unmigratable if 'then' is unmigratable if (!pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } // Make 'then' unrunnable if 'first' is required but unrunnable if (!pcmk_is_set(first->flags, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_runnable, first); } } /*! * \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two actions, update the actions' flags * (and runnable_before members if appropriate) as appropriate for the ordering. * Effects may cascade to other orderings involving the actions as well. 
* * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * (ignored) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pcmk__action_optional to affect only * mandatory actions, and pcmk__action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t pcmk__update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pcmk_scheduler_t *scheduler) { uint32_t changed = pcmk__updated_none; uint32_t then_flags = 0U; uint32_t first_flags = 0U; CRM_ASSERT((first != NULL) && (then != NULL) && (scheduler != NULL)); then_flags = then->flags; first_flags = first->flags; if (pcmk_is_set(type, pcmk__ar_asymmetric)) { handle_asymmetric_ordering(first, then); } if (pcmk_is_set(type, pcmk__ar_then_implies_first) && !pcmk_is_set(then_flags, pcmk__action_optional)) { // Then is required, and implies first should be, too if (pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(flags, pcmk__action_optional) && pcmk_is_set(first_flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } if (pcmk_is_set(flags, pcmk__action_migratable) && !pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } } if (pcmk_is_set(type, pcmk__ar_promoted_then_implies_first) && (then->rsc != NULL) && (then->rsc->priv->orig_role == pcmk_role_promoted) && pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); if (pcmk_is_set(first->flags, pcmk__action_migratable) && !pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } } if (pcmk_is_set(type, pcmk__ar_unmigratable_then_blocks) && pcmk_is_set(filter, pcmk__action_optional)) { if (!pcmk_all_flags_set(then->flags, pcmk__action_migratable |pcmk__action_runnable)) { clear_action_flag_because(first, pcmk__action_runnable, then); } if (!pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } } if (pcmk_is_set(type, pcmk__ar_first_else_then) && pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_migratable, first); pcmk__clear_action_flags(then, pcmk__action_pseudo); } if (pcmk_is_set(type, pcmk__ar_unrunnable_first_blocks) && pcmk_is_set(filter, pcmk__action_runnable) && pcmk_is_set(then->flags, pcmk__action_runnable) && !pcmk_is_set(flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_runnable, first); clear_action_flag_because(then, pcmk__action_migratable, first); } if (pcmk_is_set(type, pcmk__ar_first_implies_then) && pcmk_is_set(filter, pcmk__action_optional) && pcmk_is_set(then->flags, pcmk__action_optional) && !pcmk_is_set(flags, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_migratable)) { clear_action_flag_because(then, pcmk__action_optional, first); } if (pcmk_is_set(type, pcmk__ar_intermediate_stop)) { handle_restart_ordering(first, then, 
filter); } if (then_flags != then->flags) { pcmk__set_updated_flags(changed, first, pcmk__updated_then); pcmk__rsc_trace(then->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'first' %s (%#.6x)", then->uuid, pcmk__node_name(then->node), then->flags, then_flags, first->uuid, first->flags); if ((then->rsc != NULL) && (then->rsc->priv->parent != NULL)) { // Required to handle "X_stop then X_start" for cloned groups pcmk__update_action_for_orderings(then, scheduler); } } if (first_flags != first->flags) { pcmk__set_updated_flags(changed, first, pcmk__updated_first); pcmk__rsc_trace(first->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'then' %s (%#.6x)", first->uuid, pcmk__node_name(first->node), first->flags, first_flags, then->uuid, then->flags); } return changed; } /*! * \internal * \brief Trace-log an action (optionally with its dependent actions) * * \param[in] pre_text If not NULL, prefix the log with this plus ": " * \param[in] action Action to log * \param[in] details If true, recursively log dependent actions */ void pcmk__log_action(const char *pre_text, const pcmk_action_t *action, bool details) { const char *node_uname = NULL; const char *node_uuid = NULL; const char *desc = NULL; CRM_CHECK(action != NULL, return); if (!pcmk_is_set(action->flags, pcmk__action_pseudo)) { if (action->node != NULL) { node_uname = action->node->priv->name; node_uuid = action->node->priv->id; } else { node_uname = ""; } } switch (pcmk__parse_action(action->task)) { case pcmk__action_fence: case pcmk__action_shutdown: if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { desc = "Pseudo "; } else if (pcmk_is_set(action->flags, pcmk__action_optional)) { desc = "Optional "; } else if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { desc = "!!Non-Startable!! "; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; default: if (pcmk_is_set(action->flags, pcmk__action_optional)) { desc = "Optional "; } else if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { desc = "Pseudo "; } else if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { desc = "!!Non-Startable!! "; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (action->rsc? action->rsc->id : ""), (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; } if (details) { const GList *iter = NULL; const pcmk__related_action_t *other = NULL; crm_trace("\t\t====== Preceding Actions"); for (iter = action->actions_before; iter != NULL; iter = iter->next) { other = (const pcmk__related_action_t *) iter->data; pcmk__log_action("\t\t", other->action, false); } crm_trace("\t\t====== Subsequent Actions"); for (iter = action->actions_after; iter != NULL; iter = iter->next) { other = (const pcmk__related_action_t *) iter->data; pcmk__log_action("\t\t", other->action, false); } crm_trace("\t\t====== End"); } else { crm_trace("\t\t(before=%d, after=%d)", g_list_length(action->actions_before), g_list_length(action->actions_after)); } } /*! 
* \internal * \brief Create a new shutdown action for a node * * \param[in,out] node Node being shut down * * \return Newly created shutdown action for \p node */ pcmk_action_t * pcmk__new_shutdown_action(pcmk_node_t *node) { char *shutdown_id = NULL; pcmk_action_t *shutdown_op = NULL; CRM_ASSERT(node != NULL); shutdown_id = crm_strdup_printf("%s-%s", PCMK_ACTION_DO_SHUTDOWN, node->priv->name); shutdown_op = custom_action(NULL, shutdown_id, PCMK_ACTION_DO_SHUTDOWN, node, FALSE, node->priv->scheduler); pcmk__order_stops_before_shutdown(node, shutdown_op); pcmk__insert_meta(shutdown_op, PCMK__META_OP_NO_WAIT, PCMK_VALUE_TRUE); return shutdown_op; } /*! * \internal * \brief Calculate and add an operation digest to XML * * Calculate an operation digest, which enables us to later determine when a * restart is needed due to the resource's parameters being changed, and add it * to given XML. * * \param[in] op Operation result from executor * \param[in,out] update XML to add digest to */ static void add_op_digest_to_xml(const lrmd_event_data_t *op, xmlNode *update) { char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = pcmk__xe_create(NULL, PCMK_XE_PARAMETERS); g_hash_table_foreach(op->params, hash2field, args_xml); pcmk__filter_op_for_digest(args_xml); digest = pcmk__digest_operation(args_xml); crm_xml_add(update, PCMK__XA_OP_DIGEST, digest); pcmk__xml_free(args_xml); free(digest); } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" /*! * \internal * \brief Create XML for resource operation history update * * \param[in,out] parent Parent XML node to add to * \param[in,out] op Operation event data * \param[in] caller_version DC feature set * \param[in] target_rc Expected result of operation * \param[in] node Name of node on which operation was performed * \param[in] origin Arbitrary description of update source * * \return Newly created XML node for history update */ xmlNode * pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op, const char *caller_version, int target_rc, const char *node, const char *origin) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; CRM_CHECK(op != NULL, return NULL); crm_trace("Creating history XML for %s-interval %s action for %s on %s " "(DC version: %s, origin: %s)", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, ((node == NULL)? "no node" : node), caller_version, origin); task = op->op_type; /* Record a successful agent reload as a start, and a failed one as a * monitor, to make life easier for the scheduler when determining the * current state. * * @COMPAT We should check "reload" here only if the operation was for a * pre-OCF-1.1 resource agent, but we don't know that here, and we should * only ever get results for actions scheduled by us, so we can reasonably * assume any "reload" is actually a pre-1.1 agent reload. 
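/*
 * Minimal sketch (not from the patch) of the history remapping explained
 * above: a completed agent reload is recorded as a start, and a failed one
 * as a monitor, so the scheduler can infer the current state. The literal
 * strings and boolean below are simplified stand-ins for PCMK_ACTION_RELOAD,
 * PCMK_ACTION_RELOAD_AGENT, and the PCMK_EXEC_* status codes.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static const char *
demo_task_for_history(const char *task, bool reload_succeeded)
{
    if ((strcmp(task, "reload") == 0) || (strcmp(task, "reload-agent") == 0)) {
        return reload_succeeded? "start" : "monitor";
    }
    return task;
}

int
main(void)
{
    printf("%s\n", demo_task_for_history("reload-agent", true));   // start
    printf("%s\n", demo_task_for_history("reload-agent", false));  // monitor
    printf("%s\n", demo_task_for_history("monitor", true));        // monitor
    return 0;
}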
*/ if (pcmk__str_any_of(task, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { if (op->op_status == PCMK_EXEC_DONE) { task = PCMK_ACTION_START; } else { task = PCMK_ACTION_MONITOR; } } key = pcmk__op_key(op->rsc_id, task, op->interval_ms); if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = pcmk__notify_key(op->rsc_id, n_type, n_task); if (op->op_status != PCMK_EXEC_PENDING) { /* Ignore notify errors. * * @TODO It might be better to keep the correct result here, and * ignore it in process_graph_event(). */ lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } /* Migration history is preserved separately, which usually matters for * multiple nodes and is important for future cluster transitions. */ } else if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { op_id = strdup(key); } else if (did_rsc_op_fail(op, target_rc)) { op_id = pcmk__op_key(op->rsc_id, "last_failure", 0); if (op->interval_ms == 0) { /* Ensure 'last' gets updated, in case PCMK_META_RECORD_PENDING is * true */ op_id_additional = pcmk__op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval_ms > 0) { op_id = strdup(key); } else { op_id = pcmk__op_key(op->rsc_id, "last", 0); } again: xml_op = pcmk__xe_first_child(parent, PCMK__XE_LRM_RSC_OP, PCMK_XA_ID, op_id); if (xml_op == NULL) { xml_op = pcmk__xe_create(parent, PCMK__XE_LRM_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for: " PCMK__OP_FMT " %d from %s", op->rsc_id, op->op_type, op->interval_ms, op->call_id, origin); local_user_data = pcmk__transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if (magic == NULL) { magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc, (const char *) op->user_data); } crm_xml_add(xml_op, PCMK_XA_ID, op_id); crm_xml_add(xml_op, PCMK__XA_OPERATION_KEY, key); crm_xml_add(xml_op, PCMK_XA_OPERATION, task); crm_xml_add(xml_op, PCMK_XA_CRM_DEBUG_ORIGIN, origin); crm_xml_add(xml_op, PCMK_XA_CRM_FEATURE_SET, caller_version); crm_xml_add(xml_op, PCMK__XA_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, PCMK__XA_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, PCMK_XA_EXIT_REASON, pcmk__s(exit_reason, "")); crm_xml_add(xml_op, PCMK__META_ON_NODE, node); // For context during triage crm_xml_add_int(xml_op, PCMK__XA_CALL_ID, op->call_id); crm_xml_add_int(xml_op, PCMK__XA_RC_CODE, op->rc); crm_xml_add_int(xml_op, PCMK__XA_OP_STATUS, op->op_status); crm_xml_add_ms(xml_op, PCMK_META_INTERVAL, op->interval_ms); if ((op->t_run > 0) || (op->t_rcchange > 0) || (op->exec_time > 0) || (op->queue_time > 0)) { crm_trace("Timing data (" PCMK__OP_FMT "): " "last=%lld change=%lld exec=%u queue=%u", op->rsc_id, op->op_type, op->interval_ms, (long long) op->t_run, (long long) op->t_rcchange, op->exec_time, op->queue_time); if ((op->interval_ms > 0) && (op->t_rcchange > 0)) { // Recurring ops may have changed rc after initial run crm_xml_add_ll(xml_op, PCMK_XA_LAST_RC_CHANGE, (long long) op->t_rcchange); } else { crm_xml_add_ll(xml_op, PCMK_XA_LAST_RC_CHANGE, (long long) op->t_run); } crm_xml_add_int(xml_op, PCMK_XA_EXEC_TIME, op->exec_time); crm_xml_add_int(xml_op, PCMK_XA_QUEUE_TIME, op->queue_time); } if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* 
Record PCMK__META_MIGRATE_SOURCE and PCMK__META_MIGRATE_TARGET always * for migrate ops. */ const char *name = PCMK__META_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = PCMK__META_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } add_op_digest_to_xml(op, xml_op); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } /*! * \internal * \brief Check whether an action shutdown-locks a resource to a node * * If the PCMK_OPT_SHUTDOWN_LOCK cluster property is set, resources will not be * recovered on a different node if cleanly stopped, and may start only on that * same node. This function checks whether that applies to a given action, so * that the transition graph can be marked appropriately. * * \param[in] action Action to check * * \return true if \p action locks its resource to the action's node, * otherwise false */ bool pcmk__action_locks_rsc_to_node(const pcmk_action_t *action) { // Only resource actions taking place on resource's lock node are locked if ((action == NULL) || (action->rsc == NULL) || !pcmk__same_node(action->node, action->rsc->priv->lock_node)) { return false; } /* During shutdown, only stops are locked (otherwise, another action such as * a demote would cause the controller to clear the lock) */ if (action->node->details->shutdown && (action->task != NULL) && (strcmp(action->task, PCMK_ACTION_STOP) != 0)) { return false; } return true; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const pcmk__related_action_t *action_wrapper2 = a; const pcmk__related_action_t *action_wrapper1 = b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } if (action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } return 0; } /*! * \internal * \brief Remove any duplicate action inputs, merging action flags * * \param[in,out] action Action whose inputs should be checked */ void pcmk__deduplicate_action_inputs(pcmk_action_t *action) { GList *item = NULL; GList *next = NULL; pcmk__related_action_t *last_input = NULL; action->actions_before = g_list_sort(action->actions_before, sort_action_id); for (item = action->actions_before; item != NULL; item = next) { pcmk__related_action_t *input = item->data; next = item->next; if ((last_input != NULL) && (input->action->id == last_input->action->id)) { crm_trace("Input %s (%d) duplicate skipped for action %s (%d)", input->action->uuid, input->action->id, action->uuid, action->id); /* For the purposes of scheduling, the ordering flags no longer * matter, but crm_simulate looks at certain ones when creating a * dot graph. Combining the flags is sufficient for that purpose. */ pcmk__set_relation_flags(last_input->flags, input->flags); if (input->graphed) { last_input->graphed = true; } free(item->data); action->actions_before = g_list_delete_link(action->actions_before, item); } else { last_input = input; input->graphed = false; } } } /*! 
* \internal * \brief Output all scheduled actions * * \param[in,out] scheduler Scheduler data */ void pcmk__output_actions(pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv->out; // Output node (non-resource) actions for (GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { char *node_name = NULL; char *task = NULL; pcmk_action_t *action = (pcmk_action_t *) iter->data; if (action->rsc != NULL) { continue; // Resource actions will be output later } else if (pcmk_is_set(action->flags, pcmk__action_optional)) { continue; // This action was not scheduled } if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { task = strdup("Shutdown"); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { const char *op = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); task = crm_strdup_printf("Fence (%s)", op); } else { continue; // Don't display other node action types } if (pcmk__is_guest_or_bundle_node(action->node)) { const pcmk_resource_t *remote = action->node->priv->remote; node_name = crm_strdup_printf("%s (resource: %s)", pcmk__node_name(action->node), remote->priv->launcher->id); } else if (action->node != NULL) { node_name = crm_strdup_printf("%s", pcmk__node_name(action->node)); } out->message(out, "node-action", task, node_name, action->reason); free(node_name); free(task); } // Output resource actions for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->priv->cmds->output_actions(rsc); } } /*! * \internal * \brief Get action name needed to compare digest for configuration changes * * \param[in] task Action name from history * \param[in] interval_ms Action interval (in milliseconds) * * \return Action name whose digest should be compared */ static const char * task_for_digest(const char *task, guint interval_ms) { /* Certain actions need to be compared against the parameters used to start * the resource. */ if ((interval_ms == 0) && pcmk__str_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_MIGRATE_FROM, PCMK_ACTION_PROMOTE, NULL)) { task = PCMK_ACTION_START; } return task; } /*! * \internal * \brief Check whether only sanitized parameters to an action changed * * When collecting CIB files for troubleshooting, crm_report will mask * sensitive resource parameters. If simulations were run using that, affected * resources would appear to need a restart, which would complicate * troubleshooting. To avoid that, we save a "secure digest" of non-sensitive * parameters. This function used that digest to check whether only masked * parameters are different. * * \param[in] xml_op Resource history entry with secure digest * \param[in] digest_data Operation digest information being compared * \param[in] scheduler Scheduler data * * \return true if only sanitized parameters changed, otherwise false */ static bool only_sanitized_changed(const xmlNode *xml_op, const pcmk__op_digest_t *digest_data, const pcmk_scheduler_t *scheduler) { const char *digest_secure = NULL; if (!pcmk_is_set(scheduler->flags, pcmk__sched_sanitized)) { // The scheduler is not being run as a simulation return false; } digest_secure = crm_element_value(xml_op, PCMK__XA_OP_SECURE_DIGEST); return (digest_data->rc != pcmk__digest_match) && (digest_secure != NULL) && (digest_data->digest_secure_calc != NULL) && (strcmp(digest_data->digest_secure_calc, digest_secure) == 0); } /*! 
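/*
 * Condensed sketch (not part of the patch) of the "only sanitized parameters
 * changed" test in only_sanitized_changed() above: the full-parameter digest
 * no longer matches, but the recorded "secure" digest (computed without
 * sensitive parameters) still equals the recalculated one. Parameter names
 * and types are simplified stand-ins for the real digest structures.
 */
#include <stdbool.h>
#include <string.h>

static bool
demo_only_sanitized_changed(bool sanitized_input, bool full_digest_matches,
                            const char *recorded_secure,
                            const char *calculated_secure)
{
    return sanitized_input
           && !full_digest_matches
           && (recorded_secure != NULL)
           && (calculated_secure != NULL)
           && (strcmp(recorded_secure, calculated_secure) == 0);
}

int
main(void)
{
    // Full digest changed, but the secure digests still agree: exit 0
    return demo_only_sanitized_changed(true, false, "abc123", "abc123")? 0 : 1;
}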
* \internal * \brief Force a restart due to a configuration change * * \param[in,out] rsc Resource that action is for * \param[in] task Name of action whose configuration changed * \param[in] interval_ms Action interval (in milliseconds) * \param[in,out] node Node where resource should be restarted */ static void force_restart(pcmk_resource_t *rsc, const char *task, guint interval_ms, pcmk_node_t *node) { char *key = pcmk__op_key(rsc->id, task, interval_ms); pcmk_action_t *required = custom_action(rsc, key, task, NULL, FALSE, rsc->priv->scheduler); pe_action_set_reason(required, "resource definition change", true); trigger_unfencing(rsc, node, "Device parameters changed", NULL, rsc->priv->scheduler); } /*! * \internal * \brief Schedule a reload of a resource on a node * * \param[in,out] data Resource to reload * \param[in] user_data Where resource should be reloaded */ static void schedule_reload(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; pcmk_action_t *reload = NULL; // For collective resources, just call recursively for children if (rsc->priv->variant > pcmk__rsc_variant_primitive) { g_list_foreach(rsc->priv->children, schedule_reload, user_data); return; } // Skip the reload in certain situations if ((node == NULL) || !pcmk_is_set(rsc->flags, pcmk__rsc_managed) || pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { pcmk__rsc_trace(rsc, "Skip reload of %s:%s%s %s", rsc->id, pcmk_is_set(rsc->flags, pcmk__rsc_managed)? "" : " unmanaged", pcmk_is_set(rsc->flags, pcmk__rsc_failed)? " failed" : "", (node == NULL)? "inactive" : node->priv->name); return; } /* If a resource's configuration changed while a start was pending, * force a full restart instead of a reload. */ if (pcmk_is_set(rsc->flags, pcmk__rsc_start_pending)) { pcmk__rsc_trace(rsc, "%s: preventing agent reload because start pending", rsc->id); custom_action(rsc, stop_key(rsc), PCMK_ACTION_STOP, node, FALSE, rsc->priv->scheduler); return; } // Schedule the reload pcmk__set_rsc_flags(rsc, pcmk__rsc_reload); reload = custom_action(rsc, reload_key(rsc), PCMK_ACTION_RELOAD_AGENT, node, FALSE, rsc->priv->scheduler); pe_action_set_reason(reload, "resource definition change", FALSE); // Set orderings so that a required stop or demote cancels the reload pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL, pcmk__ar_ordered|pcmk__ar_then_cancels_first, rsc->priv->scheduler); pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL, pcmk__ar_ordered|pcmk__ar_then_cancels_first, rsc->priv->scheduler); } /*! * \internal * \brief Handle any configuration change for an action * * Given an action from resource history, if the resource's configuration * changed since the action was done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, etc.). 
* * \param[in,out] rsc Resource that action is for * \param[in,out] node Node that action was on * \param[in] xml_op Action XML from resource history * * \return true if action configuration changed, otherwise false */ bool pcmk__check_action_config(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *xml_op) { guint interval_ms = 0; const char *task = NULL; const pcmk__op_digest_t *digest_data = NULL; CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL), return false); task = crm_element_value(xml_op, PCMK_XA_OPERATION); CRM_CHECK(task != NULL, return false); crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &interval_ms); // If this is a recurring action, check whether it has been orphaned if (interval_ms > 0) { if (pcmk__find_action_config(rsc, task, interval_ms, false) != NULL) { pcmk__rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); } else if (pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_cancel_removed_actions)) { pcmk__schedule_cancel(rsc, crm_element_value(xml_op, PCMK__XA_CALL_ID), task, interval_ms, node, "orphan"); return true; } else { pcmk__rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); return true; } } crm_trace("Checking %s-interval %s for %s on %s for configuration changes", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); task = task_for_digest(task, interval_ms); digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->priv->scheduler); if (only_sanitized_changed(xml_op, digest_data, rsc->priv->scheduler)) { if (!pcmk__is_daemon && (rsc->priv->scheduler->priv->out != NULL)) { pcmk__output_t *out = rsc->priv->scheduler->priv->out; out->info(out, "Only 'private' parameters to %s-interval %s for %s " "on %s changed: %s", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node), crm_element_value(xml_op, PCMK__XA_TRANSITION_MAGIC)); } return false; } switch (digest_data->rc) { case pcmk__digest_restart: crm_log_xml_debug(digest_data->params_restart, "params:restart"); force_restart(rsc, task, interval_ms, node); return true; case pcmk__digest_unknown: case pcmk__digest_mismatch: // Changes that can potentially be handled by an agent reload if (interval_ms > 0) { /* Recurring actions aren't reloaded per se, they are just * re-scheduled so the next run uses the new parameters. * The old instance will be cancelled automatically. */ crm_log_xml_debug(digest_data->params_all, "params:reschedule"); pcmk__reschedule_recurring(rsc, task, interval_ms, node); } else if (crm_element_value(xml_op, PCMK__XA_OP_RESTART_DIGEST) != NULL) { // Agent supports reload, so use it trigger_unfencing(rsc, node, "Device parameters changed (reload)", NULL, rsc->priv->scheduler); crm_log_xml_debug(digest_data->params_all, "params:reload"); schedule_reload((gpointer) rsc, (gpointer) node); } else { pcmk__rsc_trace(rsc, "Restarting %s " "because agent doesn't support reload", rsc->id); crm_log_xml_debug(digest_data->params_restart, "params:restart"); force_restart(rsc, task, interval_ms, node); } return true; default: break; } return false; } /*! 
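/*
 * Sketch (not from the patch) of the decision tree pcmk__check_action_config()
 * applies above once a configuration change is detected for a completed
 * action: restart-type parameter changes force a restart; other changes
 * reschedule a recurring action, reload via the agent when it supports
 * reload, or otherwise restart. The enum and names are simplified stand-ins
 * for the pcmk__digest_* comparison results.
 */
#include <stdbool.h>
#include <stdio.h>

enum demo_digest_rc {
    DEMO_MATCH,
    DEMO_RESTART_PARAM_CHANGED,
    DEMO_OTHER_CHANGE,
};

static const char *
demo_reaction(enum demo_digest_rc rc, unsigned int interval_ms,
              bool agent_can_reload)
{
    switch (rc) {
        case DEMO_RESTART_PARAM_CHANGED:
            return "full restart";                 // restart-type parameter changed
        case DEMO_OTHER_CHANGE:
            if (interval_ms > 0) {
                return "reschedule recurring op";  // next run picks up new parameters
            }
            return agent_can_reload? "agent reload" : "full restart";
        default:
            return "no action needed";
    }
}

int
main(void)
{
    printf("%s\n", demo_reaction(DEMO_OTHER_CHANGE, 0, true));           // agent reload
    printf("%s\n", demo_reaction(DEMO_OTHER_CHANGE, 10000, true));       // reschedule
    printf("%s\n", demo_reaction(DEMO_RESTART_PARAM_CHANGED, 0, false)); // full restart
    return 0;
}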
* \internal * \brief Create a list of resource's action history entries, sorted by call ID * * \param[in] rsc_entry Resource's \c PCMK__XE_LRM_RSC_OP status XML * \param[out] start_index Where to store index of start-like action, if any * \param[out] stop_index Where to store index of stop action, if any */ static GList * rsc_history_as_list(const xmlNode *rsc_entry, int *start_index, int *stop_index) { GList *ops = NULL; for (xmlNode *rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) { ops = g_list_prepend(ops, rsc_op); } ops = g_list_sort(ops, sort_op_by_callid); calculate_active_ops(ops, start_index, stop_index); return ops; } /*! * \internal * \brief Process a resource's action history from the CIB status * * Given a resource's action history, if the resource's configuration * changed since the actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in] rsc_entry Resource's \c PCMK__XE_LRM_RSC_OP status XML * \param[in,out] rsc Resource whose history is being processed * \param[in,out] node Node whose history is being processed */ static void process_rsc_history(const xmlNode *rsc_entry, pcmk_resource_t *rsc, pcmk_node_t *node) { int offset = -1; int stop_index = 0; int start_index = 0; GList *sorted_op_list = NULL; if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { if (pcmk__is_anonymous_clone(pe__const_top_resource(rsc, false))) { pcmk__rsc_trace(rsc, "Skipping configuration check " "for orphaned clone instance %s", rsc->id); } else { pcmk__rsc_trace(rsc, "Skipping configuration check and scheduling " "clean-up for orphaned resource %s", rsc->id); pcmk__schedule_cleanup(rsc, node, false); } return; } if (pe_find_node_id(rsc->priv->active_nodes, node->priv->id) == NULL) { if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) { pcmk__schedule_cleanup(rsc, node, false); } pcmk__rsc_trace(rsc, "Skipping configuration check for %s " "because no longer active on %s", rsc->id, pcmk__node_name(node)); return; } pcmk__rsc_trace(rsc, "Checking for configuration changes for %s on %s", rsc->id, pcmk__node_name(node)); if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) { pcmk__schedule_cleanup(rsc, node, false); } sorted_op_list = rsc_history_as_list(rsc_entry, &start_index, &stop_index); if (start_index < stop_index) { return; // Resource is stopped } for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { xmlNode *rsc_op = (xmlNode *) iter->data; const char *task = NULL; guint interval_ms = 0; if (++offset < start_index) { // Skip actions that happened before a start continue; } task = crm_element_value(rsc_op, PCMK_XA_OPERATION); crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms); if ((interval_ms > 0) && (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations pcmk__schedule_cancel(rsc, crm_element_value(rsc_op, PCMK__XA_CALL_ID), task, interval_ms, node, "maintenance mode"); } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_PROMOTE, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. 
*/ if (pe__bundle_needs_remote_name(rsc)) { /* We haven't assigned resources to nodes yet, so if the * REMOTE_CONTAINER_HACK is used, we may calculate the digest * based on the literal "#uname" value rather than the properly * substituted value. That would mistakenly make the action * definition appear to have been changed. Defer the check until * later in this case. */ pe__add_param_check(rsc_op, rsc, node, pcmk__check_active, rsc->priv->scheduler); } else if (pcmk__check_action_config(rsc, node, rsc_op) && (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) != 0)) { pe__clear_failcount(rsc, node, "action definition changed", rsc->priv->scheduler); } } } g_list_free(sorted_op_list); } /*! * \internal * \brief Process a node's action history from the CIB status * * Given a node's resource history, if the resource's configuration changed * since the actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in,out] node Node whose history is being processed * \param[in] lrm_rscs Node's \c PCMK__XE_LRM_RESOURCES from CIB status XML */ static void process_node_history(pcmk_node_t *node, const xmlNode *lrm_rscs) { crm_trace("Processing node history for %s", pcmk__node_name(node)); for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rscs, PCMK__XE_LRM_RESOURCE, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) { if (rsc_entry->children != NULL) { GList *result = pcmk__rscs_matching_id(pcmk__xe_id(rsc_entry), node->priv->scheduler); for (GList *iter = result; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (pcmk__is_primitive(rsc)) { process_rsc_history(rsc_entry, rsc, node); } } g_list_free(result); } } } // XPath to find a node's resource history #define XPATH_NODE_HISTORY "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']" \ "/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES /*! * \internal * \brief Process any resource configuration changes in the CIB status * * Go through all nodes' resource history, and if a resource's configuration * changed since its actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in,out] scheduler Scheduler data */ void pcmk__handle_rsc_config_changes(pcmk_scheduler_t *scheduler) { crm_trace("Check resource and action configuration for changes"); /* Rather than iterate through the status section, iterate through the nodes * and search for the appropriate status subsection for each. This skips * orphaned nodes and lets us eliminate some cases before searching the XML. */ for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; /* Don't bother checking actions for a node that can't run actions ... * unless it's in maintenance mode, in which case we still need to * cancel any existing recurring monitors. 
*/ if (node->details->maintenance || pcmk__node_available(node, false, false)) { char *xpath = NULL; xmlNode *history = NULL; xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->priv->name); history = get_xpath_object(xpath, scheduler->input, LOG_NEVER); free(xpath); process_node_history(node, history); } } } diff --git a/lib/pacemaker/pcmk_sched_ordering.c b/lib/pacemaker/pcmk_sched_ordering.c index 668f7fc7de..7d7cfe027d 100644 --- a/lib/pacemaker/pcmk_sched_ordering.c +++ b/lib/pacemaker/pcmk_sched_ordering.c @@ -1,1510 +1,1509 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // PRIx32 #include #include #include #include #include "libpacemaker_private.h" enum pe_order_kind { pe_order_kind_optional, pe_order_kind_mandatory, pe_order_kind_serialize, }; enum ordering_symmetry { ordering_asymmetric, // the only relation in an asymmetric ordering ordering_symmetric, // the normal relation in a symmetric ordering ordering_symmetric_inverse, // the inverse relation in a symmetric ordering }; #define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ __rsc = pcmk__find_constraint_resource(scheduler->priv->resources, \ __name); \ if (__rsc == NULL) { \ pcmk__config_err("%s: No resource found for %s", __set, __name);\ return pcmk_rc_unpack_error; \ } \ } while (0) static const char * invert_action(const char *action) { if (pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) { return PCMK_ACTION_STOP; } else if (pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) { return PCMK_ACTION_START; } else if (pcmk__str_eq(action, PCMK_ACTION_PROMOTE, pcmk__str_none)) { return PCMK_ACTION_DEMOTE; } else if (pcmk__str_eq(action, PCMK_ACTION_DEMOTE, pcmk__str_none)) { return PCMK_ACTION_PROMOTE; } else if (pcmk__str_eq(action, PCMK_ACTION_PROMOTED, pcmk__str_none)) { return PCMK_ACTION_DEMOTED; } else if (pcmk__str_eq(action, PCMK_ACTION_DEMOTED, pcmk__str_none)) { return PCMK_ACTION_PROMOTED; } else if (pcmk__str_eq(action, PCMK_ACTION_RUNNING, pcmk__str_none)) { return PCMK_ACTION_STOPPED; } else if (pcmk__str_eq(action, PCMK_ACTION_STOPPED, pcmk__str_none)) { return PCMK_ACTION_RUNNING; } pcmk__config_warn("Unknown action '%s' specified in order constraint", action); return NULL; } static enum pe_order_kind get_ordering_type(const xmlNode *xml_obj) { enum pe_order_kind kind_e = pe_order_kind_mandatory; const char *kind = crm_element_value(xml_obj, PCMK_XA_KIND); if (kind == NULL) { const char *score = crm_element_value(xml_obj, PCMK_XA_SCORE); kind_e = pe_order_kind_mandatory; if (score) { // @COMPAT deprecated informally since 1.0.7, formally since 2.0.1 int score_i = char2score(score); if (score_i == 0) { kind_e = pe_order_kind_optional; } pcmk__warn_once(pcmk__wo_order_score, "Support for '" PCMK_XA_SCORE "' in " PCMK_XE_RSC_ORDER " is deprecated and will be " "removed in a future release " "(use '" PCMK_XA_KIND "' instead)"); } } else if (pcmk__str_eq(kind, PCMK_VALUE_MANDATORY, pcmk__str_none)) { kind_e = pe_order_kind_mandatory; } else if (pcmk__str_eq(kind, PCMK_VALUE_OPTIONAL, pcmk__str_none)) { kind_e = pe_order_kind_optional; } else if (pcmk__str_eq(kind, PCMK_VALUE_SERIALIZE, pcmk__str_none)) { kind_e = pe_order_kind_serialize; } else { pcmk__config_err("Resetting '" PCMK_XA_KIND "' for constraint %s to " "'" PCMK_VALUE_MANDATORY "' because '%s' is not 
valid", pcmk__s(pcmk__xe_id(xml_obj), "missing ID"), kind); } return kind_e; } /*! * \internal * \brief Get ordering symmetry from XML * * \param[in] xml_obj Ordering XML * \param[in] parent_kind Default ordering kind * \param[in] parent_symmetrical_s Parent element's \c PCMK_XA_SYMMETRICAL * setting, if any * * \retval ordering_symmetric Ordering is symmetric * \retval ordering_asymmetric Ordering is asymmetric */ static enum ordering_symmetry get_ordering_symmetry(const xmlNode *xml_obj, enum pe_order_kind parent_kind, const char *parent_symmetrical_s) { int rc = pcmk_rc_ok; bool symmetric = false; enum pe_order_kind kind = parent_kind; // Default to parent's kind // Check ordering XML for explicit kind if ((crm_element_value(xml_obj, PCMK_XA_KIND) != NULL) || (crm_element_value(xml_obj, PCMK_XA_SCORE) != NULL)) { kind = get_ordering_type(xml_obj); } // Check ordering XML (and parent) for explicit PCMK_XA_SYMMETRICAL setting rc = pcmk__xe_get_bool_attr(xml_obj, PCMK_XA_SYMMETRICAL, &symmetric); if (rc != pcmk_rc_ok && parent_symmetrical_s != NULL) { symmetric = crm_is_true(parent_symmetrical_s); rc = pcmk_rc_ok; } if (rc == pcmk_rc_ok) { if (symmetric) { if (kind == pe_order_kind_serialize) { pcmk__config_warn("Ignoring " PCMK_XA_SYMMETRICAL " for '%s' because not valid with " PCMK_XA_KIND " of '" PCMK_VALUE_SERIALIZE "'", pcmk__xe_id(xml_obj)); } else { return ordering_symmetric; } } return ordering_asymmetric; } // Use default symmetry if (kind == pe_order_kind_serialize) { return ordering_asymmetric; } return ordering_symmetric; } /*! * \internal * \brief Get ordering flags appropriate to ordering kind * * \param[in] kind Ordering kind * \param[in] first Action name for 'first' action * \param[in] symmetry This ordering's symmetry role * * \return Minimal ordering flags appropriate to \p kind */ static uint32_t ordering_flags_for_kind(enum pe_order_kind kind, const char *first, enum ordering_symmetry symmetry) { uint32_t flags = pcmk__ar_none; // so we trace-log all flags set switch (kind) { case pe_order_kind_optional: pcmk__set_relation_flags(flags, pcmk__ar_ordered); break; case pe_order_kind_serialize: /* This flag is not used anywhere directly but means the relation * will not match an equality comparison against pcmk__ar_none or * pcmk__ar_ordered. */ pcmk__set_relation_flags(flags, pcmk__ar_serialize); break; case pe_order_kind_mandatory: pcmk__set_relation_flags(flags, pcmk__ar_ordered); switch (symmetry) { case ordering_asymmetric: pcmk__set_relation_flags(flags, pcmk__ar_asymmetric); break; case ordering_symmetric: pcmk__set_relation_flags(flags, pcmk__ar_first_implies_then); if (pcmk__strcase_any_of(first, PCMK_ACTION_START, PCMK_ACTION_PROMOTE, NULL)) { pcmk__set_relation_flags(flags, pcmk__ar_unrunnable_first_blocks); } break; case ordering_symmetric_inverse: pcmk__set_relation_flags(flags, pcmk__ar_then_implies_first); break; } break; } return flags; } /*! 
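 * \internal
 * \brief Editor's note: worked example (not part of the original patch)
 */

/* Illustrative sketch: a sample ordering constraint and how the helpers above
 * would classify it. The XML below is hypothetical example data; the attribute
 * spellings follow the usual CIB names for the PCMK_XA_* constants used above.
 *
 *   <rsc_order id="order-db-then-app" first="db" then="app"
 *              kind="Mandatory" symmetrical="true"/>
 *
 * With no first-action given, "start" is assumed (see
 * unpack_simple_rsc_order() below), so:
 *
 *   get_ordering_type()     -> pe_order_kind_mandatory
 *   get_ordering_symmetry() -> ordering_symmetric
 *   ordering_flags_for_kind(kind, "start", ordering_symmetric)
 *       -> pcmk__ar_ordered | pcmk__ar_first_implies_then
 *          | pcmk__ar_unrunnable_first_blocks
 */

/*!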
* \internal * \brief Find resource corresponding to ID specified in ordering * * \param[in] xml Ordering XML * \param[in] resource_attr XML attribute name for resource ID * \param[in] scheduler Scheduler data * * \return Resource corresponding to \p id, or NULL if none */ static pcmk_resource_t * get_ordering_resource(const xmlNode *xml, const char *resource_attr, const pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(xml, resource_attr); if (rsc_id == NULL) { pcmk__config_err("Ignoring constraint '%s' without %s", pcmk__xe_id(xml), resource_attr); return NULL; } rsc = pcmk__find_constraint_resource(scheduler->priv->resources, rsc_id); if (rsc == NULL) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "does not exist", pcmk__xe_id(xml), rsc_id); return NULL; } return rsc; } /*! * \internal * \brief Determine minimum number of 'first' instances required in ordering * * \param[in] rsc 'First' resource in ordering * \param[in] xml Ordering XML * * \return Minimum 'first' instances required (or 0 if not applicable) */ static int get_minimum_first_instances(const pcmk_resource_t *rsc, const xmlNode *xml) { const char *clone_min = NULL; bool require_all = false; if (!pcmk__is_clone(rsc)) { return 0; } clone_min = g_hash_table_lookup(rsc->priv->meta, PCMK_META_CLONE_MIN); if (clone_min != NULL) { int clone_min_int = 0; pcmk__scan_min_int(clone_min, &clone_min_int, 0); return clone_min_int; } /* @COMPAT 1.1.13: * PCMK_XA_REQUIRE_ALL=PCMK_VALUE_FALSE is deprecated equivalent of * PCMK_META_CLONE_MIN=1 */ if (pcmk__xe_get_bool_attr(xml, PCMK_XA_REQUIRE_ALL, &require_all) != ENODATA) { pcmk__warn_once(pcmk__wo_require_all, "Support for " PCMK_XA_REQUIRE_ALL " in ordering " "constraints is deprecated and will be removed in a " "future release (use " PCMK_META_CLONE_MIN " clone " "meta-attribute instead)"); if (!require_all) { return 1; } } return 0; } /*! * \internal * \brief Create orderings for a constraint with \c PCMK_META_CLONE_MIN > 0 * * \param[in] id Ordering ID * \param[in,out] rsc_first 'First' resource in ordering (a clone) * \param[in] action_first 'First' action in ordering * \param[in] rsc_then 'Then' resource in ordering * \param[in] action_then 'Then' action in ordering * \param[in] flags Ordering flags * \param[in] clone_min Minimum required instances of 'first' */ static void clone_min_ordering(const char *id, pcmk_resource_t *rsc_first, const char *action_first, pcmk_resource_t *rsc_then, const char *action_then, uint32_t flags, int clone_min) { // Create a pseudo-action for when the minimum instances are active char *task = crm_strdup_printf(PCMK_ACTION_CLONE_ONE_OR_MORE ":%s", id); pcmk_action_t *clone_min_met = get_pseudo_op(task, rsc_first->priv->scheduler); free(task); /* Require the pseudo-action to have the required number of actions to be * considered runnable before allowing the pseudo-action to be runnable. 
*/ clone_min_met->required_runnable_before = clone_min; - pcmk__set_action_flags(clone_min_met, pcmk__action_min_runnable); // Order the actions for each clone instance before the pseudo-action for (GList *iter = rsc_first->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child = iter->data; pcmk__new_ordering(child, pcmk__op_key(child->id, action_first, 0), NULL, NULL, NULL, clone_min_met, pcmk__ar_min_runnable |pcmk__ar_first_implies_then_graphed, rsc_first->priv->scheduler); } // Order "then" action after the pseudo-action (if runnable) pcmk__new_ordering(NULL, NULL, clone_min_met, rsc_then, pcmk__op_key(rsc_then->id, action_then, 0), NULL, flags|pcmk__ar_unrunnable_first_blocks, rsc_first->priv->scheduler); } /*! * \internal * \brief Update ordering flags for restart-type=restart * * \param[in] rsc 'Then' resource in ordering * \param[in] kind Ordering kind * \param[in] flag Ordering flag to set (when applicable) * \param[in,out] flags Ordering flag set to update * * \compat The \c PCMK__META_RESTART_TYPE resource meta-attribute is deprecated. * Eventually, it will be removed, and \c pcmk__restart_ignore will be * the only behavior, at which time this can just be removed entirely. */ #define handle_restart_type(rsc, kind, flag, flags) do { \ if (((kind) == pe_order_kind_optional) \ && ((rsc)->priv->restart_type == pcmk__restart_restart)) { \ pcmk__set_relation_flags((flags), (flag)); \ } \ } while (0) /*! * \internal * \brief Create new ordering for inverse of symmetric constraint * * \param[in] id Ordering ID (for logging only) * \param[in] kind Ordering kind * \param[in] rsc_first 'First' resource in ordering (a clone) * \param[in] action_first 'First' action in ordering * \param[in,out] rsc_then 'Then' resource in ordering * \param[in] action_then 'Then' action in ordering */ static void inverse_ordering(const char *id, enum pe_order_kind kind, pcmk_resource_t *rsc_first, const char *action_first, pcmk_resource_t *rsc_then, const char *action_then) { action_then = invert_action(action_then); action_first = invert_action(action_first); if ((action_then == NULL) || (action_first == NULL)) { pcmk__config_warn("Cannot invert constraint '%s' " "(please specify inverse manually)", id); } else { uint32_t flags = ordering_flags_for_kind(kind, action_first, ordering_symmetric_inverse); handle_restart_type(rsc_then, kind, pcmk__ar_then_implies_first, flags); pcmk__order_resource_actions(rsc_then, action_then, rsc_first, action_first, flags); } } static void unpack_simple_rsc_order(xmlNode *xml_obj, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc_then = NULL; pcmk_resource_t *rsc_first = NULL; int min_required_before = 0; enum pe_order_kind kind = pe_order_kind_mandatory; uint32_t flags = pcmk__ar_none; enum ordering_symmetry symmetry; const char *action_then = NULL; const char *action_first = NULL; const char *id = NULL; CRM_CHECK(xml_obj != NULL, return); id = crm_element_value(xml_obj, PCMK_XA_ID); if (id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " PCMK_XA_ID, xml_obj->name); return; } rsc_first = get_ordering_resource(xml_obj, PCMK_XA_FIRST, scheduler); if (rsc_first == NULL) { return; } rsc_then = get_ordering_resource(xml_obj, PCMK_XA_THEN, scheduler); if (rsc_then == NULL) { return; } action_first = crm_element_value(xml_obj, PCMK_XA_FIRST_ACTION); if (action_first == NULL) { action_first = PCMK_ACTION_START; } action_then = crm_element_value(xml_obj, PCMK_XA_THEN_ACTION); if (action_then == NULL) { action_then = action_first; } kind = 
get_ordering_type(xml_obj); symmetry = get_ordering_symmetry(xml_obj, kind, NULL); flags = ordering_flags_for_kind(kind, action_first, symmetry); handle_restart_type(rsc_then, kind, pcmk__ar_first_implies_then, flags); /* If there is a minimum number of instances that must be runnable before * the 'then' action is runnable, we use a pseudo-action for convenience: * minimum number of clone instances have runnable actions -> * pseudo-action is runnable -> dependency is runnable. */ min_required_before = get_minimum_first_instances(rsc_first, xml_obj); if (min_required_before > 0) { clone_min_ordering(id, rsc_first, action_first, rsc_then, action_then, flags, min_required_before); } else { pcmk__order_resource_actions(rsc_first, action_first, rsc_then, action_then, flags); } if (symmetry == ordering_symmetric) { inverse_ordering(id, kind, rsc_first, action_first, rsc_then, action_then); } } /*! * \internal * \brief Create a new ordering between two actions * * \param[in,out] first_rsc Resource for 'first' action (if NULL and * \p first_action is a resource action, that * resource will be used) * \param[in,out] first_action_task Action key for 'first' action (if NULL and * \p first_action is not NULL, its UUID will * be used) * \param[in,out] first_action 'first' action (if NULL, \p first_rsc and * \p first_action_task must be set) * * \param[in] then_rsc Resource for 'then' action (if NULL and * \p then_action is a resource action, that * resource will be used) * \param[in,out] then_action_task Action key for 'then' action (if NULL and * \p then_action is not NULL, its UUID will * be used) * \param[in] then_action 'then' action (if NULL, \p then_rsc and * \p then_action_task must be set) * * \param[in] flags Group of enum pcmk__action_relation_flags * \param[in,out] sched Scheduler data to add ordering to * * \note This function takes ownership of first_action_task and * then_action_task, which do not need to be freed by the caller. 
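 *
 * \par Example
 * Editor's illustrative sketch (not from the original patch); \c rsc_a,
 * \c rsc_b and \c sched are hypothetical.
 * \code
 * // Mandatory ordering: rsc_a's start comes before, and implies, rsc_b's start
 * pcmk__new_ordering(rsc_a, pcmk__op_key(rsc_a->id, PCMK_ACTION_START, 0),
 *                    NULL,
 *                    rsc_b, pcmk__op_key(rsc_b->id, PCMK_ACTION_START, 0),
 *                    NULL,
 *                    pcmk__ar_ordered|pcmk__ar_first_implies_then, sched);
 * \endcode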
*/ void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_action_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_action_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched) { pcmk__action_relation_t *order = NULL; // One of action or resource must be specified for each side CRM_CHECK(((first_action != NULL) || (first_rsc != NULL)) && ((then_action != NULL) || (then_rsc != NULL)), free(first_action_task); free(then_action_task); return); if ((first_rsc == NULL) && (first_action != NULL)) { first_rsc = first_action->rsc; } if ((then_rsc == NULL) && (then_action != NULL)) { then_rsc = then_action->rsc; } order = pcmk__assert_alloc(1, sizeof(pcmk__action_relation_t)); order->id = sched->priv->next_ordering_id++; order->flags = flags; order->rsc1 = first_rsc; order->rsc2 = then_rsc; order->action1 = first_action; order->action2 = then_action; order->task1 = first_action_task; order->task2 = then_action_task; if ((order->task1 == NULL) && (first_action != NULL)) { order->task1 = strdup(first_action->uuid); } if ((order->task2 == NULL) && (then_action != NULL)) { order->task2 = strdup(then_action->uuid); } if ((order->rsc1 == NULL) && (first_action != NULL)) { order->rsc1 = first_action->rsc; } if ((order->rsc2 == NULL) && (then_action != NULL)) { order->rsc2 = then_action->rsc; } pcmk__rsc_trace(first_rsc, "Created ordering %d for %s then %s", (sched->priv->next_ordering_id - 1), pcmk__s(order->task1, "an underspecified action"), pcmk__s(order->task2, "an underspecified action")); sched->priv->ordering_constraints = g_list_prepend(sched->priv->ordering_constraints, order); pcmk__order_migration_equivalents(order); } /*! * \brief Unpack a set in an ordering constraint * * \param[in] set Set XML to unpack * \param[in] parent_kind \c PCMK_XE_RSC_ORDER XML \c PCMK_XA_KIND * attribute * \param[in] parent_symmetrical_s \c PCMK_XE_RSC_ORDER XML * \c PCMK_XA_SYMMETRICAL attribute * \param[in,out] scheduler Scheduler data * * \return Standard Pacemaker return code */ static int unpack_order_set(const xmlNode *set, enum pe_order_kind parent_kind, const char *parent_symmetrical_s, pcmk_scheduler_t *scheduler) { GList *set_iter = NULL; GList *resources = NULL; pcmk_resource_t *last = NULL; pcmk_resource_t *resource = NULL; int local_kind = parent_kind; bool sequential = false; uint32_t flags = pcmk__ar_ordered; enum ordering_symmetry symmetry; char *key = NULL; const char *id = pcmk__xe_id(set); const char *action = crm_element_value(set, PCMK_XA_ACTION); const char *sequential_s = crm_element_value(set, PCMK_XA_SEQUENTIAL); const char *kind_s = crm_element_value(set, PCMK_XA_KIND); if (action == NULL) { action = PCMK_ACTION_START; } if (kind_s) { local_kind = get_ordering_type(set); } if (sequential_s == NULL) { sequential_s = "1"; } sequential = crm_is_true(sequential_s); symmetry = get_ordering_symmetry(set, parent_kind, parent_symmetrical_s); flags = ordering_flags_for_kind(local_kind, action, symmetry); for (const xmlNode *xml_rsc = pcmk__xe_first_child(set, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, resource, pcmk__xe_id(xml_rsc)); resources = g_list_append(resources, resource); } if (pcmk__list_of_1(resources)) { crm_trace("Single set: %s", id); goto done; } set_iter = resources; while (set_iter != NULL) { resource = (pcmk_resource_t *) set_iter->data; set_iter = set_iter->next; key = pcmk__op_key(resource->id, action, 0); if (local_kind == pe_order_kind_serialize) { 
/* Serialize before everything that comes after */ for (GList *iter = set_iter; iter != NULL; iter = iter->next) { pcmk_resource_t *then_rsc = iter->data; char *then_key = pcmk__op_key(then_rsc->id, action, 0); pcmk__new_ordering(resource, strdup(key), NULL, then_rsc, then_key, NULL, flags, scheduler); } } else if (sequential) { if (last != NULL) { pcmk__order_resource_actions(last, action, resource, action, flags); } last = resource; } free(key); } if (symmetry == ordering_asymmetric) { goto done; } last = NULL; action = invert_action(action); flags = ordering_flags_for_kind(local_kind, action, ordering_symmetric_inverse); set_iter = resources; while (set_iter != NULL) { resource = (pcmk_resource_t *) set_iter->data; set_iter = set_iter->next; if (sequential) { if (last != NULL) { pcmk__order_resource_actions(resource, action, last, action, flags); } last = resource; } } done: g_list_free(resources); return pcmk_rc_ok; } /*! * \brief Order two resource sets relative to each other * * \param[in] id Ordering ID (for logging) * \param[in] set1 First listed set * \param[in] set2 Second listed set * \param[in] kind Ordering kind * \param[in,out] scheduler Scheduler data * \param[in] symmetry Which ordering symmetry applies to this relation * * \return Standard Pacemaker return code */ static int order_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2, enum pe_order_kind kind, pcmk_scheduler_t *scheduler, enum ordering_symmetry symmetry) { const xmlNode *xml_rsc = NULL; const xmlNode *xml_rsc_2 = NULL; pcmk_resource_t *rsc_1 = NULL; pcmk_resource_t *rsc_2 = NULL; const char *action_1 = crm_element_value(set1, PCMK_XA_ACTION); const char *action_2 = crm_element_value(set2, PCMK_XA_ACTION); uint32_t flags = pcmk__ar_none; bool require_all = true; (void) pcmk__xe_get_bool_attr(set1, PCMK_XA_REQUIRE_ALL, &require_all); if (action_1 == NULL) { action_1 = PCMK_ACTION_START; } if (action_2 == NULL) { action_2 = PCMK_ACTION_START; } if (symmetry == ordering_symmetric_inverse) { action_1 = invert_action(action_1); action_2 = invert_action(action_2); } if (pcmk__str_eq(PCMK_ACTION_STOP, action_1, pcmk__str_none) || pcmk__str_eq(PCMK_ACTION_DEMOTE, action_1, pcmk__str_none)) { /* Assuming: A -> ( B || C) -> D * The one-or-more logic only applies during the start/promote phase. * During shutdown neither B nor can shutdown until D is down, so simply * turn require_all back on. */ require_all = true; } flags = ordering_flags_for_kind(kind, action_1, symmetry); /* If we have an unordered set1, whether it is sequential or not is * irrelevant in regards to set2. */ if (!require_all) { char *task = crm_strdup_printf(PCMK_ACTION_ONE_OR_MORE ":%s", pcmk__xe_id(set1)); pcmk_action_t *unordered_action = get_pseudo_op(task, scheduler); free(task); - pcmk__set_action_flags(unordered_action, pcmk__action_min_runnable); + unordered_action->required_runnable_before = 1; for (xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, pcmk__xe_id(xml_rsc)); /* Add an ordering constraint between every element in set1 and the * pseudo action. If any action in set1 is runnable the pseudo * action will be runnable. 
*/ pcmk__new_ordering(rsc_1, pcmk__op_key(rsc_1->id, action_1, 0), NULL, NULL, NULL, unordered_action, pcmk__ar_min_runnable |pcmk__ar_first_implies_then_graphed, scheduler); } for (xml_rsc_2 = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc_2 != NULL; xml_rsc_2 = pcmk__xe_next_same(xml_rsc_2)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, pcmk__xe_id(xml_rsc_2)); /* Add an ordering constraint between the pseudo-action and every * element in set2. If the pseudo-action is runnable, every action * in set2 will be runnable. */ pcmk__new_ordering(NULL, NULL, unordered_action, rsc_2, pcmk__op_key(rsc_2->id, action_2, 0), NULL, flags|pcmk__ar_unrunnable_first_blocks, scheduler); } return pcmk_rc_ok; } if (pcmk__xe_attr_is_true(set1, PCMK_XA_SEQUENTIAL)) { if (symmetry == ordering_symmetric_inverse) { // Get the first one xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); if (xml_rsc != NULL) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, pcmk__xe_id(xml_rsc)); } } else { // Get the last one const char *rid = NULL; for (xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) { rid = pcmk__xe_id(xml_rsc); } EXPAND_CONSTRAINT_IDREF(id, rsc_1, rid); } } if (pcmk__xe_attr_is_true(set2, PCMK_XA_SEQUENTIAL)) { if (symmetry == ordering_symmetric_inverse) { // Get the last one const char *rid = NULL; for (xml_rsc = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) { rid = pcmk__xe_id(xml_rsc); } EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); } else { // Get the first one xml_rsc = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); if (xml_rsc != NULL) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, pcmk__xe_id(xml_rsc)); } } } if ((rsc_1 != NULL) && (rsc_2 != NULL)) { pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); } else if (rsc_1 != NULL) { for (xml_rsc = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, pcmk__xe_id(xml_rsc)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); } } else if (rsc_2 != NULL) { for (xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, pcmk__xe_id(xml_rsc)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); } } else { for (xml_rsc = pcmk__xe_first_child(set1, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, pcmk__xe_id(xml_rsc)); for (xmlNode *xml_rsc_2 = pcmk__xe_first_child(set2, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc_2 != NULL; xml_rsc_2 = pcmk__xe_next_same(xml_rsc_2)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, pcmk__xe_id(xml_rsc_2)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags); } } } return pcmk_rc_ok; } /*! 
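 * \internal
 * \brief Editor's note: illustrative sketch (not part of the original patch)
 *
 * Both clone_min_ordering() and the require-all handling above rely on a
 * pseudo-action that becomes runnable once a minimum number of its "first"
 * actions are runnable (required_runnable_before, combined with
 * pcmk__ar_min_runnable). The hypothetical helper below models only that
 * counting rule; it is not how the scheduler actually stores actions.
 */
static bool
pseudo_action_runnable(int required_runnable_before,
                       const bool *input_runnable, int n_inputs)
{
    int runnable = 0;

    for (int i = 0; i < n_inputs; i++) {
        if (input_runnable[i]) {
            runnable++;
        }
    }
    // Runnable once at least the required number of inputs are runnable
    return runnable >= required_runnable_before;
}

/*!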
* \internal * \brief If an ordering constraint uses resource tags, expand them * * \param[in,out] xml_obj Ordering constraint XML * \param[out] expanded_xml Equivalent XML with tags expanded * \param[in] scheduler Scheduler data * * \return Standard Pacemaker return code (specifically, pcmk_rc_ok on success, * and pcmk_rc_unpack_error on invalid configuration) */ static int unpack_order_tags(xmlNode *xml_obj, xmlNode **expanded_xml, const pcmk_scheduler_t *scheduler) { const char *id_first = NULL; const char *id_then = NULL; const char *action_first = NULL; const char *action_then = NULL; pcmk_resource_t *rsc_first = NULL; pcmk_resource_t *rsc_then = NULL; pcmk__idref_t *tag_first = NULL; pcmk__idref_t *tag_then = NULL; xmlNode *rsc_set_first = NULL; xmlNode *rsc_set_then = NULL; bool any_sets = false; // Check whether there are any resource sets with template or tag references *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, scheduler); if (*expanded_xml != NULL) { crm_log_xml_trace(*expanded_xml, "Expanded " PCMK_XE_RSC_ORDER); return pcmk_rc_ok; } id_first = crm_element_value(xml_obj, PCMK_XA_FIRST); id_then = crm_element_value(xml_obj, PCMK_XA_THEN); if ((id_first == NULL) || (id_then == NULL)) { return pcmk_rc_ok; } if (!pcmk__valid_resource_or_tag(scheduler, id_first, &rsc_first, &tag_first)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", pcmk__xe_id(xml_obj), id_first); return pcmk_rc_unpack_error; } if (!pcmk__valid_resource_or_tag(scheduler, id_then, &rsc_then, &tag_then)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", pcmk__xe_id(xml_obj), id_then); return pcmk_rc_unpack_error; } if ((rsc_first != NULL) && (rsc_then != NULL)) { // Neither side references a template or tag return pcmk_rc_ok; } action_first = crm_element_value(xml_obj, PCMK_XA_FIRST_ACTION); action_then = crm_element_value(xml_obj, PCMK_XA_THEN_ACTION); *expanded_xml = pcmk__xml_copy(NULL, xml_obj); /* Convert template/tag reference in PCMK_XA_FIRST into constraint * PCMK_XE_RESOURCE_SET */ if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_first, PCMK_XA_FIRST, true, scheduler)) { pcmk__xml_free(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_unpack_error; } if (rsc_set_first != NULL) { if (action_first != NULL) { /* Move PCMK_XA_FIRST_ACTION into converted PCMK_XE_RESOURCE_SET as * PCMK_XA_ACTION */ crm_xml_add(rsc_set_first, PCMK_XA_ACTION, action_first); pcmk__xe_remove_attr(*expanded_xml, PCMK_XA_FIRST_ACTION); } any_sets = true; } /* Convert template/tag reference in PCMK_XA_THEN into constraint * PCMK_XE_RESOURCE_SET */ if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_then, PCMK_XA_THEN, true, scheduler)) { pcmk__xml_free(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_unpack_error; } if (rsc_set_then != NULL) { if (action_then != NULL) { /* Move PCMK_XA_THEN_ACTION into converted PCMK_XE_RESOURCE_SET as * PCMK_XA_ACTION */ crm_xml_add(rsc_set_then, PCMK_XA_ACTION, action_then); pcmk__xe_remove_attr(*expanded_xml, PCMK_XA_THEN_ACTION); } any_sets = true; } if (any_sets) { crm_log_xml_trace(*expanded_xml, "Expanded " PCMK_XE_RSC_ORDER); } else { pcmk__xml_free(*expanded_xml); *expanded_xml = NULL; } return pcmk_rc_ok; } /*! 
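 * \internal
 * \brief Editor's note: illustrative calling pattern (not part of the patch)
 *
 * The hypothetical function below distills how pcmk__unpack_ordering() (later
 * in this file) is expected to use unpack_order_tags(): the caller owns any
 * tag-expanded copy and must free it. Error handling is trimmed.
 */
static void
example_expand_and_unpack(xmlNode *xml_obj, pcmk_scheduler_t *scheduler)
{
    xmlNode *expanded_xml = NULL;

    if (unpack_order_tags(xml_obj, &expanded_xml, scheduler) != pcmk_rc_ok) {
        return; // invalid configuration; a message was already logged
    }
    if (expanded_xml != NULL) {
        xml_obj = expanded_xml;     // work with the tag-expanded copy
    }

    /* ... unpack resource sets or a simple ordering from xml_obj ... */

    if (expanded_xml != NULL) {
        pcmk__xml_free(expanded_xml);
    }
}

/*!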
* \internal * \brief Unpack ordering constraint XML * * \param[in,out] xml_obj Ordering constraint XML to unpack * \param[in,out] scheduler Scheduler data */ void pcmk__unpack_ordering(xmlNode *xml_obj, pcmk_scheduler_t *scheduler) { xmlNode *set = NULL; xmlNode *last = NULL; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; const char *id = crm_element_value(xml_obj, PCMK_XA_ID); const char *invert = crm_element_value(xml_obj, PCMK_XA_SYMMETRICAL); enum pe_order_kind kind = get_ordering_type(xml_obj); enum ordering_symmetry symmetry = get_ordering_symmetry(xml_obj, kind, NULL); // Expand any resource tags in the constraint XML if (unpack_order_tags(xml_obj, &expanded_xml, scheduler) != pcmk_rc_ok) { return; } if (expanded_xml != NULL) { orig_xml = xml_obj; xml_obj = expanded_xml; } // If the constraint has resource sets, unpack them for (set = pcmk__xe_first_child(xml_obj, PCMK_XE_RESOURCE_SET, NULL, NULL); set != NULL; set = pcmk__xe_next_same(set)) { set = pcmk__xe_resolve_idref(set, scheduler->input); if ((set == NULL) // Configuration error, message already logged || (unpack_order_set(set, kind, invert, scheduler) != pcmk_rc_ok)) { if (expanded_xml != NULL) { pcmk__xml_free(expanded_xml); } return; } if (last != NULL) { if (order_rsc_sets(id, last, set, kind, scheduler, symmetry) != pcmk_rc_ok) { if (expanded_xml != NULL) { pcmk__xml_free(expanded_xml); } return; } if ((symmetry == ordering_symmetric) && (order_rsc_sets(id, set, last, kind, scheduler, ordering_symmetric_inverse) != pcmk_rc_ok)) { if (expanded_xml != NULL) { pcmk__xml_free(expanded_xml); } return; } } last = set; } if (expanded_xml) { pcmk__xml_free(expanded_xml); xml_obj = orig_xml; } // If the constraint has no resource sets, unpack it as a simple ordering if (last == NULL) { return unpack_simple_rsc_order(xml_obj, scheduler); } } static bool ordering_is_invalid(pcmk_action_t *action, pcmk__related_action_t *input) { /* Prevent user-defined ordering constraints between resources * running in a guest node and the resource that defines that node. */ if (!pcmk_is_set(input->flags, pcmk__ar_guest_allowed) && (input->action->rsc != NULL) && pcmk__rsc_corresponds_to_guest(action->rsc, input->action->node)) { pcmk__config_warn("Invalid ordering constraint between %s and %s", input->action->rsc->id, action->rsc->id); return true; } /* If there's an order like * "rscB_stop node2"-> "load_stopped_node2" -> "rscA_migrate_to node1" * * then rscA is being migrated from node1 to node2, while rscB is being * migrated from node2 to node1. If there would be a graph loop, * break the order "load_stopped_node2" -> "rscA_migrate_to node1". */ if ((input->flags == pcmk__ar_if_on_same_node_or_target) && (action->rsc != NULL) && pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none) && pcmk__graph_has_loop(action, action, input)) { return true; } return false; } void pcmk__disable_invalid_orderings(pcmk_scheduler_t *scheduler) { for (GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; pcmk__related_action_t *input = NULL; for (GList *input_iter = action->actions_before; input_iter != NULL; input_iter = input_iter->next) { input = input_iter->data; if (ordering_is_invalid(action, input)) { input->flags = pcmk__ar_none; } } } } /*! 
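 * \internal
 * \brief Editor's note: worked example (not part of the original patch)
 */

/* Illustrative sketch: how pcmk__unpack_ordering() above chains the resource
 * sets of a single constraint. The set IDs are hypothetical; element spellings
 * follow the PCMK_XE_* constants used above.
 *
 *   <rsc_order id="o2">
 *     <resource_set id="s1"> ... </resource_set>
 *     <resource_set id="s2"> ... </resource_set>
 *     <resource_set id="s3"> ... </resource_set>
 *   </rsc_order>
 *
 * The loop calls order_rsc_sets() for each adjacent pair in document order
 * (s1 then s2, s2 then s3). For a symmetric constraint it additionally creates
 * inverse orderings for the inverted actions, so stops run in the opposite
 * direction (s3's stops before s2's, s2's before s1's).
 */

/*!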
* \internal * \brief Order stops on a node before the node's shutdown * * \param[in,out] node Node being shut down * \param[in] shutdown_op Shutdown action for node */ void pcmk__order_stops_before_shutdown(pcmk_node_t *node, pcmk_action_t *shutdown_op) { for (GList *iter = node->priv->scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; // Only stops on the node shutting down are relevant if (!pcmk__same_node(action->node, node) || !pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { continue; } // Resources and nodes in maintenance mode won't be touched if (pcmk_is_set(action->rsc->flags, pcmk__rsc_maintenance)) { pcmk__rsc_trace(action->rsc, "Not ordering %s before shutdown of %s because " "resource in maintenance mode", action->uuid, pcmk__node_name(node)); continue; } else if (node->details->maintenance) { pcmk__rsc_trace(action->rsc, "Not ordering %s before shutdown of %s because " "node in maintenance mode", action->uuid, pcmk__node_name(node)); continue; } /* Don't touch a resource that is unmanaged or blocked, to avoid * blocking the shutdown (though if another action depends on this one, * we may still end up blocking) */ if (!pcmk_any_flags_set(action->rsc->flags, pcmk__rsc_managed|pcmk__rsc_blocked)) { pcmk__rsc_trace(action->rsc, "Not ordering %s before shutdown of %s because " "resource is unmanaged or blocked", action->uuid, pcmk__node_name(node)); continue; } pcmk__rsc_trace(action->rsc, "Ordering %s before shutdown of %s", action->uuid, pcmk__node_name(node)); pcmk__clear_action_flags(action, pcmk__action_optional); pcmk__new_ordering(action->rsc, NULL, action, NULL, strdup(PCMK_ACTION_DO_SHUTDOWN), shutdown_op, pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks, node->priv->scheduler); } } /*! * \brief Find resource actions matching directly or as child * * \param[in] rsc Resource to check * \param[in] original_key Action key to search for (possibly referencing * parent of \rsc) * * \return Newly allocated list of matching actions * \note It is the caller's responsibility to free the result with g_list_free() */ static GList * find_actions_by_task(const pcmk_resource_t *rsc, const char *original_key) { // Search under given task key directly GList *list = find_actions(rsc->priv->actions, original_key, NULL); if (list == NULL) { // Search again using this resource's ID char *key = NULL; char *task = NULL; guint interval_ms = 0; CRM_CHECK(parse_op_key(original_key, NULL, &task, &interval_ms), return NULL); key = pcmk__op_key(rsc->id, task, interval_ms); list = find_actions(rsc->priv->actions, key, NULL); free(key); free(task); } return list; } /*! 
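 * \internal
 * \brief Editor's note: illustrative sketch (not part of the original patch)
 *
 * The fallback in find_actions_by_task() above matters for clone instances,
 * whose own action keys differ from keys written against the parent resource.
 * The hypothetical example below uses an anonymous clone instance "db:0"; the
 * "<resource>_<task>_<interval>" key form is the one produced by
 * pcmk__op_key().
 */
static void
example_op_key_fallback(void)
{
    guint interval_ms = 0;
    char *task = NULL;
    char *key = NULL;

    // Split the original key (written against the parent "db") into its parts
    parse_op_key("db_start_0", NULL, &task, &interval_ms);

    // Re-key using this instance's own ID: yields "db:0_start_0"
    key = pcmk__op_key("db:0", task, interval_ms);

    free(task);
    free(key);
}

/*!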
* \internal * \brief Order relevant resource actions after a given action * * \param[in,out] first_action Action to order after (or NULL if none runnable) * \param[in] rsc Resource whose actions should be ordered * \param[in,out] order Ordering constraint being applied */ static void order_resource_actions_after(pcmk_action_t *first_action, const pcmk_resource_t *rsc, pcmk__action_relation_t *order) { GList *then_actions = NULL; uint32_t flags = pcmk__ar_none; CRM_CHECK((rsc != NULL) && (order != NULL), return); flags = order->flags; pcmk__rsc_trace(rsc, "Applying ordering %d for 'then' resource %s", order->id, rsc->id); if (order->action2 != NULL) { then_actions = g_list_prepend(NULL, order->action2); } else { then_actions = find_actions_by_task(rsc, order->task2); } if (then_actions == NULL) { pcmk__rsc_trace(rsc, "Ignoring ordering %d: no %s actions found for %s", order->id, order->task2, rsc->id); return; } if ((first_action != NULL) && (first_action->rsc == rsc) && pcmk_is_set(first_action->flags, pcmk__action_migration_abort)) { pcmk__rsc_trace(rsc, "Detected dangling migration ordering (%s then %s %s)", first_action->uuid, order->task2, rsc->id); pcmk__clear_relation_flags(flags, pcmk__ar_first_implies_then); } if ((first_action == NULL) && !pcmk_is_set(flags, pcmk__ar_first_implies_then)) { pcmk__rsc_debug(rsc, "Ignoring ordering %d for %s: No first action found", order->id, rsc->id); g_list_free(then_actions); return; } for (GList *iter = then_actions; iter != NULL; iter = iter->next) { pcmk_action_t *then_action_iter = (pcmk_action_t *) iter->data; if (first_action != NULL) { order_actions(first_action, then_action_iter, flags); } else { pcmk__clear_action_flags(then_action_iter, pcmk__action_runnable); crm_warn("%s of %s is unrunnable because there is no %s of %s " "to order it after", then_action_iter->task, rsc->id, order->task1, order->rsc1->id); } } g_list_free(then_actions); } static void rsc_order_first(pcmk_resource_t *first_rsc, pcmk__action_relation_t *order) { GList *first_actions = NULL; pcmk_action_t *first_action = order->action1; pcmk_resource_t *then_rsc = order->rsc2; CRM_ASSERT(first_rsc != NULL); pcmk__rsc_trace(first_rsc, "Applying ordering constraint %d (first: %s)", order->id, first_rsc->id); if (first_action != NULL) { first_actions = g_list_prepend(NULL, first_action); } else { first_actions = find_actions_by_task(first_rsc, order->task1); } if ((first_actions == NULL) && (first_rsc == then_rsc)) { pcmk__rsc_trace(first_rsc, "Ignoring constraint %d: first (%s for %s) not found", order->id, order->task1, first_rsc->id); } else if (first_actions == NULL) { char *key = NULL; char *op_type = NULL; guint interval_ms = 0; enum rsc_role_e first_role; parse_op_key(order->task1, NULL, &op_type, &interval_ms); key = pcmk__op_key(first_rsc->id, op_type, interval_ms); first_role = first_rsc->priv->fns->state(first_rsc, TRUE); if ((first_role == pcmk_role_stopped) && pcmk__str_eq(op_type, PCMK_ACTION_STOP, pcmk__str_none)) { free(key); pcmk__rsc_trace(first_rsc, "Ignoring constraint %d: first (%s for %s) " "not found", order->id, order->task1, first_rsc->id); } else if ((first_role == pcmk_role_unpromoted) && pcmk__str_eq(op_type, PCMK_ACTION_DEMOTE, pcmk__str_none)) { free(key); pcmk__rsc_trace(first_rsc, "Ignoring constraint %d: first (%s for %s) " "not found", order->id, order->task1, first_rsc->id); } else { pcmk__rsc_trace(first_rsc, "Creating first (%s for %s) for constraint %d ", order->task1, first_rsc->id, order->id); first_action = custom_action(first_rsc, 
key, op_type, NULL, TRUE, first_rsc->priv->scheduler); first_actions = g_list_prepend(NULL, first_action); } free(op_type); } if (then_rsc == NULL) { if (order->action2 == NULL) { pcmk__rsc_trace(first_rsc, "Ignoring constraint %d: then not found", order->id); return; } then_rsc = order->action2->rsc; } for (GList *iter = first_actions; iter != NULL; iter = iter->next) { first_action = iter->data; if (then_rsc == NULL) { order_actions(first_action, order->action2, order->flags); } else { order_resource_actions_after(first_action, then_rsc, order); } } g_list_free(first_actions); } // GFunc to call pcmk__block_colocation_dependents() static void block_colocation_dependents(gpointer data, gpointer user_data) { pcmk__block_colocation_dependents(data); } // GFunc to call pcmk__update_action_for_orderings() static void update_action_for_orderings(gpointer data, gpointer user_data) { pcmk__update_action_for_orderings((pcmk_action_t *) data, (pcmk_scheduler_t *) user_data); } /*! * \internal * \brief Apply all ordering constraints * * \param[in,out] sched Scheduler data */ void pcmk__apply_orderings(pcmk_scheduler_t *sched) { crm_trace("Applying ordering constraints"); /* Ordering constraints need to be processed in the order they were created. * rsc_order_first() and order_resource_actions_after() require the relevant * actions to already exist in some cases, but rsc_order_first() will create * the 'first' action in certain cases. Thus calling rsc_order_first() can * change the behavior of later-created orderings. * * Also, g_list_append() should be avoided for performance reasons, so we * prepend orderings when creating them and reverse the list here. * * @TODO This is brittle and should be carefully redesigned so that the * order of creation doesn't matter, and the reverse becomes unneeded. */ sched->priv->ordering_constraints = g_list_reverse(sched->priv->ordering_constraints); for (GList *iter = sched->priv->ordering_constraints; iter != NULL; iter = iter->next) { pcmk__action_relation_t *order = iter->data; pcmk_resource_t *rsc = order->rsc1; if (rsc != NULL) { rsc_order_first(rsc, order); continue; } rsc = order->rsc2; if (rsc != NULL) { order_resource_actions_after(order->action1, rsc, order); } else { crm_trace("Applying ordering constraint %d (non-resource actions)", order->id); order_actions(order->action1, order->action2, order->flags); } } g_list_foreach(sched->priv->actions, block_colocation_dependents, NULL); crm_trace("Ordering probes"); pcmk__order_probes(sched); crm_trace("Updating %d actions", g_list_length(sched->priv->actions)); g_list_foreach(sched->priv->actions, update_action_for_orderings, sched); pcmk__disable_invalid_orderings(sched); } /*! * \internal * \brief Order a given action after each action in a given list * * \param[in,out] after "After" action * \param[in,out] list List of "before" actions */ void pcmk__order_after_each(pcmk_action_t *after, GList *list) { const char *after_desc = (after->task == NULL)? after->uuid : after->task; for (GList *iter = list; iter != NULL; iter = iter->next) { pcmk_action_t *before = (pcmk_action_t *) iter->data; const char *before_desc = before->task? before->task : before->uuid; crm_debug("Ordering %s on %s before %s on %s", before_desc, pcmk__node_name(before->node), after_desc, pcmk__node_name(after->node)); order_actions(before, after, pcmk__ar_ordered); } } /*! 
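 * \internal
 * \brief Editor's note: illustrative sketch (not part of the original patch)
 *
 * pcmk__apply_orderings() above reverses the ordering list before applying it
 * because orderings are prepended as they are created (g_list_prepend() is
 * O(1)); a single reversal restores creation order. The toy example below
 * shows that pattern, with integers standing in for ordering constraints.
 */
static void
example_prepend_then_reverse(void)
{
    GList *list = NULL;

    for (int id = 1; id <= 3; id++) {
        list = g_list_prepend(list, GINT_TO_POINTER(id));   // list is 3, 2, 1
    }
    list = g_list_reverse(list);                            // list is 1, 2, 3
    g_list_free(list);
}

/*!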
* \internal * \brief Order promotions and demotions for restarts of a clone or bundle * * \param[in,out] rsc Clone or bundle to order */ void pcmk__promotable_restart_ordering(pcmk_resource_t *rsc) { // Order start and promote after all instances are stopped pcmk__order_resource_actions(rsc, PCMK_ACTION_STOPPED, rsc, PCMK_ACTION_START, pcmk__ar_ordered); pcmk__order_resource_actions(rsc, PCMK_ACTION_STOPPED, rsc, PCMK_ACTION_PROMOTE, pcmk__ar_ordered); // Order stop, start, and promote after all instances are demoted pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_STOP, pcmk__ar_ordered); pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_START, pcmk__ar_ordered); pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_PROMOTE, pcmk__ar_ordered); // Order promote after all instances are started pcmk__order_resource_actions(rsc, PCMK_ACTION_RUNNING, rsc, PCMK_ACTION_PROMOTE, pcmk__ar_ordered); // Order demote after all instances are demoted pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTE, rsc, PCMK_ACTION_DEMOTED, pcmk__ar_ordered); } diff --git a/lib/pacemaker/pcmk_verify.c b/lib/pacemaker/pcmk_verify.c index f7433277c1..9427facc74 100644 --- a/lib/pacemaker/pcmk_verify.c +++ b/lib/pacemaker/pcmk_verify.c @@ -1,157 +1,162 @@ /* * Copyright 2023-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" int pcmk__parse_cib(pcmk__output_t *out, const char *cib_source, xmlNodePtr *cib_object) { // @COMPAT Take an enum for cib_source instead of trying to figure it out? const char *first = cib_source; if (cib_source == NULL) { - crm_info("Reading XML from: live cluster"); return cib__signon_query(out, NULL, cib_object); } while (isspace(*first)) { first++; } if (*first == '<') { *cib_object = pcmk__xml_parse(cib_source); } else { *cib_object = pcmk__xml_read(cib_source); } - return (*cib_object == NULL)? ENODATA : pcmk_rc_ok; + return (*cib_object == NULL)? pcmk_rc_unpack_error : pcmk_rc_ok; } int pcmk__verify(pcmk_scheduler_t *scheduler, pcmk__output_t *out, xmlNode **cib_object) { int rc = pcmk_rc_ok; xmlNode *status = NULL; xmlNode *cib_object_copy = NULL; CRM_ASSERT(cib_object != NULL); + /* Without the CIB element, we can't get a schema to validate against, so + * report that separately from validation + */ if (!pcmk__xe_is(*cib_object, PCMK_XE_CIB)) { - rc = EBADMSG; - out->err(out, "This tool can only check complete configurations (i.e. 
those starting with )."); + out->err(out, + "Input is not a CIB (outermost element is %s not " + PCMK_XE_CIB ")", + pcmk__s((const char *) (*cib_object)->name, "unrecognizable")); + rc = pcmk_rc_schema_validation; goto verify_done; } status = pcmk_find_cib_element(*cib_object, PCMK_XE_STATUS); if (status == NULL) { pcmk__xe_create(*cib_object, PCMK_XE_STATUS); } if (!pcmk__validate_xml(*cib_object, NULL, (xmlRelaxNGValidityErrorFunc) out->err, out)) { pcmk__config_has_error = true; rc = pcmk_rc_schema_validation; goto verify_done; } rc = pcmk__update_configured_schema(cib_object, false); if (rc != pcmk_rc_ok) { pcmk__config_has_error = true; out->err(out, "The cluster will NOT be able to use this configuration.\n" "Please manually update the configuration to conform to the %s syntax.", pcmk__highest_schema_name()); goto verify_done; } /* Process the configuration to set pcmk__config_has_error and * pcmk__config_has_warning. * * @TODO Some parts of the configuration are unpacked only when needed (for * example, action configuration), so we aren't necessarily checking those. */ if (*cib_object != NULL) { unsigned long long flags = pcmk__sched_no_counts; if (status == NULL) { // No status available, so do minimal checks flags |= pcmk__sched_validate_only; } cib_object_copy = pcmk__xml_copy(NULL, *cib_object); /* The scheduler takes ownership of the XML object and potentially * frees it later. We want the caller of pcmk__verify to retain * ownership of the passed-in XML object, hence we pass in a copy * to the scheduler. - */ + */ pcmk__schedule_actions(cib_object_copy, flags, scheduler); } verify_done: if (pcmk__config_has_error) { rc = pcmk_rc_schema_validation; pcmk__config_err("CIB did not pass schema validation"); } else if (pcmk__config_has_warning) { rc = pcmk_rc_schema_validation; } return rc; } int pcmk_verify(xmlNodePtr *xml, const char *cib_source) { pcmk_scheduler_t *scheduler = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; xmlNode *cib_object = NULL; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pe__register_messages(out); pcmk__register_lib_messages(out); rc = pcmk__parse_cib(out, cib_source, &cib_object); if (rc != pcmk_rc_ok) { - out->err(out, "Couldn't parse input"); + out->err(out, "Verification failed: %s", pcmk_rc_str(rc)); goto done; } scheduler = pe_new_working_set(); if (scheduler == NULL) { rc = errno; out->err(out, "Couldn't allocate scheduler data: %s", pcmk_rc_str(rc)); goto done; } scheduler->priv->out = out; rc = pcmk__verify(scheduler, out, &cib_object); done: pe_free_working_set(scheduler); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); pcmk__xml_free(cib_object); return rc; } diff --git a/rpm/Makefile.am b/rpm/Makefile.am index a4dc3cb47c..35452bab60 100644 --- a/rpm/Makefile.am +++ b/rpm/Makefile.am @@ -1,284 +1,284 @@ # -# Copyright 2003-2023 the Pacemaker project contributors +# Copyright 2003-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # # We want to support the use case where this file is fed straight to make # without running automake first, so define defaults for any automake variables # used in this file. top_srcdir ?= .. 
abs_srcdir ?= $(shell pwd) abs_builddir ?= $(abs_srcdir) MAKE ?= make PACKAGE ?= pacemaker AM_V_at ?= @ MKDIR_P ?= mkdir -p include $(top_srcdir)/mk/common.mk include $(top_srcdir)/mk/release.mk EXTRA_DIST = pacemaker.spec.in \ rpmlintrc # Extra options to pass to rpmbuild (this can be used to override the location # options this file normally passes, or to override macros used by the spec) RPM_EXTRA ?= # Where to put RPM artifacts; possible values: # # - subtree (default): RPM sources (i.e. TARFILE) in top-level build directory, # everything else in dedicated "rpm" subdirectory of build tree # # - anything else: The value will be treated as a directory path to be used for # all RPM artifacts. WARNING: The entire directory will get removed with # "make clean" or "make rpm-clean". # RPMDEST ?= subtree RPM_SPEC_DIR_subtree = $(abs_builddir)/SPECS RPM_SRCRPM_DIR_subtree = $(abs_builddir)/SRPMS RPM_OPTS_subtree = --define "_sourcedir $(abs_builddir)/.." \ --define "_topdir $(abs_builddir)" RPM_CLEAN_subtree = "$(abs_builddir)/BUILD" \ "$(abs_builddir)/BUILDROOT" \ "$(abs_builddir)/RPMS" \ "$(abs_builddir)/SPECS" \ "$(abs_builddir)/SRPMS" RPM_SPEC_DIR_other = $(RPMDEST)/SPECS RPM_SRCRPM_DIR_other = $(RPMDEST)/SRPMS RPM_OPTS_other = --define "_sourcedir $(abs_builddir)/.." \ --define "_topdir $(RPMDEST)" RPM_CLEAN_other = "$(RPMDEST)" RPMTYPE = $(shell case "$(RPMDEST)" in \ subtree$(rparen) echo subtree ;; \ *$(rparen) echo other ;; \ esac) RPM_SPEC_DIR = $(RPM_SPEC_DIR_$(RPMTYPE)) RPM_SRCRPM_DIR = $(RPM_SRCRPM_DIR_$(RPMTYPE)) RPM_OPTS = $(RPM_OPTS_$(RPMTYPE)) $(RPM_EXTRA) RPM_CLEAN = $(RPM_CLEAN_$(RPMTYPE)) WITH ?= --without doc # If $(BUILD_COUNTER) is an existing file, its contents will be used as the # spec version in built RPMs, unless $(SPECVERSION) is set to override it, # and the next increment will be written back to the file after building. -BUILD_COUNTER ?= $(shell test -e build.counter && echo build.counter || echo ../build.counter) +BUILD_COUNTER ?= build.counter LAST_COUNT = $(shell test -e "$(BUILD_COUNTER)" && cat "$(BUILD_COUNTER)" || echo 0) COUNT = $(shell expr 1 + $(LAST_COUNT)) SPECVERSION ?= $(COUNT) # SPEC_COMMIT is identical to TAG for DIST and tagged releases, otherwise it is # the short commit ID (which must be used in order for "make export" to use the # same archive name as "make dist") SPEC_COMMIT ?= $(shell \ case $(TAG) in \ Pacemaker-*|DIST$(rparen) \ echo '$(TAG)' ;; \ *$(rparen) \ "$(GIT)" log --pretty=format:%h -n 1 '$(TAG)';; \ esac)$(DIRTY_EXT) SPEC_ABBREV = $(shell printf %s '$(SPEC_COMMIT)' | wc -c) SPEC_RELEASE = $(shell case "$(WITH)" in \ *pre_release*$(rparen) \ [ "$(LAST_RELEASE)" = "$(TAG)" ] \ && echo "$(LAST_RELEASE)" \ || echo "$(NEXT_RELEASE)" ;; \ *$(rparen) \ echo "$(LAST_RELEASE)" ;; \ esac) SPEC_RELEASE_NO = $(shell echo $(SPEC_RELEASE) | sed -e s:Pacemaker-:: -e s:-.*::) MOCK_DIR = $(abs_builddir)/mock MOCK_OPTIONS ?= --resultdir="$(MOCK_DIR)" --no-cleanup-after F ?= $(shell test ! -e /etc/fedora-release && echo 0; test -e /etc/fedora-release && rpm --eval %{fedora}) ARCH ?= $(shell test ! -e /etc/fedora-release && uname -m; test -e /etc/fedora-release && rpm --eval %{_arch}) MOCK_CFG ?= $(shell test -e /etc/fedora-release && echo fedora-$(F)-$(ARCH)) distdir = $(top_distdir)/rpm TARFILE = $(abs_builddir)/../$(top_distdir).tar.gz # Create a source distribution based on a git archive. (If we aren't in a git # checkout, do a make dist instead.) 
.PHONY: export export: cd $(abs_srcdir)/..; \ if [ -z "$(CHECKOUT)" ] && [ -f "$(TARFILE)" ]; then \ echo "`date`: Using existing tarball: $(TARFILE)"; \ elif [ -z "$(CHECKOUT)" ]; then \ $(MAKE) $(AM_MAKEFLAGS) dist; \ echo "`date`: Rebuilt tarball: $(TARFILE)"; \ elif [ -n "$(DIRTY_EXT)" ]; then \ "$(GIT)" commit -m "DO-NOT-PUSH" -a; \ "$(GIT)" archive --prefix=$(top_distdir)/ -o "$(TARFILE)" \ HEAD^{tree}; \ "$(GIT)" reset --mixed HEAD^; \ echo "`date`: Rebuilt $(TARFILE)"; \ elif [ -f "$(TARFILE)" ]; then \ echo "`date`: Using existing tarball: $(TARFILE)"; \ else \ "$(GIT)" archive --prefix=$(top_distdir)/ -o "$(TARFILE)" \ $(TAG)^{tree}; \ echo "`date`: Rebuilt $(TARFILE)"; \ fi # Depend on spec-clean so the spec gets rebuilt every time $(RPM_SPEC_DIR)/$(PACKAGE).spec: spec-clean pacemaker.spec.in $(AM_V_at)$(MKDIR_P) "$(RPM_SPEC_DIR)" $(AM_V_GEN)if [ x"`"$(GIT)" ls-files \ -m pacemaker.spec.in 2>/dev/null`" != x ]; then \ cat "$(abs_srcdir)/pacemaker.spec.in"; \ elif "$(GIT)" cat-file -e $(TAG):rpm/pacemaker.spec.in \ 2>/dev/null; then \ "$(GIT)" show $(TAG):rpm/pacemaker.spec.in; \ elif "$(GIT)" cat-file -e $(TAG):pacemaker.spec.in 2>/dev/null; \ then \ "$(GIT)" show $(TAG):pacemaker.spec.in; \ else \ cat "$(abs_srcdir)/pacemaker.spec.in"; \ fi | sed \ -e 's/^\(%global pcmkversion \).*/\1$(SPEC_RELEASE_NO)/' \ -e 's/^\(%global specversion \).*/\1$(SPECVERSION)/' \ -e 's/^\(%global commit \).*/\1$(SPEC_COMMIT)/' \ -e 's/^\(%global commit_abbrev \).*/\1$(SPEC_ABBREV)/' \ -e "s/PACKAGE_DATE/$$(date +'%a %b %d %Y')/" \ -e 's/PACKAGE_VERSION/$(SPEC_RELEASE_NO)-$(SPECVERSION)/' \ > "$@"; \ if echo "$$(rpmlint --help 2>&1)" | grep -q "ignore-unused-rpmlintrc"; then \ rpmlint --ignore-unused-rpmlintrc --file rpmlintrc "$@"; \ else \ rpmlint --file rpmlintrc "$@"; \ fi .PHONY: spec $(PACKAGE).spec spec $(PACKAGE).spec: $(RPM_SPEC_DIR)/$(PACKAGE).spec spec-clean: -rm -f "$(RPM_SPEC_DIR)/$(PACKAGE).spec" .PHONY: srpm srpm: export srpm-clean $(RPM_SPEC_DIR)/$(PACKAGE).spec if [ -e "$(BUILD_COUNTER)" ]; then \ echo $(COUNT) > "$(BUILD_COUNTER)"; \ fi rpmbuild -bs $(RPM_OPTS) $(WITH) "$(RPM_SPEC_DIR)/$(PACKAGE).spec" .PHONY: srpm-clean srpm-clean: -rm -f "$(RPM_SRCRPM_DIR)"/*.src.rpm # e.g. 
make WITH="--with pre_release" rpm .PHONY: rpm rpm: srpm @echo To create custom builds, edit the flags and options in $(PACKAGE).spec first rpmbuild $(RPM_OPTS) $(WITH) --rebuild "$(RPM_SRCRPM_DIR)"/*.src.rpm .PHONY: rpm-clean rpm-clean: spec-clean srpm-clean -if [ -n "$(RPM_CLEAN)" ]; then rm -rf $(RPM_CLEAN); fi .PHONY: rpm-dep rpm-dep: $(RPM_SPEC_DIR)/$(PACKAGE).spec sudo yum-builddep "$(RPM_SPEC_DIR)/$(PACKAGE).spec" .PHONY: release release: $(MAKE) $(AM_MAKEFLAGS) TAG=$(LAST_RELEASE) rpm # Build the highest-versioned rc tag .PHONY: rc rc: @if [ -z "$(CHECKOUT)" ]; then \ echo 'This target must be run from a git checkout'; \ exit 1; \ fi $(MAKE) $(AM_MAKEFLAGS) TAG="$$("$(GIT)" tag -l 2>/dev/null \ | sed -n -e 's/^\(Pacemaker-[0-9.]*-rc[0-9]*\)$$/\1/p' \ | sort -Vr | head -n 1)" rpm .PHONY: chroot chroot: mock-$(MOCK_CFG) mock-install-$(MOCK_CFG) mock-sh-$(MOCK_CFG) @echo Done .PHONY: mock-next mock-next: $(MAKE) $(AM_MAKEFLAGS) F=$(shell expr 1 + $(F)) mock .PHONY: mock-rawhide mock-rawhide: $(MAKE) $(AM_MAKEFLAGS) F=rawhide mock mock-install-%: @echo "Installing packages" mock --root=$* $(MOCK_OPTIONS) --install "$(MOCK_DIR)"/*.rpm \ vi sudo valgrind lcov gdb fence-agents psmisc .PHONY: mock-install mock-install: mock-install-$(MOCK_CFG) @echo Done .PHONY: mock-sh mock-sh: mock-sh-$(MOCK_CFG) @echo Done mock-sh-%: @echo Connecting mock --root=$* $(MOCK_OPTIONS) --shell @echo Done mock-%: srpm mock-clean mock $(MOCK_OPTIONS) --root=$* --no-cleanup-after --rebuild \ $(WITH) "$(RPM_SRCRPM_DIR)"/*.src.rpm .PHONY: mock mock: mock-$(MOCK_CFG) @echo Done .PHONY: dirty dirty: $(MAKE) $(AM_MAKEFLAGS) DIRTY=yes mock .PHONY: mock-clean mock-clean: -rm -rf "$(MOCK_DIR)" # Make debugging makefile issues easier .PHONY: vars vars: @echo "CHECKOUT=$(CHECKOUT)" @echo "VERSION=$(VERSION)" @echo "COMMIT=$(COMMIT)" @echo "TAG=$(TAG)" @echo "DIRTY=$(DIRTY)" @echo "DIRTY_EXT=$(DIRTY_EXT)" @echo "LAST_RELEASE=$(LAST_RELEASE)" @echo "NEXT_RELEASE=$(NEXT_RELEASE)" @echo "top_distdir=$(top_distdir)" @echo "RPMDEST=$(RPMDEST)" @echo "RPMTYPE=$(RPMTYPE)" @echo "RPM_SPEC_DIR=$(RPM_SPEC_DIR)" @echo "RPM_SRCRPM_DIR=$(RPM_SRCRPM_DIR)" @echo "RPM_OPTS=$(RPM_OPTS)" @echo "RPM_CLEAN=$(RPM_CLEAN)" @echo "WITH=$(WITH)" @echo "BUILD_COUNTER=$(BUILD_COUNTER)" @echo "LAST_COUNT=$(LAST_COUNT)" @echo "COUNT=$(COUNT)" @echo "SPECVERSION=$(SPECVERSION)" @echo "SPEC_COMMIT=$(SPEC_COMMIT)" @echo "SPEC_ABBREV=$(SPEC_ABBREV)" @echo "SPEC_RELEASE=$(SPEC_RELEASE)" @echo "SPEC_RELEASE_NO=$(SPEC_RELEASE_NO)" @echo "TARFILE=$(TARFILE)" .PHONY: clean-local clean-local: mock-clean rpm-clean -rm -f "$(TARFILE)" diff --git a/tools/crm_verify.c b/tools/crm_verify.c index 8b83677218..0341813df4 100644 --- a/tools/crm_verify.c +++ b/tools/crm_verify.c @@ -1,306 +1,300 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const char *SUMMARY = "Check a Pacemaker configuration for errors\n\n" "Check the well-formedness of a complete Pacemaker XML configuration,\n" "its conformance to the configured schema, and the presence of common\n" "misconfigurations. Problems reported as errors must be fixed before the\n" "cluster will work properly. 
                      "cluster will work properly. It is left to the administrator to decide\n"
                      "whether to fix problems reported as warnings.";

struct {
    char *cib_save;
    gboolean use_live_cib;
    char *xml_file;
    gboolean xml_stdin;
    char *xml_string;
    unsigned int verbosity;
} options;

static GOptionEntry data_entries[] = {
    { "live-check", 'L', 0, G_OPTION_ARG_NONE, &options.use_live_cib,
      "Check the configuration used by the running cluster",
      NULL },
    { "xml-file", 'x', 0, G_OPTION_ARG_FILENAME, &options.xml_file,
      "Check the configuration in the named file",
      "FILE" },
    { "xml-pipe", 'p', 0, G_OPTION_ARG_NONE, &options.xml_stdin,
      "Check the configuration piped in via stdin",
      NULL },
    { "xml-text", 'X', 0, G_OPTION_ARG_STRING, &options.xml_string,
      "Check the configuration in the supplied string",
      "XML" },

    { NULL }
};

static GOptionEntry addl_entries[] = {
    { "save-xml", 'S', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME,
      &options.cib_save,
      "Save verified XML to named file (most useful with -L)",
      "FILE" },

    { NULL }
};

static pcmk__supported_format_t formats[] = {
    PCMK__SUPPORTED_FORMAT_NONE,
    PCMK__SUPPORTED_FORMAT_TEXT,
    PCMK__SUPPORTED_FORMAT_XML,
    { NULL, NULL, NULL }
};

static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
    GOptionContext *context = NULL;

    const char *description = "Examples:\n\n"
        "Check the consistency of the configuration in the running cluster:\n\n"
        "\tcrm_verify --live-check\n\n"
        "Check the consistency of the configuration in a given file and "
        "produce quiet output:\n\n"
        "\tcrm_verify --xml-file file.xml --quiet\n\n"
        "Check the consistency of the configuration in a given file and "
        "produce verbose output:\n\n"
        "\tcrm_verify --xml-file file.xml --verbose\n\n";

    GOptionEntry extra_prog_entries[] = {
        { "quiet", 'q', 0, G_OPTION_ARG_NONE, &(args->quiet),
          "Don't print verify information",
          NULL },

        { NULL }
    };

    context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);

    pcmk__add_main_args(context, extra_prog_entries);

    g_option_context_set_description(context, description);

    pcmk__add_arg_group(context, "data", "Data sources:",
                        "Show data options", data_entries);
    pcmk__add_arg_group(context, "additional", "Additional options:",
                        "Show additional options", addl_entries);

    return context;
}

/*!
 * \internal
 * \brief Output a configuration error
 *
 * \param[in] ctx  Output object
 * \param[in] msg  printf(3)-style format string
 * \param[in] ...  Format string arguments
 */
G_GNUC_PRINTF(2, 3)
static void
output_config_error(void *ctx, const char *msg, ...)
{
    va_list ap;
    char *buf = NULL;
    pcmk__output_t *out = ctx;

    va_start(ap, msg);
    CRM_ASSERT(vasprintf(&buf, msg, ap) > 0);
    if (options.verbosity > 0) {
        out->err(out, "error: %s", buf);
    }
    va_end(ap);
}

/*!
 * \internal
 * \brief Output a configuration warning
 *
 * \param[in] ctx  Output object
 * \param[in] msg  printf(3)-style format string
 * \param[in] ...  Format string arguments
 */
G_GNUC_PRINTF(2, 3)
static void
output_config_warning(void *ctx, const char *msg, ...)
{
    va_list ap;
    char *buf = NULL;
    pcmk__output_t *out = ctx;

    va_start(ap, msg);
    CRM_ASSERT(vasprintf(&buf, msg, ap) > 0);
    if (options.verbosity > 0) {
        out->err(out, "warning: %s", buf);
    }
    va_end(ap);
}

int
main(int argc, char **argv)
{
    pcmk_scheduler_t *scheduler = NULL;
    int rc = pcmk_rc_ok;
    crm_exit_t exit_code = CRM_EX_OK;
    GError *error = NULL;

    pcmk__output_t *out = NULL;

    const char *cib_source = NULL;
    xmlNode *cib_object = NULL;
    GOptionGroup *output_group = NULL;
-    const char *failure_type = NULL;
-
    pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
    gchar **processed_args = pcmk__cmdline_preproc(argv, "xSX");
    GOptionContext *context = build_arg_context(args, &output_group);

    pcmk__register_formats(output_group, formats);
    if (!g_option_context_parse_strv(context, &processed_args, &error)) {
        exit_code = CRM_EX_USAGE;
        goto done;
    }

    if (args->verbosity > 0) {
        args->verbosity -= args->quiet;
    }

    pcmk__cli_init_logging("crm_verify", args->verbosity);

    rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
    if (rc != pcmk_rc_ok) {
        exit_code = CRM_EX_ERROR;
-        g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s",
+        g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
+                    "Error creating output format %s: %s",
                    args->output_ty, pcmk_rc_str(rc));
        goto done;
    }

    if (args->version) {
        out->version(out, false);
        goto done;
    }

    pcmk__register_lib_messages(out);

    pcmk__set_config_error_handler(output_config_error, out);
    pcmk__set_config_warning_handler(output_config_warning, out);

    if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) {
        args->verbosity = 1;
    }
    options.verbosity = args->verbosity;

    if (options.xml_file != NULL) {
        cib_source = options.xml_file;
    } else if (options.xml_string != NULL) {
        cib_source = options.xml_string;
    } else if (options.xml_stdin) {
        cib_source = "-";
    } else if (options.use_live_cib) {
        cib_source = NULL;
    } else {
        rc = ENODATA;
-        g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "No input specified");
+        g_set_error(&error, PCMK__RC_ERROR, rc, "No input specified");
        goto done;
    }

    rc = pcmk__parse_cib(out, cib_source, &cib_object);
-
    if (rc != pcmk_rc_ok) {
-        g_set_error(&error, PCMK__EXITC_ERROR, rc, "Couldn't parse input");
+        g_set_error(&error, PCMK__RC_ERROR, rc, "Verification failed: %s",
+                    pcmk_rc_str(rc));
        goto done;
    }

    if (options.cib_save != NULL) {
        pcmk__xml_write_file(cib_object, options.cib_save, false);
    }

    scheduler = pe_new_working_set();
    if (scheduler == NULL) {
        rc = errno;
        g_set_error(&error, PCMK__RC_ERROR, rc,
                    "Could not allocate scheduler data: %s", pcmk_rc_str(rc));
        goto done;
    }

    scheduler->priv->out = out;

    rc = pcmk__verify(scheduler, out, &cib_object);
-    if (rc == pcmk_rc_schema_validation) {
+    if ((rc == pcmk_rc_schema_validation) && !args->quiet) {
+        const char *failure_type = "";
+        const char *verbose_hint = "";
+
        if (pcmk__config_has_error) {
-            failure_type = "Errors found during check: ";
-        } else if (pcmk__config_has_warning) {
-            failure_type = "Warnings found during check: ";
-        } else {
-            failure_type = "";
-        }
-
-        if (args->quiet) {
-            // User requested no output
-
-        } else if (options.verbosity > 0) {
-            out->err(out, "%sconfig not valid", failure_type);
-
-        } else {
-            out->err(out, "%sconfig not valid\n-V may provide more details", failure_type);
-        }
+            failure_type = " (with errors)";
+        } else if (pcmk__config_has_warning) {
+            failure_type = " (with warnings)";
        }
+        if (options.verbosity == 0) {
+            verbose_hint = " (-V may provide more detail)";
+        }
+        out->err(out, "Configuration invalid%s%s", failure_type,
+                 verbose_hint);
+    }

    pe_free_working_set(scheduler);

  done:
    g_strfreev(processed_args);
    pcmk__free_arg_context(context);
    free(options.cib_save);
    free(options.xml_file);
    free(options.xml_string);

    if (cib_object != NULL) {
        pcmk__xml_free(cib_object);
    }

    if (exit_code == CRM_EX_OK) {
        exit_code = pcmk_rc2exitc(rc);
    }

    pcmk__output_and_clear_error(&error, out);

    if (out != NULL) {
        out->finish(out, exit_code, true, NULL);
        pcmk__output_free(out);
    }
    pcmk__unregister_formats();
    crm_exit(exit_code);
}
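For reference, a minimal standalone sketch (not part of the patch) of the message logic the new crm_verify hunk introduces: a single "Configuration invalid" line, an optional " (with errors)"/" (with warnings)" qualifier, and a "-V" hint only at verbosity 0. The print_verify_failure() helper and its parameters are hypothetical stand-ins for the real globals and options used in crm_verify.c.

/* Illustrative sketch only -- not part of the patch. The helper and its
 * parameters are stand-ins for crm_verify's real globals and options. */
#include <stdbool.h>
#include <stdio.h>

static void
print_verify_failure(bool has_error, bool has_warning, unsigned int verbosity)
{
    const char *failure_type = "";
    const char *verbose_hint = "";

    if (has_error) {
        failure_type = " (with errors)";
    } else if (has_warning) {
        failure_type = " (with warnings)";
    }
    if (verbosity == 0) {
        verbose_hint = " (-V may provide more detail)";
    }
    fprintf(stderr, "Configuration invalid%s%s\n", failure_type, verbose_hint);
}

int
main(void)
{
    print_verify_failure(true, false, 0);  // Configuration invalid (with errors) (-V may provide more detail)
    print_verify_failure(false, true, 1);  // Configuration invalid (with warnings)
    return 0;
}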