Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F1842338
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
193 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in
index 83ba0fa3be..b0407614f1 100644
--- a/cts/cts-fencing.in
+++ b/cts/cts-fencing.in
@@ -1,1114 +1,1114 @@
#!@PYTHON@
""" Regression tests for Pacemaker's fencer
"""
__copyright__ = "Copyright 2012-2023 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import argparse
import os
import sys
import subprocess
import tempfile
# These imports allow running from a source checkout after running `make`.
# Note that this doesn't necessarily mean it will successfully run tests,
# but being able to see --help output can be useful.
# Put the source tree's "python" directory first on the module search path
# when it is present.
if os.path.exists("@abs_top_srcdir@/python"):
    sys.path.insert(0, "@abs_top_srcdir@/python")

# When the build directory differs from the source directory and has its own
# "python" directory, that one is searched before the source tree's copy.
if "@abs_top_builddir@" != "@abs_top_srcdir@":
    if os.path.exists("@abs_top_builddir@/python"):
        sys.path.insert(0, "@abs_top_builddir@/python")
from pacemaker.buildoptions import BuildOptions
from pacemaker.exitstatus import ExitStatus
from pacemaker._cts.corosync import Corosync, localname
from pacemaker._cts.errors import ExitCodeError, OutputFoundError, OutputNotFoundError, XmlValidationError
from pacemaker._cts.process import killall, exit_if_proc_running
from pacemaker._cts.test import Test, Tests
# Directory that update_path() checks for cts-fencing.in in order to decide
# between the source tree and the install tree.
# NOTE(review): sys.path[0] is read after the sys.path.insert(0, ...) calls
# near the top of this file, so when an in-tree "python" directory exists this
# is that directory rather than the original first entry -- confirm intended.
TEST_DIR = sys.path[0]
def update_path():
    """ Prepend the directories holding the test executables to PATH

    When TEST_DIR contains cts-fencing.in, the build-tree directories for
    pacemaker-fenced, cts-fence-helper, stonith_admin and cts-support are
    prepended. Otherwise the installed daemon directory is prepended. The
    resulting value is printed and stored in os.environ['PATH'].
    """
    search_path = os.environ['PATH']

    if not os.path.exists("%s/cts-fencing.in" % TEST_DIR):
        print("Running tests from the install tree: %s (not %s)" % (BuildOptions.DAEMON_DIR, TEST_DIR))
        # For pacemaker-fenced, cts-fence-helper, and cts-support
        search_path = "%s:%s" % (BuildOptions.DAEMON_DIR, search_path)
    else:
        build_dir = BuildOptions._BUILD_DIR
        print("Running tests from the source tree: %s (%s)" % (build_dir, TEST_DIR))
        # Each entry is prepended in turn, so the last one listed ends up first
        for subdir in ("daemons/fenced",    # For pacemaker-fenced and cts-fence-helper
                       "tools",             # For stonith_admin
                       "cts/support"):      # For cts-support
            search_path = "%s/%s:%s" % (build_dir, subdir, search_path)

    print('Using PATH="%s"' % search_path)
    os.environ['PATH'] = search_path
class FenceTest(Test):
    """ Executor for a single test """

    def __init__(self, name, description, **kwargs):
        """ Create a new FenceTest instance

        Arguments:

        name        -- Name of the test, passed on to Test
        description -- Description of the test, passed on to Test

        Keyword arguments (all of them are also passed on to Test):

        with_cpg -- If true, mark the test as corosync-enabled and start the
                    fencer with "-c"; otherwise start the fencer with "-s"
        """
        Test.__init__(self, name, description, **kwargs)

        if kwargs.get("with_cpg", False):
            self._enable_corosync = True
            self._daemon_options = ["-c"]
        else:
            self._enable_corosync = False
            self._daemon_options = ["-s"]

        self._daemon_location = "pacemaker-fenced"

    def _kill_daemons(self):
        """ Kill any running pacemakerd and pacemaker-fenced processes """
        killall(["pacemakerd", "pacemaker-fenced"])

    def _start_daemons(self):
        """ Start pacemaker-fenced, logging to self.logpath

        The resulting process object is stored in self._daemon_process.
        """
        if self.verbose:
            # Add the flag only once, so that starting the daemon again for
            # the same test does not keep growing the option list.
            if "-V" not in self._daemon_options:
                self._daemon_options += ["-V"]

            print("Starting %s with %s" % (self._daemon_location, self._daemon_options))

        cmd = ["pacemaker-fenced", "-l", self.logpath] + self._daemon_options
        self._daemon_process = subprocess.Popen(cmd)
class FenceTests(Tests):
""" Collection of all fencing regression tests """
def __init__(self, **kwargs):
    """ Create the test collection along with its Corosync helper object

    All keyword arguments are passed on unchanged to Tests.
    """
    Tests.__init__(self, **kwargs)

    helper = Corosync(self.verbose, self.logdir, "cts-fencing")
    self._corosync = helper
def new_test(self, name, description, with_cpg=False):
    """ Create a FenceTest, add it to this collection, and return it

    Arguments:

    name        -- Name of the new test
    description -- Description of the new test
    with_cpg    -- Passed on to FenceTest as its with_cpg keyword argument
    """
    options = {
        "verbose": self.verbose,
        "with_cpg": with_cpg,
        "timeout": self.timeout,
        "force_wait": self.force_wait,
        "logdir": self.logdir,
    }

    created = FenceTest(name, description, **options)
    self._tests.append(created)
    return created
def run_cpg_only(self):
    """ Run, in registration order, every test that has corosync enabled """
    for candidate in self._tests:
        if not candidate._enable_corosync:
            continue

        candidate.run()
def run_no_cpg(self):
    """ Run, in registration order, every test that does not have corosync enabled """
    standalone = (entry for entry in self._tests if not entry._enable_corosync)

    for entry in standalone:
        entry.run()
def build_api_sanity_tests(self):
    """ Register tests to verify basic API usage

    One test runs cts-fence-helper with "-t" in standalone mode, the other
    runs it with "-m" with corosync enabled. "-V" is appended to the helper
    arguments when this collection is verbose.
    """
    verbose_arg = "-V" if self.verbose else ""

    standalone = self.new_test("standalone_low_level_api_test",
                               "Sanity test client api in standalone mode.")
    standalone.add_cmd("cts-fence-helper", "-t %s" % verbose_arg, validate=False)

    with_cpg = self.new_test("cpg_low_level_api_test",
                             "Sanity test client api using mainloop and cpg.", True)
    with_cpg.add_cmd("cts-fence-helper", "-m %s" % verbose_arg, validate=False)
def build_custom_timeout_tests(self):
    """ Register tests to verify custom timeout usage

    Two corosync-enabled tests are registered. Each registers three
    fence_dummy devices (two of them with a pcmk_off_timeout), fences node3
    with "-t 5", and expects a specific "Total timeout set to" log message.
    The second test also assigns the devices to fencing levels 1 to 3.
    """
    fence_node3 = "--output-as=xml -F node3 -t 5"

    # custom timeout without topology
    plain = self.new_test("cpg_custom_timeout_1",
                          "Verify per device timeouts work as expected without using topology.", True)
    for args in ('--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"',
                 '--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"',
                 '--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4"',
                 fence_node3):
        plain.add_cmd('stonith_admin', args)

    # The requested and per-device timeouts add up to 5+1+4 = 10, while the
    # log message is expected to report 12.
    # NOTE(review): presumably the fencer adds a margin to the sum -- confirm
    plain.add_log_pattern("Total timeout set to 12")

    # custom timeout _WITH_ topology
    levels = self.new_test("cpg_custom_timeout_2",
                           "Verify per device timeouts work as expected _WITH_ topology.", True)
    for args in ('--output-as=xml -R false1 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node1 node2 node3"',
                 '--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=1"',
                 '--output-as=xml -R false2 -a fence_dummy -o "mode=fail" -o "pcmk_host_list=node3" -o "pcmk_off_timeout=4000"'):
        levels.add_cmd('stonith_admin', args)

    # One device per fencing level, in levels 1 through 3
    for index, device in enumerate(("false1", "true1", "false2"), start=1):
        levels.add_cmd("stonith_admin", "--output-as=xml -r node3 -i %d -v %s" % (index, device))

    levels.add_cmd("stonith_admin", fence_node3)

    # The requested and per-device timeouts add up to 5+1+4000 = 4006, while
    # the log message is expected to report 4807.
    # NOTE(review): presumably the same margin as in the first test -- confirm
    levels.add_log_pattern("Total timeout set to 4807")
def build_fence_merge_tests(self):
    """ Register tests to verify when fence operations should be merged

    Each test registers three fence_dummy devices for node3, issues several
    identical fencing requests for node3 (all but the last one without
    waiting), and then expects one "Merging" log message per duplicate
    request and one "Operation" log message per request.
    """
    admin = "stonith_admin"
    fence_node3 = "--output-as=xml -F node3 -t 10"
    failing_1 = "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\""
    passing_1 = "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\" "
    delayed_1 = "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"delay=2\" -o \"pcmk_host_list=node3\" "
    failing_2 = "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\""
    merged_msg = "Merging fencing action 'off' targeting node3 originating from client"
    # This message signifies that a fencing operation (original or duplicate) completed
    done_msg = "Operation 'off' targeting node3 by "

    ### Simple test that overlapping fencing operations get merged
    single = self.new_test("cpg_custom_merge_single",
                           "Verify overlapping identical fencing operations are merged, no fencing levels used.", True)
    for registration in (failing_1, passing_1, failing_2):
        single.add_cmd(admin, registration)
    single.add_cmd_no_wait(admin, fence_node3)
    single.add_cmd(admin, fence_node3)
    ### one merger will happen
    single.add_log_pattern(merged_msg)
    ### both the original and the duplicate operation should complete
    for _ in range(2):
        single.add_log_pattern(done_msg)

    ### Test that multiple mergers occur
    multiple = self.new_test("cpg_custom_merge_multiple",
                             "Verify multiple overlapping identical fencing operations are merged", True)
    for registration in (failing_1, delayed_1, failing_2):
        multiple.add_cmd(admin, registration)
    for _ in range(4):
        multiple.add_cmd_no_wait(admin, fence_node3)
    multiple.add_cmd(admin, fence_node3)
    ### 4 mergers should occur
    for _ in range(4):
        multiple.add_log_pattern(merged_msg)
    ### the original and all four duplicate operations should complete
    for _ in range(5):
        multiple.add_log_pattern(done_msg)

    ### Test that multiple mergers occur with topologies used
    topology = self.new_test("cpg_custom_merge_with_topology",
                             "Verify multiple overlapping identical fencing operations are merged with fencing levels.",
                             True)
    for registration in (failing_1, passing_1, failing_2):
        topology.add_cmd(admin, registration)
    for index, device in ((1, "false1"), (1, "false2"), (2, "true1")):
        topology.add_cmd(admin, "--output-as=xml -r node3 -i %d -v %s" % (index, device))
    for _ in range(4):
        topology.add_cmd_no_wait(admin, fence_node3)
    topology.add_cmd(admin, fence_node3)
    ### 4 mergers should occur
    for _ in range(4):
        topology.add_log_pattern(merged_msg)
    ### the original and all four duplicate operations should complete
    for _ in range(5):
        topology.add_log_pattern(done_msg)
def build_fence_no_merge_tests(self):
    """ Register tests to verify when fence operations should not be merged

    The single test fences node2 without waiting and then node3, and expects
    that no "Merging" log message for node3 appears.
    """
    admin = "stonith_admin"
    no_merge = self.new_test("cpg_custom_no_merge",
                             "Verify differing fencing operations are not merged", True)

    # Register the devices, each able to fence both node3 and node2
    for registration in ("--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\"",
                         "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3 node2\" ",
                         "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3 node2\""):
        no_merge.add_cmd(admin, registration)

    # Fencing levels for node3
    for index, device in ((1, "false1"), (1, "false2"), (2, "true1")):
        no_merge.add_cmd(admin, "--output-as=xml -r node3 -i %d -v %s" % (index, device))

    # The two requests target different nodes
    no_merge.add_cmd_no_wait(admin, "--output-as=xml -F node2 -t 10")
    no_merge.add_cmd(admin, "--output-as=xml -F node3 -t 10")

    no_merge.add_log_pattern("Merging fencing action 'off' targeting node3 originating from client",
                             negative=True)
def build_standalone_tests(self):
    """ Register a grab bag of tests that can be executed in standalone or corosync mode

    Most scenarios are registered twice: once with the "standalone" prefix
    and once with the "cpg" prefix (corosync enabled). The fencing topology
    and fencing history scenarios are registered for the "cpg" variant only.
    """
    test_types = [
        {
            "prefix" : "standalone",
            "use_cpg" : False,
        },
        {
            "prefix" : "cpg",
            "use_cpg" : True,
        },
    ]

    # test what happens when all devices timeout
    for test_type in test_types:
        test = self.new_test("%s_fence_multi_device_failure" % test_type["prefix"],
                             "Verify that all devices timeout, a fencing failure is returned.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false3 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
        # The expected exit status differs between the two modes
        if test_type["use_cpg"]:
            test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -F node3 -t 2", ExitStatus.TIMEOUT)
            test.add_log_pattern("Total timeout set to 7")
        else:
            test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -F node3 -t 2", ExitStatus.ERROR)

        test.add_log_pattern("targeting node3 using false1 returned ")
        test.add_log_pattern("targeting node3 using false2 returned ")
        test.add_log_pattern("targeting node3 using false3 returned ")

    # test what happens when multiple devices can fence a node, but the first device fails.
    for test_type in test_types:
        test = self.new_test("%s_fence_device_failure_rollover" % test_type["prefix"],
                             "Verify that when one fence device fails for a node, the others are tried.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5")

        if test_type["use_cpg"]:
            test.add_log_pattern("Total timeout set to 18")

    # test what happens when we try to use a missing fence-agent.
    for test_type in test_types:
        test = self.new_test("%s_fence_missing_agent" % test_type["prefix"],
                             "Verify proper error-handling when using a non-existent fence-agent.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true1 -a fence_missing -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node2\"")

        test.add_cmd_expected_fail("stonith_admin",
                                   "--output-as=xml -F node3 -t 5",
                                   ExitStatus.NOSUCH)
        test.add_cmd("stonith_admin", "--output-as=xml -F node2 -t 5")

    # simple topology test for one device
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_topology_simple" % test_type["prefix"],
                             "Verify all fencing devices at a level are used.", test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true")
        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5")

        test.add_log_pattern("Total timeout set to 6")
        test.add_log_pattern("targeting node3 using true returned 0")

    # add topology, delete topology, verify fencing still works
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_topology_add_remove" % test_type["prefix"],
                             "Verify fencing occurrs after all topology levels are removed",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true")
        test.add_cmd("stonith_admin", "--output-as=xml -d node3 -i 1")
        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5")

        test.add_log_pattern("Total timeout set to 6")
        test.add_log_pattern("targeting node3 using true returned 0")

    # test what happens when the first fencing level has multiple devices.
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_topology_device_fails" % test_type["prefix"],
                             "Verify if one device in a level fails, the other is tried.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true")
        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20")

        test.add_log_pattern("Total timeout set to 48")
        test.add_log_pattern("targeting node3 using false returned 1")
        test.add_log_pattern("targeting node3 using true returned 0")

    # test what happens when the first fencing level fails.
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_topology_multi_level_fails" % test_type["prefix"],
                             "Verify if one level fails, the next leve is tried.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4")

        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 3")

        test.add_log_pattern("Total timeout set to 21")
        test.add_log_pattern("targeting node3 using false1 returned 1")
        test.add_log_pattern("targeting node3 using false2 returned 1")
        test.add_log_pattern("targeting node3 using true3 returned 0")
        test.add_log_pattern("targeting node3 using true4 returned 0")

    # test what happens when the first fencing level had devices that no one has registered
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_topology_missing_devices" % test_type["prefix"],
                             "Verify topology can continue with missing devices.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")

        # false1 and true1 are referenced below without ever being registered
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4")

        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5")

    # Test what happens if multiple fencing levels are defined, and then the first one is removed.
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_topology_level_removal" % test_type["prefix"],
                             "Verify level removal works.", test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true4 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false2 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1")

        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v false2")

        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true3")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 3 -v true4")

        # Now remove level 2, verify none of the devices in level two are hit.
        test.add_cmd("stonith_admin", "--output-as=xml -d node3 -i 2")

        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20")

        test.add_log_pattern("Total timeout set to 96")
        test.add_log_pattern("targeting node3 using false1 returned 1")
        test.add_log_pattern("targeting node3 using false2 returned ",
                             negative=True)
        test.add_log_pattern("targeting node3 using true3 returned 0")
        test.add_log_pattern("targeting node3 using true4 returned 0")

    # Test targeting a topology level by node name pattern.
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_topology_level_pattern" % test_type["prefix"],
                             "Verify targeting topology by node name pattern works.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     """--output-as=xml -R true -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1 node2 node3" """)
        test.add_cmd("stonith_admin", """--output-as=xml -r '@node.*' -i 1 -v true""")
        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5")
        test.add_log_pattern("targeting node3 using true returned 0")

    # test allowing commas and semicolons as delimiters in pcmk_host_list
    for test_type in test_types:
        test = self.new_test("%s_host_list_delimiters" % test_type["prefix"],
                             "Verify commas and semicolons can be used as pcmk_host_list delimiters",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     """--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=node1,node2,node3" """)
        test.add_cmd("stonith_admin",
                     """--output-as=xml -R true2 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=pcmk1;pcmk2;pcmk3" """)
        # The program name is given as the first argument only; the argument
        # string holds just the options, as in every other command here.
        test.add_cmd("stonith_admin", "--output-as=xml -F node2 -t 5")
        test.add_cmd("stonith_admin", "--output-as=xml -F pcmk3 -t 5")
        test.add_log_pattern("targeting node2 using true1 returned 0")
        test.add_log_pattern("targeting pcmk3 using true2 returned 0")

    # test the stonith builds the correct list of devices that can fence a node.
    for test_type in test_types:
        test = self.new_test("%s_list_devices" % test_type["prefix"],
                             "Verify list of devices that can fence a node is correct",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")

        test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l node1 -V", "true2", "true1")
        test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l node1 -V", "true3", "true1")

    # simple test of device monitor
    for test_type in test_types:
        test = self.new_test("%s_monitor" % test_type["prefix"],
                             "Verify device is reachable", test_type["use_cpg"])
        test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")
        test.add_cmd("stonith_admin", "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -Q true1")
        test.add_cmd("stonith_admin", "--output-as=xml -Q false1")
        test.add_cmd_expected_fail("stonith_admin",
                                   "--output-as=xml -Q true2",
                                   ExitStatus.NOSUCH)

    # Verify monitor occurs for duration of timeout period on failure
    for test_type in test_types:
        test = self.new_test("%s_monitor_timeout" % test_type["prefix"],
                             "Verify monitor uses duration of timeout period given.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     '--output-as=xml -R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"')
        test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -Q true1 -t 5", ExitStatus.ERROR)
        test.add_log_pattern("Attempt 2 to execute")

    # Verify monitor occurs for duration of timeout period on failure, but stops at max retries
    for test_type in test_types:
        test = self.new_test("%s_monitor_timeout_max_retries" % test_type["prefix"],
                             "Verify monitor retries until max retry value or timeout is hit.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     '--output-as=xml -R true1 -a fence_dummy -o "mode=fail" -o "monitor_mode=fail" -o "pcmk_host_list=node3"')
        test.add_cmd_expected_fail("stonith_admin", "--output-as=xml -Q true1 -t 15", ExitStatus.ERROR)
        test.add_log_pattern("Attempted to execute agent fence_dummy (list) the maximum number of times")

    # simple register test
    for test_type in test_types:
        test = self.new_test("%s_register" % test_type["prefix"],
                             "Verify devices can be registered and un-registered",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -Q true1")

        test.add_cmd("stonith_admin", "--output-as=xml -D true1")

        test.add_cmd_expected_fail("stonith_admin",
                                   "--output-as=xml -Q true1",
                                   ExitStatus.NOSUCH)

    # simple reboot test
    for test_type in test_types:
        test = self.new_test("%s_reboot" % test_type["prefix"],
                             "Verify devices can be rebooted",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -B node3 -t 5")

        test.add_cmd("stonith_admin", "--output-as=xml -D true1")

        test.add_cmd_expected_fail("stonith_admin",
                                   "--output-as=xml -Q true1",
                                   ExitStatus.NOSUCH)

    # test fencing history.
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_fence_history" % test_type["prefix"],
                             "Verify last fencing operation is returned.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 5 -V")

        test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -H node3", 'action="off" target="node3" .* status="success"')

    # simple test of dynamic list query
    for test_type in test_types:
        test = self.new_test("%s_dynamic_list_query" % test_type["prefix"],
                             "Verify dynamic list of fencing devices can be retrieved.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
        test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
        test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")

        test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l fake_port_1", 'count="3"')

    # fence using dynamic list query
    for test_type in test_types:
        test = self.new_test("%s_fence_dynamic_list_query" % test_type["prefix"],
                             "Verify dynamic list of fencing devices can be retrieved.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
        test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")
        test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o mode=pass -o mock_dynamic_hosts=fake_port_1")

        test.add_cmd("stonith_admin", "--output-as=xml -F fake_port_1 -t 5 -V")

    # simple test of query using status action
    for test_type in test_types:
        test = self.new_test("%s_status_query" % test_type["prefix"],
                             "Verify dynamic list of fencing devices can be retrieved.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin", "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")
        test.add_cmd("stonith_admin", "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")
        test.add_cmd("stonith_admin", "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_check=status\"")

        test.add_cmd_check_stdout("stonith_admin", "--output-as=xml -l fake_port_1", 'count="3"')

    # test what happens when no reboot action is advertised
    for test_type in test_types:
        test = self.new_test("%s_no_reboot_support" % test_type["prefix"],
                             "Verify reboot action defaults to off when no reboot action is advertised by agent.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V")
        test.add_log_pattern("does not support reboot")
        test.add_log_pattern("using true1 returned 0")

    # make sure reboot is used when reboot action is advertised
    for test_type in test_types:
        test = self.new_test("%s_with_reboot_support" % test_type["prefix"],
                             "Verify reboot action can be used when metadata advertises it.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
        test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V")
        test.add_log_pattern("does not advertise support for 'reboot', performing 'off'",
                             negative=True)
        test.add_log_pattern("using true1 returned 0")

    # make sure all fencing delays are applied correctly and taken into account by fencing timeouts with topology
    for test_type in test_types:
        if not test_type["use_cpg"]:
            continue

        test = self.new_test("%s_topology_delays" % test_type["prefix"],
                             "Verify all fencing delays are applied correctly and taken into account by fencing timeouts with topology.",
                             test_type["use_cpg"])
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"")
        # Resulting "random" delay will always be 1 since (rand() % (delay_max - delay_base)) is always 0 here.
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\" -o \"pcmk_delay_max=2\"")
        test.add_cmd("stonith_admin",
                     "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")

        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2")
        test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true3")

        test.add_cmd("stonith_admin", "--output-as=xml -F node3 --delay 1")

        # Total fencing timeout takes all fencing delays into account.
        test.add_log_pattern("Total timeout set to 579")

        # Fencing timeout for the first device takes the requested fencing delay into account.
        # Fencing timeout also takes pcmk_delay_base into account.
        test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true1 .*145s.*",
                             regex=True)
        # Requested fencing delay is applied only for the first device in the first level.
        # Static delay from pcmk_delay_base is added.
        test.add_log_pattern("Delaying 'off' action targeting node3 using true1 for 2s | timeout=120s requested_delay=1s base=1s max=1s")

        # Fencing timeout no longer takes the requested fencing delay into account for further devices.
        test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using false1 .*144s.*",
                             regex=True)
        # Requested fencing delay is no longer applied for further devices.
        test.add_log_pattern("Delaying 'off' action targeting node3 using false1 for 1s | timeout=120s requested_delay=0s base=1s max=1s")

        # Fencing timeout takes pcmk_delay_max into account.
        test.add_log_pattern(r"Requesting that .* perform 'off' action targeting node3 using true2 .*146s.*",
                             regex=True)
        test.add_log_pattern("Delaying 'off' action targeting node3 using true2 for 1s | timeout=122s requested_delay=0s base=1s max=2s")

        test.add_log_pattern("Delaying 'off' action targeting node3 using true3",
                             negative=True)
def build_nodeid_tests(self):
    """ Register tests checking when a corosync node ID is passed to the fence agent """

    node = localname()

    # Command-line and log templates shared by all three tests
    register_fmt = '--output-as=xml -R true1 -a fence_dummy -o "mode=pass" -o "pcmk_host_list=%s"'
    fence_fmt = "--output-as=xml -F %s -t 3"
    nodeid_fmt = "as nodeid with fence action 'off' targeting %s"

    # nodeid is in the agent's metadata parameters, so it should be supplied
    supplied = self.new_test("cpg_supply_nodeid",
                             "Verify nodeid is given when fence agent has nodeid as parameter",
                             True)
    supplied.add_cmd("stonith_admin", register_fmt % node)
    supplied.add_cmd("stonith_admin", fence_fmt % node)
    supplied.add_log_pattern(nodeid_fmt % node)

    # nodeid must _NOT_ be supplied here; the target is a host name that
    # won't be in corosync.conf
    not_supplied = self.new_test("cpg_do_not_supply_nodeid",
                                 "Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter",
                                 True)
    not_supplied.add_cmd("stonith_admin", register_fmt % "regr-test")
    not_supplied.add_cmd("stonith_admin", fence_fmt % "regr-test")
    not_supplied.add_log_pattern(nodeid_fmt % "regr-test", negative=True)

    # nodeid handling must not break standalone mode
    standalone = self.new_test("standalone_do_not_supply_nodeid",
                               "Verify nodeid in metadata parameter list doesn't kill standalone mode",
                               False)
    standalone.add_cmd("stonith_admin", register_fmt % node)
    standalone.add_cmd("stonith_admin", fence_fmt % node)
    standalone.add_log_pattern(nodeid_fmt % node, negative=True)
def build_unfence_tests(self):
    """ Register tests checking unfencing with agents that request it automatically """

    node = localname()
    node_and_node3 = "%s node3" % node
    auto_agent = "fence_dummy_auto_unfence"
    unfence_args = "--output-as=xml -U %s -t 3" % node

    def register(test, device, agent, mode, hosts):
        # Queue the registration of a single fence device
        test.add_cmd('stonith_admin',
                     '--output-as=xml -R %s -a %s -o "mode=%s" -o "pcmk_host_list=%s"'
                     % (device, agent, mode, hosts))

    def add_level(test, index, device):
        # Queue the addition of a device to one of this node's topology levels
        test.add_cmd("stonith_admin",
                     "--output-as=xml -r %s -i %d -v %s" % (node, index, device))

    def expect_success(test, devices):
        # Each listed device should log a zero result
        for device in devices:
            test.add_log_pattern("using %s returned 0" % device)

    # Automatic unfencing: both devices should be executed
    test = self.new_test("cpg_unfence_required_1",
                         "Verify require unfencing on all devices when automatic=true in agent's metadata",
                         True)
    for device in ("true1", "true2"):
        register(test, device, auto_agent, "pass", node)
    test.add_cmd("stonith_admin", unfence_args)
    expect_success(test, ("true1", "true2"))

    # Automatic unfencing fails if any of the required agents fail
    test = self.new_test("cpg_unfence_required_2",
                         "Verify require unfencing on all devices when automatic=true in agent's metadata",
                         True)
    register(test, "true1", auto_agent, "pass", node)
    register(test, "true2", auto_agent, "fail", node)
    test.add_cmd_expected_fail("stonith_admin",
                               "--output-as=xml -U %s -t 6" % node,
                               ExitStatus.ERROR)

    # Automatic devices placed at different topology levels
    test = self.new_test("cpg_unfence_required_3",
                         "Verify require unfencing on all devices even when at different topology levels",
                         True)
    for device in ("true1", "true2"):
        register(test, device, auto_agent, "pass", node_and_node3)
    add_level(test, 1, "true1")
    add_level(test, 2, "true2")
    test.add_cmd("stonith_admin", unfence_args)
    expect_success(test, ("true1", "true2"))

    # Automatic devices mixed with failing devices across topology levels
    test = self.new_test("cpg_unfence_required_4",
                         "Verify all required devices are executed even with topology levels fail.",
                         True)
    for number in range(1, 5):
        register(test, "true%d" % number, auto_agent, "pass", node_and_node3)
    for number in range(1, 5):
        register(test, "false%d" % number, "fence_dummy", "fail", node_and_node3)

    levels = (
        (1, "true1"), (1, "false1"),
        (2, "false2"), (2, "true2"), (2, "false3"), (2, "true3"),
        (3, "false4"),
        (4, "true4"),
    )
    for index, device in levels:
        add_level(test, index, device)

    test.add_cmd("stonith_admin", unfence_args)
    expect_success(test, ("true1", "true2", "true3", "true4"))
def build_unfence_on_target_tests(self):
    """ Register tests for unfencing that has to run on the target node """

    node = localname()
    on_target_msg = "(on) to be executed on target"

    def register(test, device, hosts):
        # Queue the registration of a passing fence_dummy device
        test.add_cmd("stonith_admin",
                     '--output-as=xml -R %s -a fence_dummy -o "mode=pass" -o "pcmk_host_list=%s"'
                     % (device, hosts))

    def add_levels(test, target):
        # Put true1 in level 1 and true2 in level 2 of the target's topology
        for index in (1, 2):
            test.add_cmd("stonith_admin",
                         "--output-as=xml -r %s -i %d -v true%d" % (target, index, index))

    def unfence_args(target):
        return "--output-as=xml -U %s -t 3" % target

    # Unfencing using an on_target device
    test = self.new_test("cpg_unfence_on_target_1",
                         "Verify unfencing with on_target = true", True)
    register(test, "true1", node)
    test.add_cmd("stonith_admin", unfence_args(node))
    test.add_log_pattern(on_target_msg)

    # Failure of unfencing using an on_target device
    test = self.new_test("cpg_unfence_on_target_2",
                         "Verify failure unfencing with on_target = true",
                         True)
    register(test, "true1", "%s node_fake_1234" % node)
    test.add_cmd_expected_fail("stonith_admin",
                               unfence_args("node_fake_1234"),
                               ExitStatus.NOSUCH)
    test.add_log_pattern(on_target_msg)

    # Unfencing using on_target devices with topology
    test = self.new_test("cpg_unfence_on_target_3",
                         "Verify unfencing with on_target = true using topology",
                         True)
    for device in ("true1", "true2"):
        register(test, device, "%s node3" % node)
    add_levels(test, node)
    test.add_cmd("stonith_admin", unfence_args(node))
    test.add_log_pattern(on_target_msg)

    # Unfencing with topology fails when the target node doesn't exist
    test = self.new_test("cpg_unfence_on_target_4",
                         "Verify unfencing failure with on_target = true using topology",
                         True)
    for device in ("true1", "true2"):
        register(test, device, "%s node_fake" % node)
    add_levels(test, "node_fake")
    test.add_cmd_expected_fail("stonith_admin",
                               unfence_args("node_fake"),
                               ExitStatus.NOSUCH)
    test.add_log_pattern(on_target_msg)
def build_remap_tests(self):
    """ Register tests checking that topology reboots are remapped to off-then-on """

    # Strings that recur across the tests below
    reboot_args = "--output-as=xml -B node_fake -t 5"
    perform_fmt = "perform '%s' action targeting node_fake using %s"
    remapping = "Remapping multiple-device reboot targeting node_fake"
    off_done = "Remapped 'off' targeting node_fake complete, remapping to 'on'"
    undone = "Undoing remap of reboot targeting node_fake"
    total_timeout = "Total timeout set to 3 for peer's fencing targeting node_fake"
    on_ignored_fmt = "Ignoring %s 'on' failure (no capable peers) targeting node_fake"
    # The trailing space is part of the registered command line
    register_fmt = '--output-as=xml -R %s -a %s -o "mode=%s" -o "pcmk_host_list=node_fake" '

    def add_patterns(test, patterns):
        for pattern in patterns:
            test.add_log_pattern(pattern)

    simple = self.new_test("cpg_remap_simple",
                           "Verify sequential topology reboot is remapped to all-off-then-all-on", True)
    simple.add_cmd("stonith_admin",
                   register_fmt % ("true1", "fence_dummy", "pass")
                   + '-o "pcmk_off_timeout=1" -o "pcmk_reboot_timeout=10" ')
    simple.add_cmd("stonith_admin",
                   register_fmt % ("true2", "fence_dummy", "pass")
                   + '-o "pcmk_off_timeout=2" -o "pcmk_reboot_timeout=20" ')
    simple.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1 -v true2")
    simple.add_cmd("stonith_admin", reboot_args)
    add_patterns(simple, (
        remapping,
        # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30)
        total_timeout,
        perform_fmt % ("off", "true1"),
        perform_fmt % ("off", "true2"),
        off_done,
        # fence_dummy sets "on" as an on_target action
        on_ignored_fmt % "true1",
        on_ignored_fmt % "true2",
        undone,
    ))

    simple_off = self.new_test("cpg_remap_simple_off",
                               "Verify sequential topology reboot skips 'on' if "
                               "pcmk_reboot_action=off or agent doesn't support "
                               "'on'", True)
    simple_off.add_cmd("stonith_admin",
                       "--output-as=xml -R true1 -a fence_dummy -o mode=pass "
                       "-o pcmk_host_list=node_fake -o pcmk_off_timeout=1 "
                       "-o pcmk_reboot_timeout=10 -o pcmk_reboot_action=off")
    simple_off.add_cmd("stonith_admin",
                       "--output-as=xml -R true2 -a fence_dummy_no_on "
                       "-o mode=pass -o pcmk_host_list=node_fake "
                       "-o pcmk_off_timeout=2 -o pcmk_reboot_timeout=20")
    simple_off.add_cmd("stonith_admin",
                       "--output-as=xml -r node_fake -i 1 -v true1 -v true2")
    simple_off.add_cmd("stonith_admin", reboot_args)
    add_patterns(simple_off, (
        remapping,
        # timeout should be sum of off timeouts (1+2=3), not reboot timeouts (10+20=30)
        total_timeout,
        perform_fmt % ("off", "true1"),
        perform_fmt % ("off", "true2"),
        off_done,
        # "on" should be skipped
        "Not turning node_fake back on using true1 because the device is "
        "configured to stay off",
        "Not turning node_fake back on using true2 because the agent "
        "doesn't support 'on'",
        undone,
    ))

    automatic = self.new_test("cpg_remap_automatic",
                              "Verify remapped topology reboot skips automatic 'on'", True)
    for device in ("true1", "true2"):
        automatic.add_cmd("stonith_admin",
                          register_fmt % (device, "fence_dummy_auto_unfence", "pass"))
    automatic.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v true1 -v true2")
    automatic.add_cmd("stonith_admin", reboot_args)
    add_patterns(automatic, (
        remapping,
        perform_fmt % ("off", "true1"),
        perform_fmt % ("off", "true2"),
        off_done,
        undone,
    ))
    automatic.add_log_pattern("perform 'on' action targeting node_fake using",
                              negative=True)
    automatic.add_log_pattern("'on' failure",
                              negative=True)

    complex_1 = self.new_test("cpg_remap_complex_1",
                              "Verify remapped topology reboot in second level works if non-remapped first level fails",
                              True)
    for device, mode in (("false1", "fail"), ("true1", "pass"), ("true2", "pass")):
        complex_1.add_cmd("stonith_admin", register_fmt % (device, "fence_dummy", mode))
    complex_1.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v false1")
    complex_1.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 2 -v true1 -v true2")
    complex_1.add_cmd("stonith_admin", reboot_args)
    add_patterns(complex_1, (
        perform_fmt % ("reboot", "false1"),
        remapping,
        perform_fmt % ("off", "true1"),
        perform_fmt % ("off", "true2"),
        off_done,
        on_ignored_fmt % "true1",
        on_ignored_fmt % "true2",
        undone,
    ))

    complex_2 = self.new_test("cpg_remap_complex_2",
                              "Verify remapped topology reboot failure in second level proceeds to third level",
                              True)
    for device, mode in (("false1", "fail"), ("false2", "fail"),
                         ("true1", "pass"), ("true2", "pass"), ("true3", "pass")):
        complex_2.add_cmd("stonith_admin", register_fmt % (device, "fence_dummy", mode))
    complex_2.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 1 -v false1")
    complex_2.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 2 -v true1 -v false2 -v true3")
    complex_2.add_cmd("stonith_admin", "--output-as=xml -r node_fake -i 3 -v true2")
    complex_2.add_cmd("stonith_admin", reboot_args)
    add_patterns(complex_2, (
        perform_fmt % ("reboot", "false1"),
        remapping,
        perform_fmt % ("off", "true1"),
        perform_fmt % ("off", "false2"),
        "Attempted to execute agent fence_dummy (off) the maximum number of times",
        undone,
        perform_fmt % ("reboot", "true2"),
    ))
    complex_2.add_log_pattern("node_fake with true3",
                              negative=True)
def build_query_tests(self):
    """ Register a test that asks stonith_admin for the fence_dummy agent's metadata """

    metadata_args = "--output-as=xml -a fence_dummy --metadata"

    query = self.new_test("get_metadata",
                          "Run stonith_admin --metadata for the fence_dummy agent",
                          True)
    # '<shortdesc lang' is the text handed to add_cmd_check_stdout for checking
    query.add_cmd_check_stdout("stonith_admin", metadata_args, '<shortdesc lang')
def build_metadata_tests(self):
    """ Register tests running the fence agents shipped with pacemaker with -o metadata """

    agents = (
        # fence_dummy prints on stderr to check that tools just listen on stdout
        ("check_metadata_dummy", "fence_dummy",
         {"check_rng": False, "check_stderr": False}),
        ("check_metadata_watchdog", "fence_watchdog",
         {"check_rng": False}),
    )

    for test_name, agent, options in agents:
        test = self.new_test(test_name,
                             "Run %s -o metadata and check for valid xml" % agent,
                             False)
        test.add_cmd(agent, "-o metadata", **options)
def build_validate_tests(self):
    """ Register a test running stonith_admin --validate against the fence_dummy agent """

    validate = "-a fence_dummy --validate"

    test = self.new_test("validate_dummy",
                         "Run stonith_admin --validate-all and check output", False)

    # Without any options, validation is expected to fail
    test.add_cmd_expected_fail("stonith_admin", validate + " --output-as=xml")
    test.add_cmd("stonith_admin",
                 validate + ' -o "delay=5" --output-as=xml',
                 check_rng=False)
    test.add_cmd_expected_fail("stonith_admin",
                               validate + ' -o "delay=15" --output-as=xml')
def setup_environment(self, use_corosync):
    """ Get the host ready before any test is executed """

    # corosync is only needed (and only started) for tests that use it
    if use_corosync:
        self._corosync.start(kill_first=True)

    install_cmd = ["cts-support", "install"]
    subprocess.call(install_cmd)
def cleanup_environment(self, use_corosync):
    """ Restore the host once the desired tests have been executed """

    # Only stop corosync if this run was using it
    if use_corosync:
        self._corosync.stop()

    uninstall_cmd = ["cts-support", "uninstall"]
    subprocess.call(uninstall_cmd)
def build_options():
    """ Parse the command line and return the resulting argparse namespace """

    prog = sys.argv[0]
    usage_examples = ("Example: Run only the test 'start_stop'\n"
                      "\t %s --run-only start_stop\n\n"
                      "Example: Run only the tests with the string 'systemd' present in them\n"
                      "\t %s --run-only-pattern systemd") % (prog, prog)

    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
                                     description="Run pacemaker-fenced regression tests",
                                     epilog=usage_examples)

    # Options are added in the order they should appear in --help output
    parser.add_argument("-c", "--cpg-only", action="store_true",
                        help="Only run tests that require corosync")
    parser.add_argument("-l", "--list-tests", action="store_true",
                        help="Print out all registered tests")
    parser.add_argument("-n", "--no-cpg", action="store_true",
                        help="Only run tests that do not require corosync")
    parser.add_argument("-p", "--run-only-pattern", metavar='PATTERN',
                        help="Run only tests matching the given pattern")
    parser.add_argument("-r", "--run-only", metavar='TEST',
                        help="Run a specific test")
    parser.add_argument("-t", "--timeout", type=float, default=2,
                        help="Up to how many seconds each test case waits for the daemon to "
                             "be initialized. Defaults to 2. The value 0 means no limit.")
    parser.add_argument("-w", "--force-wait", action="store_true",
                        help="Each test case waits the default/specified --timeout for the "
                             "daemon without tracking the log")
    parser.add_argument("-V", "--verbose", action="store_true",
                        help="Verbose output")

    return parser.parse_args()
def main():
    """ Register the fencing regression tests and run those selected by the arguments """

    update_path()

    # Ensure all command output is in portable locale for comparison
    os.environ['LC_ALL'] = "C"

    opts = build_options()

    exit_if_proc_running("pacemaker-fenced")

    use_corosync = not opts.no_cpg
    if use_corosync:
        exit_if_proc_running("corosync")

    # Create a temporary directory for log files (the directory and its
    # contents will automatically be erased when done)
    with tempfile.TemporaryDirectory(prefix="cts-fencing-") as logdir:
        tests = FenceTests(verbose=opts.verbose, timeout=opts.timeout,
                           force_wait=opts.force_wait, logdir=logdir)

        # Registration order is kept as-is
        for register in (tests.build_standalone_tests,
                         tests.build_custom_timeout_tests,
                         tests.build_api_sanity_tests,
                         tests.build_fence_merge_tests,
                         tests.build_fence_no_merge_tests,
                         tests.build_unfence_tests,
                         tests.build_unfence_on_target_tests,
                         tests.build_nodeid_tests,
                         tests.build_remap_tests,
                         tests.build_query_tests,
                         tests.build_metadata_tests,
                         tests.build_validate_tests):
            register()

        if opts.list_tests:
            tests.print_list()
            sys.exit(ExitStatus.OK)

        print("Starting ...")

        try:
            tests.setup_environment(use_corosync)
        except TimeoutError:
            print("corosync did not start in time, exiting")
            sys.exit(ExitStatus.TIMEOUT)

        # Exactly one selection mode is used; earlier options take precedence
        if opts.run_only_pattern:
            tests.run_tests_matching(opts.run_only_pattern)
        elif opts.run_only:
            tests.run_single(opts.run_only)
        elif opts.no_cpg:
            tests.run_no_cpg()
        elif opts.cpg_only:
            tests.run_cpg_only()
        else:
            tests.run_tests()

        # Results are printed whichever selection mode was used
        tests.print_results()

        tests.cleanup_environment(use_corosync)

    tests.exit()
if __name__ == "__main__":
main()
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 82644bcddf..ba63cf890e 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1,3674 +1,3674 @@
/*
* Copyright 2009-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-fenced.h>
// Fencing devices known to this daemon; looked up by device ID in this file
GHashTable *device_list = NULL;

// NOTE(review): presumably the configured fencing topology -- confirm keying
GHashTable *topology = NULL;

// Every command created by create_async_command() that has not been freed yet
static GList *cmd_list = NULL;

// NOTE(review): not used in the code visible here; presumably a table of
// request handlers -- confirm
static GHashTable *fenced_handlers = NULL;
/* State of one search for fencing devices capable of acting on a target.
 * Replies are counted as they arrive, and the callback is called once all
 * of them have been received.
 */
struct device_search_s {
    /* target of fence action */
    char *host;
    /* requested fence action */
    char *action;
    /* timeout to use if a device is queried dynamically for possible targets */
    int per_device_timeout;
    /* number of registered fencing devices at time of request */
    int replies_needed;
    /* number of device replies received so far */
    int replies_received;
    /* whether the target is eligible to perform requested action (or off) */
    bool allow_suicide;

    /* private data to pass to search callback function */
    void *user_data;
    /* function to call when all replies have been received */
    void (*callback) (GList * devices, void *user_data);
    /* devices capable of performing requested action (or off if remapping) */
    GList *capable;
    /* Whether to perform searches that support the action */
    uint32_t support_action_only;
};
static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(int pid, const pcmk__action_result_t *result,
void *user_data);
static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
pcmk__client_t *client);
static void search_devices_record_result(struct device_search_s *search, const char *device,
gboolean can_fence);
static int get_agent_metadata(const char *agent, xmlNode **metadata);
static void read_action_metadata(stonith_device_t *device);
static enum fenced_target_by unpack_level_kind(const xmlNode *level);
/* A fencing command being tracked by the fencer. Instances are created from
 * request XML by create_async_command() and released by free_async_command().
 */
typedef struct async_command_s {

    int id;         // Call ID taken from the request
    int pid;
    int fd_stdout;
    int options;    // Call options taken from the request
    int default_timeout; /* seconds */
    int timeout; /* seconds */

    int start_delay; // seconds (-1 means disable static/random fencing delays)
    int delay_id;   // If nonzero, a GLib source ID that is removed on free

    // Strings below are owned by the command and freed with it
    char *op;
    char *origin;
    char *client;
    char *client_name;
    char *remote_op_id;

    char *target;
    uint32_t target_nodeid;
    char *action;
    char *device;   // ID used to look up the device in the global device table

    GList *device_list;         // Entries are freed with free()
    GList *next_device_iter; // device_list entry for next device to execute

    void *internal_user_data;

    // Result callback (create_async_command() sets this to st_child_done)
    void (*done_cb) (int pid, const pcmk__action_result_t *result,
                     void *user_data);
    guint timer_sigterm;
    guint timer_sigkill;
    /*! If the operation timed out, this is the last signal
     *  we sent to the process to get it to terminate */
    int last_timeout_signo;

    // fork_cb() moves the device from activating_on to active_on
    stonith_device_t *active_on;
    stonith_device_t *activating_on;
} async_command_t;
static xmlNode *construct_async_reply(const async_command_t *cmd,
const pcmk__action_result_t *result);
static gboolean
is_action_required(const char *action, const stonith_device_t *device)
{
return (device != NULL) && device->automatic_unfencing
&& pcmk__str_eq(action, "on", pcmk__str_none);
}
/*!
 * \internal
 * \brief Get the maximum fencing delay configured for a device
 *
 * \param[in] device  Fencing device whose parameters are checked
 * \param[in] action  Requested action (only fencing actions get a delay)
 *
 * \return Parsed delay parameter divided by 1000, or 0 if \p action is not a
 *         fencing action or the parameter is not set
 */
static int
get_action_delay_max(const stonith_device_t *device, const char *action)
{
    const char *spec = NULL;

    if (!pcmk__is_fencing_action(action)) {
        return 0;
    }

    spec = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
    if (spec == NULL) {
        return 0;
    }
    return crm_parse_interval_spec(spec) / 1000;
}
/*!
 * \internal
 * \brief Get the base fencing delay configured for a device
 *
 * The parameter is either a single interval specification, or a map of
 * "name:interval" entries separated by semicolons, spaces, or tabs. When a
 * target is given, the entry whose name equals the target (ignoring case)
 * is used.
 *
 * \param[in] device  Fencing device whose parameters are checked
 * \param[in] action  Requested action (only fencing actions get a delay)
 * \param[in] target  Name of node to be fenced (may be NULL)
 *
 * \return Parsed delay divided by 1000, or 0 if \p action is not a fencing
 *         action, the parameter is not set, or the parameter is a map with
 *         no entry selected for \p target
 */
static int
get_action_delay_base(const stonith_device_t *device, const char *action,
                      const char *target)
{
    char *hash_value = NULL;
    int delay_base = 0;

    if (!pcmk__is_fencing_action(action)) {
        return 0;
    }

    hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);

    if (hash_value) {
        // strtok() modifies its input, so work on a copy
        char *value = strdup(hash_value);
        char *valptr = value;   // value may be repointed; keep start for free()

        CRM_ASSERT(value != NULL);

        if (target != NULL) {
            const size_t target_len = strlen(target);

            for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) {
                char *mapval = strchr(val, ':');
                size_t name_len = 0;

                if (mapval == NULL || mapval[1] == 0) {
                    crm_err("pcmk_delay_base: empty value in mapping: %s", val);
                    continue;
                }

                /* Require the whole name to match, not just a prefix:
                 * comparing only the key's length would let target "node10"
                 * pick up the entry for "node1"
                 */
                name_len = (size_t) (mapval - val);
                if ((name_len > 0) && (name_len == target_len)
                    && (strncasecmp(target, val, name_len) == 0)) {
                    value = mapval + 1;
                    crm_debug("pcmk_delay_base mapped to %s for %s",
                              value, target);
                    break;
                }
            }
        }

        // A remaining colon means the value is a map with no entry selected
        if (strchr(value, ':') == NULL) {
            delay_base = crm_parse_interval_spec(value) / 1000;
        }

        free(valptr);
    }

    return delay_base;
}
/*!
 * \internal
 * \brief Determine the timeout to use for an action on a fencing device
 *
 * A pcmk_(action)_timeout device parameter, when configured, takes precedence
 * over the supplied default.
 *
 * \param[in] device           Fencing device that will execute the action
 * \param[in] action           Name of the fencing action
 * \param[in] default_timeout  Fallback if the device has no
 *                             pcmk_(action)_timeout parameter
 *
 * \return Value of pcmk_(action)_timeout if set, otherwise \p default_timeout
 * \note It would be more consistent to configure reboot/off/on timeouts the
 *       way start/stop/monitor timeouts are, with an <operation> entry in
 *       the fencing resource configuration. That is not sufficient, though:
 *       devices may be registered directly through the fencer's
 *       register_device() API rather than via the CIB (stonith_admin does so
 *       for its -R option, and the executor does so to ensure a device is
 *       registered when a command is issued). Because pcmk_*_timeout values
 *       are device properties, the fencer can pick them up at registration
 *       regardless of whether it came from a CIB change or an API call.
 */
static int
get_action_timeout(const stonith_device_t *device, const char *action,
                   int default_timeout)
{
    char param_name[64] = { 0, };
    const char *configured = NULL;

    if ((action == NULL) || (device == NULL) || (device->params == NULL)) {
        return default_timeout;
    }

    /* A "reboot" request to a device that does not support it will be
     * remapped to "off", so the "off" timeout is the relevant one
     */
    if (pcmk__str_eq(action, "reboot", pcmk__str_none)
        && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
        crm_trace("%s doesn't support reboot, using timeout for off instead",
                  device->id);
        action = "off";
    }

    // An action-specific timeout in the device configuration wins
    snprintf(param_name, sizeof(param_name), "pcmk_%s_timeout", action);
    configured = g_hash_table_lookup(device->params, param_name);

    return (configured != NULL)? atoi(configured) : default_timeout;
}
/*!
 * \internal
 * \brief Look up the device a fencing operation is currently using
 *
 * \param[in] cmd  Fencing operation to check
 *
 * \return Device registered under the ID recorded in \p cmd, or NULL if
 *         there is none
 */
static stonith_device_t *
cmd_device(const async_command_t *cmd)
{
    if ((cmd != NULL) && (cmd->device != NULL) && (device_list != NULL)) {
        return g_hash_table_lookup(device_list, cmd->device);
    }
    return NULL;
}
/*!
 * \internal
 * \brief Get the action a device is configured to use for reboots
 *
 * \param[in] device_id  Device ID
 *
 * \return Value of the pcmk_reboot_action parameter of \p device_id if the
 *         device is known and has one, otherwise "reboot"
 */
const char *
fenced_device_reboot_action(const char *device_id)
{
    const char *action = NULL;
    stonith_device_t *device = NULL;

    if ((device_list != NULL) && (device_id != NULL)) {
        device = g_hash_table_lookup(device_list, device_id);
    }

    if ((device != NULL) && (device->params != NULL)) {
        action = g_hash_table_lookup(device->params, "pcmk_reboot_action");
    }

    return pcmk__s(action, "reboot");
}
/*!
 * \internal
 * \brief Check whether a device is flagged as supporting the "on" action
 *
 * \param[in] device_id  Device ID
 *
 * \return true if \p device_id is a known device that supports "on",
 *         otherwise false
 */
bool
fenced_device_supports_on(const char *device_id)
{
    stonith_device_t *device = NULL;

    if ((device_list == NULL) || (device_id == NULL)) {
        return false;
    }

    device = g_hash_table_lookup(device_list, device_id);
    if (device == NULL) {
        return false;
    }
    return pcmk_is_set(device->flags, st_device_supports_on);
}
/*!
 * \internal
 * \brief Free a fencing command and stop tracking it
 *
 * \param[in,out] cmd  Command to free (may be NULL)
 */
static void
free_async_command(async_command_t * cmd)
{
    if (cmd == NULL) {
        return;
    }

    // Cancel the pending delay source, if any
    if (cmd->delay_id != 0) {
        g_source_remove(cmd->delay_id);
    }

    // Drop the command from the global command list
    cmd_list = g_list_remove(cmd_list, cmd);

    g_list_free_full(cmd->device_list, free);

    free(cmd->op);
    free(cmd->origin);
    free(cmd->client);
    free(cmd->client_name);
    free(cmd->remote_op_id);
    free(cmd->target);
    free(cmd->action);
    free(cmd->device);
    free(cmd);
}
/*!
 * \internal
 * \brief Build a new asynchronous fencing command from a request
 *
 * \param[in] msg  Fencing request XML (from IPC or CPG)
 *
 * \return Newly allocated fencing command on success, otherwise NULL
 *
 * \note Memory errors are asserted on, so a NULL return means the message
 *       could not be parsed.
 */
static async_command_t *
create_async_command(xmlNode *msg)
{
    xmlNode *action_xml = NULL;
    async_command_t *cmd = NULL;

    if (msg == NULL) {
        return NULL;
    }

    action_xml = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
    if (action_xml == NULL) {
        return NULL;
    }

    cmd = calloc(1, sizeof(*cmd));
    CRM_ASSERT(cmd != NULL);

    // All messages must include these
    cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
    cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
    cmd->action = crm_element_value_copy(action_xml, F_STONITH_ACTION);
    if (!cmd->action || !cmd->op || !cmd->client) {
        free_async_command(cmd);
        return NULL;
    }

    // Numeric fields keep their zero initialization if absent from the request
    crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
    crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
    crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
    crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
    cmd->timeout = cmd->default_timeout;

    // Values read from the request itself
    cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
    cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
    cmd->origin = crm_element_value_copy(msg, F_ORIG);

    // Values read from the element carrying the action
    cmd->device = crm_element_value_copy(action_xml, F_STONITH_DEVICE);
    cmd->target = crm_element_value_copy(action_xml, F_STONITH_TARGET);

    cmd->done_cb = st_child_done;

    // Track in global command list
    cmd_list = g_list_append(cmd_list, cmd);

    return cmd;
}
/*!
 * \internal
 * \brief Get the action limit configured for a device
 *
 * \param[in] device  Fencing device whose parameters are checked
 *
 * \return Configured limit, or 1 if the parameter is unset, cannot be
 *         parsed, or is 0
 */
static int
get_action_limit(stonith_device_t * device)
{
    int limit = 1;
    const char *configured = g_hash_table_lookup(device->params,
                                                 PCMK_STONITH_ACTION_LIMIT);

    if (configured == NULL) {
        return 1;
    }
    if (pcmk__scan_min_int(configured, &limit, INT_MIN) != pcmk_rc_ok) {
        return 1;
    }
    return (limit == 0)? 1 : limit;
}
/*!
 * \internal
 * \brief Count the tracked commands that are active on a device
 *
 * \param[in] device  Fencing device to check
 *
 * \return Number of entries in the global command list whose active device
 *         is \p device (0 if \p device is NULL)
 */
static int
get_active_cmds(stonith_device_t * device)
{
    int active = 0;

    CRM_CHECK(device != NULL, return 0);

    for (GList *iter = cmd_list; iter != NULL; iter = iter->next) {
        const async_command_t *cmd = iter->data;

        if (cmd->active_on == device) {
            active++;
        }
    }

    return active;
}
/*!
 * \internal
 * \brief Record that a command is now active on its device
 *
 * \param[in]     pid        Process ID to log for the operation
 * \param[in,out] user_data  Fencing command (async_command_t *)
 */
static void
fork_cb(int pid, void *user_data)
{
    async_command_t *cmd = (async_command_t *) user_data;
    stonith_device_t *device = cmd->activating_on;

    if (device == NULL) {
        /* in case of a retry we've done the move from
         * activating_on to active_on already
         */
        device = cmd->active_on;
    }

    CRM_ASSERT(device);
    crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
              cmd->action, pid,
              ((cmd->target == NULL)? "" : " targeting "),
              pcmk__s(cmd->target, ""), device->id, cmd->timeout);

    cmd->active_on = device;
    cmd->activating_on = NULL;
}
/*!
 * \internal
 * \brief Timer callback that retries fetching a device's agent metadata
 *
 * On success, the device's action metadata and parameter flags are read from
 * the metadata (when any was returned). If retrieval should be retried
 * (EAGAIN), the timer period is doubled as long as it is below 160 seconds.
 *
 * \param[in,out] data  Fence device (stonith_device_t *)
 *
 * \return G_SOURCE_CONTINUE if retrieval should be retried, otherwise
 *         G_SOURCE_REMOVE
 */
static int
get_agent_metadata_cb(gpointer data) {
    stonith_device_t *device = data;
    int rc = get_agent_metadata(device->agent, &device->agent_metadata);

    if (rc == EAGAIN) {
        guint period_ms = pcmk__mainloop_timer_get_period(device->timer);

        if (period_ms < 160 * 1000) {
            mainloop_timer_set_period(device->timer, 2 * period_ms);
        }
        return G_SOURCE_CONTINUE;
    }

    if ((rc == pcmk_rc_ok) && (device->agent_metadata != NULL)) {
        read_action_metadata(device);
        stonith__device_parameter_flags(&(device->flags), device->id,
                                        device->agent_metadata);
    }
    return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Call a command's action callback for an internal (not library) result
 *
 * A temporary result object is filled in, handed to the command's done_cb
 * (with a process ID of 0), and reset afterward.
 *
 * \param[in,out] cmd               Command to report result for
 * \param[in]     exit_status       Exit status to use for result
 * \param[in]     execution_status  Execution status to use for result
 * \param[in]     exit_reason       Exit reason to use for result
 */
static void
report_internal_result(async_command_t *cmd, int exit_status,
                       int execution_status, const char *exit_reason)
{
    pcmk__action_result_t internal_result = PCMK__UNKNOWN_RESULT;

    pcmk__set_result(&internal_result, exit_status, execution_status,
                     exit_reason);
    cmd->done_cb(0, &internal_result, cmd);
    pcmk__reset_result(&internal_result);
}
/*!
 * \internal
 * \brief Run the next runnable pending operation of a fence device, if any
 *
 * Nothing is started if the device already has at least as many active
 * commands as its action limit allows (a negative limit disables the check).
 * Otherwise, the first pending operation that is not waiting on a start delay
 * is removed from the device's queue and either handled internally (watchdog
 * devices, failure to load CIB secrets) or started asynchronously.
 *
 * \param[in,out] device  Fence device to execute an operation with
 *
 * \return FALSE if \p device is NULL, otherwise TRUE
 */
static gboolean
stonith_device_execute(stonith_device_t * device)
{
    int exec_rc = 0;
    const char *action_str = NULL;
    const char *host_arg = NULL;
    async_command_t *cmd = NULL;
    stonith_action_t *action = NULL;
    int active_cmds = 0;
    int action_limit = 0;
    GList *gIter = NULL;
    GList *gIterNext = NULL;

    CRM_CHECK(device != NULL, return FALSE);

    active_cmds = get_active_cmds(device);
    action_limit = get_action_limit(device);
    if (action_limit > -1 && active_cmds >= action_limit) {
        // active_cmds is a signed int, so it must be formatted with %d
        crm_trace("%s is over its action limit of %d (%d active action%s)",
                  device->id, action_limit, active_cmds,
                  pcmk__plural_s(active_cmds));
        return TRUE;
    }

    // Pick the first pending operation that is not waiting on a delay timer
    for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
        async_command_t *pending_op = gIter->data;

        // Save the successor now because gIter may be unlinked and freed below
        gIterNext = gIter->next;

        if (pending_op && pending_op->delay_id) {
            crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
                      "waiting for start delay of %ds",
                      pending_op->action,
                      ((pending_op->target == NULL)? "" : " targeting "),
                      pcmk__s(pending_op->target, ""),
                      device->id, pending_op->start_delay);
            continue;
        }

        device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
        g_list_free_1(gIter);

        cmd = pending_op;
        break;
    }

    if (cmd == NULL) {
        crm_trace("No actions using %s are needed", device->id);
        return TRUE;
    }

    if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
                         STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
        if (pcmk__is_fencing_action(cmd->action)) {
            if (node_does_watchdog_fencing(stonith_our_uname)) {
                pcmk__panic(__func__);
                goto done;
            }
        } else {
            crm_info("Faking success for %s watchdog operation", cmd->action);
            report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
            goto done;
        }
    }

#if SUPPORT_CIBSECRETS
    exec_rc = pcmk__substitute_secrets(device->id, device->params);
    if (exec_rc != pcmk_rc_ok) {
        if (pcmk__str_eq(cmd->action, "stop", pcmk__str_none)) {
            crm_info("Proceeding with stop operation for %s "
                     "despite being unable to load CIB secrets (%s)",
                     device->id, pcmk_rc_str(exec_rc));
        } else {
            crm_err("Considering %s unconfigured "
                    "because unable to load CIB secrets: %s",
                    device->id, pcmk_rc_str(exec_rc));
            report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
                                   "Failed to get CIB secrets");
            goto done;
        }
    }
#endif

    action_str = cmd->action;
    if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_none)
        && !pcmk_is_set(device->flags, st_device_supports_reboot)) {

        crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
                   "because agent '%s' does not support reboot",
                   ((cmd->target == NULL)? "" : " targeting "),
                   pcmk__s(cmd->target, ""), device->id, device->agent);
        action_str = "off";
    }

    // Name of the agent parameter used to pass the target, if flagged
    if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
        host_arg = "port";

    } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
        host_arg = "plug";
    }

    action = stonith__action_create(device->agent, action_str, cmd->target,
                                    cmd->target_nodeid, cmd->timeout,
                                    device->params, device->aliases, host_arg);

    /* for async exec, exec_rc is negative for early error exit
       otherwise handling of success/errors is done via callbacks */
    cmd->activating_on = device;
    exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
                                     fork_cb);
    if (exec_rc < 0) {
        cmd->activating_on = NULL;
        cmd->done_cb(0, stonith__action_result(action), cmd);
        stonith__destroy_action(action);
    }

done:
    /* Device might get triggered to work by multiple fencing commands
     * simultaneously. Trigger the device again to make sure any
     * remaining concurrent commands get executed. */
    if (device->pending_ops) {
        mainloop_set_trigger(device->work);
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Trigger callback that executes pending work for a fence device
 *
 * \param[in,out] user_data  Fence device (stonith_device_t *)
 *
 * \return Return value of stonith_device_execute() for the device
 */
static gboolean
stonith_device_dispatch(gpointer user_data)
{
    stonith_device_t *device = user_data;

    return stonith_device_execute(device);
}
/*!
 * \internal
 * \brief Timer callback run when a command's start delay has elapsed
 *
 * Clears the command's delay timer ID and, if the command's device can still
 * be found, triggers the device so the command can be executed.
 *
 * \param[in,out] data  Delayed command (async_command_t *)
 *
 * \return FALSE always
 */
static gboolean
start_delay_helper(gpointer data)
{
    async_command_t *delayed_cmd = data;
    stonith_device_t *target_device = cmd_device(delayed_cmd);

    delayed_cmd->delay_id = 0;
    if (target_device != NULL) {
        mainloop_set_trigger(target_device->work);
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Queue a command on a fence device, applying any start delay
 *
 * The command is bound to the device (device ID, action timeout and, when the
 * device includes node IDs, the target's node ID), appended to the device's
 * pending operations, and the device is triggered. Unless the requested delay
 * is negative, a static and/or random delay derived from the device's delay
 * settings is added to the command's start delay, and a timer is scheduled
 * when the resulting delay is positive.
 *
 * \param[in,out] cmd     Command to schedule (nothing is done if NULL)
 * \param[in,out] device  Device to schedule \p cmd on (nothing is done if NULL)
 */
static void
schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
{
    int delay_max = 0;
    int delay_base = 0;
    int requested_delay = 0;

    CRM_CHECK(cmd != NULL, return);
    CRM_CHECK(device != NULL, return);

    // Read the requested delay only after cmd is known to be non-NULL
    requested_delay = cmd->start_delay;

    if (cmd->device) {
        free(cmd->device);
    }

    if (device->include_nodeid && (cmd->target != NULL)) {
        crm_node_t *node = crm_get_peer(0, cmd->target);

        cmd->target_nodeid = node->id;
    }

    cmd->device = strdup(device->id);
    cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);

    if (cmd->remote_op_id) {
        crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
                  "with op id %.8s and timeout %ds",
                  cmd->action,
                  (cmd->target == NULL)? "" : " targeting ",
                  pcmk__s(cmd->target, ""),
                  device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
    } else {
        crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
                  cmd->action,
                  (cmd->target == NULL)? "" : " targeting ",
                  pcmk__s(cmd->target, ""),
                  device->id, cmd->client, cmd->timeout);
    }

    device->pending_ops = g_list_append(device->pending_ops, cmd);
    mainloop_set_trigger(device->work);

    // Value -1 means disable any static/random fencing delays
    if (requested_delay < 0) {
        return;
    }

    delay_max = get_action_delay_max(device, cmd->action);
    delay_base = get_action_delay_base(device, cmd->action, cmd->target);
    if (delay_max == 0) {
        // No maximum configured: use the base as a purely static delay
        delay_max = delay_base;
    }
    if (delay_max < delay_base) {
        crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
                 PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
                 "(limiting to maximum delay)",
                 delay_base, delay_max, cmd->action, device->id);
        delay_base = delay_max;
    }
    if (delay_max > 0) {
        // coverity[dont_call] We're not using rand() for security
        cmd->start_delay +=
            ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
            + delay_base;
    }

    if (cmd->start_delay > 0) {
        crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
                   " timeout=%ds requested_delay=%ds base=%ds max=%ds",
                   cmd->action,
                   (cmd->target == NULL)? "" : " targeting ",
                   pcmk__s(cmd->target, ""),
                   device->id, cmd->start_delay, cmd->timeout,
                   requested_delay, delay_base, delay_max);
        cmd->delay_id =
            g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
    }
}
/*!
 * \internal
 * \brief Free a fence device and everything it owns
 *
 * Every operation still pending on the device is reported as failed (no fence
 * device) through its callback before the device's members are released.
 *
 * \param[in,out] data  Fence device to free (stonith_device_t *)
 */
static void
free_device(gpointer data)
{
    stonith_device_t *device = data;

    g_hash_table_destroy(device->params);
    g_hash_table_destroy(device->aliases);

    for (GList *iter = device->pending_ops; iter != NULL; iter = iter->next) {
        async_command_t *purged = iter->data;

        crm_warn("Removal of device '%s' purged operation '%s'", device->id, purged->action);
        report_internal_result(purged, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
                               "Device was removed before action could be executed");
    }
    g_list_free(device->pending_ops);

    g_list_free_full(device->targets, free);

    if (device->timer != NULL) {
        mainloop_timer_stop(device->timer);
        mainloop_timer_del(device->timer);
    }

    mainloop_destroy_trigger(device->work);

    free_xml(device->agent_metadata);
    free(device->namespace);
    if (device->on_target_actions != NULL) {
        g_string_free(device->on_target_actions, TRUE);
    }
    free(device->agent);
    free(device->id);
    free(device);
}
/*!
 * \internal
 * \brief Destroy the global fence device table, if it exists
 */
void free_device_list(void)
{
    if (device_list == NULL) {
        return;
    }
    g_hash_table_destroy(device_list);
    device_list = NULL;
}
/*!
 * \internal
 * \brief Create the global fence device table if it does not exist yet
 *
 * Table values are released with free_device(); no key destructor is set.
 */
void
init_device_list(void)
{
    if (device_list != NULL) {
        return;
    }
    device_list = pcmk__strkey_table(NULL, free_device);
}
/*!
 * \internal
 * \brief Parse a host map string into a table of name-to-value aliases
 *
 * The map is scanned one character at a time. A name ends at '=' or ':', and
 * the value that follows it ends at ';', a space, a tab, or the end of the
 * string. A backslash makes the scan skip the character after it, and all
 * backslashes are removed from a value before it is stored (names are stored
 * as copied).
 *
 * \param[in]     hostmap  Host map string to parse (may be NULL)
 * \param[in,out] targets  If not NULL, a copy of each parsed value is appended
 *                         to this list
 *
 * \return Newly created table of aliases (empty if \p hostmap is NULL or
 *         contains no mappings)
 * \note Keys are presumably compared case-insensitively, given the use of
 *       pcmk__strikey_table() -- confirm against that helper.
 */
static GHashTable *
build_port_aliases(const char *hostmap, GList ** targets)
{
char *name = NULL;
// "last" is the offset where the token currently being scanned starts
int last = 0, lpc = 0, max = 0, added = 0;
GHashTable *aliases = pcmk__strikey_table(free, free);
if (hostmap == NULL) {
return aliases;
}
max = strlen(hostmap);
// lpc runs up to and including the terminating NUL, which ends the last value
for (; lpc <= max; lpc++) {
switch (hostmap[lpc]) {
/* Skip escaped chars */
case '\\':
lpc++;
break;
/* Assignment chars */
case '=':
case ':':
if (lpc > last) {
// Replace any previously parsed name that never received a value
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, hostmap + last, lpc - last);
}
last = lpc + 1;
break;
/* Delimiter chars */
/* case ',': Potentially used to specify multiple ports */
case 0:
case ';':
case ' ':
case '\t':
if (name) {
char *value = NULL;
int k = 0;
value = calloc(1, 1 + lpc - last);
memcpy(value, hostmap + last, lpc - last);
// Compact the value in place, dropping every backslash
for (int i = 0; value[i] != '\0'; i++) {
if (value[i] != '\\') {
value[k++] = value[i];
}
}
value[k] = '\0';
crm_debug("Adding alias '%s'='%s'", name, value);
// The table takes ownership of both name and value
g_hash_table_replace(aliases, name, value);
if (targets) {
*targets = g_list_append(*targets, strdup(value));
}
value = NULL;
name = NULL;
added++;
} else if (lpc > last) {
crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
}
last = lpc + 1;
break;
}
// Stop at the terminator (also reached when an escape skipped onto it)
if (hostmap[lpc] == 0) {
break;
}
}
if (added == 0) {
crm_info("No host mappings detected in '%s'", hostmap);
}
// Release a trailing name that was never paired with a value
free(name);
return aliases;
}
// Cache of fence agent metadata text, keyed by agent name (created on demand)
GHashTable *metadata_cache = NULL;
/*!
 * \internal
 * \brief Destroy the agent metadata cache, if it exists
 */
void
free_metadata_cache(void) {
    if (metadata_cache == NULL) {
        return;
    }
    g_hash_table_destroy(metadata_cache);
    metadata_cache = NULL;
}
/*!
 * \internal
 * \brief Create the agent metadata cache if it does not exist yet
 *
 * Both keys and values of the cache are released with free().
 */
static void
init_metadata_cache(void) {
    if (metadata_cache != NULL) {
        return;
    }
    metadata_cache = pcmk__strkey_table(free, free);
}
/*!
 * \internal
 * \brief Get the metadata of a fence agent as XML, using a cache of the text
 *
 * On a cache miss, the metadata text is fetched synchronously through a
 * temporary fencer API object and stored in the cache under the agent name.
 *
 * \param[in]  agent     Name of fence agent
 * \param[out] metadata  Where to store the parsed metadata (set to NULL first;
 *                       left NULL for the internal watchdog agent)
 *
 * \return pcmk_rc_ok on success, EINVAL if \p metadata is NULL, or EAGAIN if
 *         the metadata could not be obtained
 */
int
get_agent_metadata(const char *agent, xmlNode ** metadata)
{
    char *metadata_text = NULL;

    if (metadata == NULL) {
        return EINVAL;
    }
    *metadata = NULL;

    // The internal watchdog agent has no metadata to retrieve
    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
        return pcmk_rc_ok;
    }

    init_metadata_cache();
    metadata_text = g_hash_table_lookup(metadata_cache, agent);

    if (metadata_text == NULL) {
        int rc;
        stonith_t *api = stonith_api_new();

        if (api == NULL) {
            crm_warn("Could not get agent meta-data: "
                     "API memory allocation failed");
            return EAGAIN;
        }

        rc = api->cmds->metadata(api, st_opt_sync_call, agent,
                                 NULL, &metadata_text, 10);
        stonith_api_delete(api);

        if ((rc != 0) || (metadata_text == NULL)) {
            crm_err("Could not retrieve metadata for fencing agent %s", agent);
            return EAGAIN;
        }

        // The cache takes ownership of the key copy and the metadata text
        g_hash_table_replace(metadata_cache, strdup(agent), metadata_text);
    }

    *metadata = string2xml(metadata_text);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Check whether agent metadata declares a "nodeid" parameter
 *
 * \param[in] xml  Agent metadata XML to search (may be NULL)
 *
 * \return TRUE if not running stand-alone and \p xml contains a parameter
 *         named "nodeid", otherwise FALSE
 */
static gboolean
is_nodeid_required(xmlNode * xml)
{
    xmlXPathObjectPtr matches = NULL;
    gboolean found = FALSE;

    if (stand_alone || (xml == NULL)) {
        return FALSE;
    }

    matches = xpath_search(xml, "//parameter[@name='nodeid']");
    if (numXpathResults(matches) > 0) {
        found = TRUE;
    }
    freeXpathObject(matches);
    return found;
}
/*!
 * \internal
 * \brief Set device flags and unfencing information from agent metadata
 *
 * Every "action" element of the device's metadata is examined: the "list",
 * "status", "reboot" and "on" actions set the matching "supports" flag, an
 * "on" action marked "automatic" (or the deprecated "required") enables
 * automatic unfencing, and any named action marked "on_target" is added to the
 * device's list of on-target actions.
 *
 * \param[in,out] device  Fence device to update (must have metadata to have
 *                        any effect)
 */
static void
read_action_metadata(stonith_device_t *device)
{
    xmlXPathObjectPtr actions = NULL;
    int n_actions = 0;

    if (device->agent_metadata == NULL) {
        return;
    }

    actions = xpath_search(device->agent_metadata, "//action");
    n_actions = numXpathResults(actions);

    // The loop body never runs when there are no results
    for (int i = 0; i < n_actions; i++) {
        const char *name = NULL;
        xmlNode *match = getXpathResult(actions, i);

        CRM_LOG_ASSERT(match != NULL);
        if (match == NULL) {
            continue;
        }

        name = crm_element_value(match, "name");

        if (pcmk__str_eq(name, "list", pcmk__str_none)) {
            stonith__set_device_flags(device->flags, device->id,
                                      st_device_supports_list);

        } else if (pcmk__str_eq(name, "status", pcmk__str_none)) {
            stonith__set_device_flags(device->flags, device->id,
                                      st_device_supports_status);

        } else if (pcmk__str_eq(name, "reboot", pcmk__str_none)) {
            stonith__set_device_flags(device->flags, device->id,
                                      st_device_supports_reboot);

        } else if (pcmk__str_eq(name, "on", pcmk__str_none)) {
            /* "automatic" means the cluster will unfence node when it joins */
            /* "required" is a deprecated synonym for "automatic" */
            if (pcmk__xe_attr_is_true(match, "automatic")
                || pcmk__xe_attr_is_true(match, "required")) {
                device->automatic_unfencing = TRUE;
            }
            stonith__set_device_flags(device->flags, device->id,
                                      st_device_supports_on);
        }

        if ((name != NULL) && pcmk__xe_attr_is_true(match, "on_target")) {
            pcmk__add_word(&(device->on_target_actions), 64, name);
        }
    }

    freeXpathObject(actions);
}
/*!
 * \internal
 * \brief Set a pcmk_*_action parameter if not already set
 *
 * The parameter name is "pcmk_<action>_action". A warning is logged whether
 * the mapping is applied or ignored.
 *
 * \param[in,out] params  Device parameters
 * \param[in]     action  Name of action
 * \param[in]     value   Value to use if action is not already set
 */
static void
map_action(GHashTable *params, const char *action, const char *value)
{
    char *param_name = crm_strdup_printf("pcmk_%s_action", action);

    if (g_hash_table_lookup(params, param_name) == NULL) {
        crm_warn("Mapping %s='%s' to %s='%s'",
                 STONITH_ATTR_ACTION_OP, value, param_name, value);

        // The table takes over param_name, so it must not be freed here
        g_hash_table_insert(params, param_name, strdup(value));
        return;
    }

    crm_warn("Ignoring %s='%s', see %s instead",
             STONITH_ATTR_ACTION_OP, value, param_name);
    free(param_name);
}
/*!
 * \internal
 * \brief Create device parameter table from XML
 *
 * If the configuration contains the STONITH_ATTR_ACTION_OP parameter, it is
 * mapped to pcmk_*_action parameters where meaningful and then removed from
 * the table.
 *
 * \param[in] name  Device name (used for logging only)
 * \param[in] dev   XML containing device parameters
 *
 * \return Newly created table of device parameters
 */
static GHashTable *
xml2device_params(const char *name, const xmlNode *dev)
{
    GHashTable *params = xml2list(dev);

    /* Action should never be specified in the device configuration,
     * but we support it for users who are familiar with other software
     * that worked that way.
     */
    const char *configured_action = g_hash_table_lookup(params,
                                                        STONITH_ATTR_ACTION_OP);

    if (configured_action == NULL) {
        return params;
    }

    crm_warn("%s has '%s' parameter, which should never be specified in configuration",
             name, STONITH_ATTR_ACTION_OP);

    if (configured_action[0] == '\0') {
        crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);

    } else if (!strcmp(configured_action, "reboot")) {
        crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
                 STONITH_ATTR_ACTION_OP);

    } else if (!strcmp(configured_action, "off")) {
        map_action(params, "reboot", configured_action);

    } else {
        map_action(params, "off", configured_action);
        map_action(params, "reboot", configured_action);
    }

    // Remove only after the value (owned by the table) is no longer needed
    g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
    return params;
}
/*!
 * \internal
 * \brief Determine how a device's eligible targets should be checked
 *
 * \param[in] dev  Fence device to check
 *
 * \return The device's PCMK_STONITH_HOST_CHECK parameter if set; otherwise
 *         "static-list" if a host list or host map is configured,
 *         "dynamic-list" if the device supports the list action, "status" if
 *         it supports the status action, or PCMK__VALUE_NONE
 */
static const char *
target_list_type(stonith_device_t * dev)
{
    const char *configured = g_hash_table_lookup(dev->params,
                                                 PCMK_STONITH_HOST_CHECK);

    if (configured != NULL) {
        return configured;
    }

    if ((g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST) != NULL)
        || (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) != NULL)) {
        return "static-list";
    }
    if (pcmk_is_set(dev->flags, st_device_supports_list)) {
        return "dynamic-list";
    }
    if (pcmk_is_set(dev->flags, st_device_supports_status)) {
        return "status";
    }
    return PCMK__VALUE_NONE;
}
/*!
 * \internal
 * \brief Create a fence device object from its XML description
 *
 * Besides copying the ID, agent, namespace and parameters, this parses the
 * static host list and host map, loads the agent metadata (starting a retry
 * timer if it is not yet available), determines whether a node ID must be
 * passed and whether unfencing is automatic, and creates the device's work
 * trigger.
 *
 * \param[in] dev  XML describing the device
 *
 * \return Newly allocated device, or NULL if \p dev has no "agent" attribute
 *         or memory could not be allocated
 */
static stonith_device_t *
build_device_from_xml(xmlNode *dev)
{
    const char *setting = NULL;
    stonith_device_t *device = NULL;
    char *agent = crm_element_value_copy(dev, "agent");
    int metadata_rc = pcmk_rc_ok;

    CRM_CHECK(agent != NULL, return device);

    device = calloc(1, sizeof(stonith_device_t));

    CRM_CHECK(device != NULL, {free(agent); return device;});

    device->id = crm_element_value_copy(dev, XML_ATTR_ID);
    device->agent = agent;
    device->namespace = crm_element_value_copy(dev, "namespace");
    device->params = xml2device_params(device->id, dev);

    setting = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
    if (setting != NULL) {
        device->targets = stonith__parse_targets(setting);
    }

    setting = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
    device->aliases = build_port_aliases(setting, &(device->targets));

    setting = target_list_type(device);
    if ((device->targets != NULL)
        && !pcmk__str_eq(setting, "static-list", pcmk__str_casei)) {
        /* Other than "static-list", dev-> targets is unnecessary. */
        g_list_free_full(device->targets, free);
        device->targets = NULL;
    }

    metadata_rc = get_agent_metadata(device->agent, &device->agent_metadata);
    if (metadata_rc == pcmk_rc_ok) {
        if (device->agent_metadata != NULL) {
            read_action_metadata(device);
            stonith__device_parameter_flags(&(device->flags), device->id,
                                            device->agent_metadata);
        }

    } else if (metadata_rc == EAGAIN) {
        // Metadata is not available yet, so retry from a repeating timer
        if (device->timer == NULL) {
            device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
                                               TRUE, get_agent_metadata_cb, device);
        }
        if (!mainloop_timer_running(device->timer)) {
            mainloop_timer_start(device->timer);
        }
    }

    if (g_hash_table_lookup(device->params, "nodeid") == NULL) {
        device->include_nodeid = is_nodeid_required(device->agent_metadata);
    }

    setting = crm_element_value(dev, "rsc_provides");
    if (pcmk__str_eq(setting, PCMK__VALUE_UNFENCING, pcmk__str_casei)) {
        device->automatic_unfencing = TRUE;
    }

    if (is_action_required("on", device)) {
        crm_info("Fencing device '%s' requires unfencing", device->id);
    }

    if (device->on_target_actions != NULL) {
        crm_info("Fencing device '%s' requires actions (%s) to be executed "
                 "on target", device->id,
                 (const char *) device->on_target_actions->str);
    }

    device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
    /* TODO: Hook up priority */

    return device;
}
/*!
 * \internal
 * \brief Create and schedule a command that originates within the fencer
 *
 * \param[in]     origin              Origin to record in the command
 * \param[in,out] device              Device to schedule the command on
 * \param[in]     action              Name of action to execute
 * \param[in]     target              Target of the action (may be NULL)
 * \param[in]     timeout             Default timeout for the command (60 is
 *                                    used if this is 0)
 * \param[in]     internal_user_data  Data made available to \p done_cb via the
 *                                    command's internal_user_data member
 * \param[in]     done_cb             Callback to call with the result; the
 *                                    command itself is passed as its user data
 */
static void
schedule_internal_command(const char *origin,
                          stonith_device_t * device,
                          const char *action,
                          const char *target,
                          int timeout,
                          void *internal_user_data,
                          void (*done_cb) (int pid,
                                           const pcmk__action_result_t *result,
                                           void *user_data))
{
    async_command_t *cmd = NULL;

    cmd = calloc(1, sizeof(async_command_t));

    // Fail in a defined way instead of dereferencing NULL if allocation failed
    CRM_ASSERT(cmd != NULL);

    cmd->id = -1;
    cmd->default_timeout = timeout ? timeout : 60;
    cmd->timeout = cmd->default_timeout;
    cmd->action = strdup(action);
    pcmk__str_update(&cmd->target, target);
    cmd->device = strdup(device->id);
    cmd->origin = strdup(origin);
    cmd->client = strdup(crm_system_name);
    cmd->client_name = strdup(crm_system_name);

    cmd->internal_user_data = internal_user_data;
    cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */

    schedule_stonith_command(cmd, device);
}
// Fence agent status commands use custom exit status codes
enum fence_status_code {
// NOTE(review): not referenced in this part of the file; presumably a
// placeholder for "no valid status" -- confirm
fence_status_invalid = -1,
// status_search_cb() treats this code as "device can fence the target"
fence_status_active = 0,
// status_search_cb() treats this code as "device cannot fence the target"
fence_status_unknown = 1,
// status_search_cb() treats this code as "device can fence the target"
fence_status_inactive = 2,
};
/*!
 * \internal
 * \brief Callback for a "status" action run as part of a device search
 *
 * Frees the command, triggers the device for further work, and records in the
 * search whether the device reported that it can fence the searched host.
 *
 * \param[in]     pid        Ignored
 * \param[in]     result     Result of the status action
 * \param[in,out] user_data  Command that was executed (async_command_t *)
 */
static void
status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
{
    async_command_t *cmd = user_data;
    struct device_search_s *search = cmd->internal_user_data;
    stonith_device_t *dev = cmd_device(cmd);
    gboolean eligible = FALSE;

    // Everything needed from the command has been copied out above
    free_async_command(cmd);

    if (dev == NULL) {
        search_devices_record_result(search, NULL, FALSE);
        return;
    }

    mainloop_set_trigger(dev->work);

    if (result->execution_status != PCMK_EXEC_DONE) {
        crm_warn("Assuming %s cannot fence %s "
                 "because status could not be executed: %s%s%s%s",
                 dev->id, search->host,
                 pcmk_exec_status_str(result->execution_status),
                 ((result->exit_reason == NULL)? "" : " ("),
                 ((result->exit_reason == NULL)? "" : result->exit_reason),
                 ((result->exit_reason == NULL)? "" : ")"));
        search_devices_record_result(search, dev->id, FALSE);
        return;
    }

    if (result->exit_status == fence_status_unknown) {
        crm_trace("%s reported it cannot fence %s", dev->id, search->host);

    } else if ((result->exit_status == fence_status_active)
               || (result->exit_status == fence_status_inactive)) {
        crm_trace("%s reported it can fence %s", dev->id, search->host);
        eligible = TRUE;

    } else {
        crm_warn("Assuming %s cannot fence %s "
                 "(status returned unknown code %d)",
                 dev->id, search->host, result->exit_status);
    }

    search_devices_record_result(search, dev->id, eligible);
}
/*!
 * \internal
 * \brief Callback for a "list" action run as part of a device search
 *
 * A successful result replaces the device's cached target list. On failure,
 * a previously obtained list is reused if there is one; otherwise the device
 * is switched to pcmk_host_check="status" unless a host check was explicitly
 * configured. Finally, the search is told whether the searched host (or its
 * alias) appears in the device's target list.
 *
 * \param[in]     pid        Ignored
 * \param[in]     result     Result of the list action
 * \param[in,out] user_data  Command that was executed (async_command_t *)
 */
static void
dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
                       void *user_data)
{
    async_command_t *cmd = user_data;
    struct device_search_s *search = cmd->internal_user_data;
    stonith_device_t *dev = cmd_device(cmd);
    gboolean listed = FALSE;

    free_async_command(cmd);

    /* Host/alias must be in the list output to be eligible to be fenced
     *
     * Will cause problems if down'd nodes aren't listed or (for virtual nodes)
     * if the guest is still listed despite being moved to another machine
     */
    if (dev == NULL) {
        search_devices_record_result(search, NULL, FALSE);
        return;
    }

    mainloop_set_trigger(dev->work);

    if (pcmk__result_ok(result)) {
        crm_info("Refreshing target list for %s", dev->id);
        g_list_free_full(dev->targets, free);
        dev->targets = stonith__parse_targets(result->action_stdout);
        dev->targets_age = time(NULL);

    } else if (dev->targets != NULL) {
        if (result->execution_status != PCMK_EXEC_DONE) {
            crm_info("Reusing most recent target list for %s "
                     "because list could not be executed: %s%s%s%s",
                     dev->id, pcmk_exec_status_str(result->execution_status),
                     ((result->exit_reason == NULL)? "" : " ("),
                     ((result->exit_reason == NULL)? "" : result->exit_reason),
                     ((result->exit_reason == NULL)? "" : ")"));
        } else {
            crm_info("Reusing most recent target list for %s "
                     "because list returned error code %d",
                     dev->id, result->exit_status);
        }

    } else { // We have never successfully executed list
        if (result->execution_status != PCMK_EXEC_DONE) {
            crm_warn("Assuming %s cannot fence %s "
                     "because list could not be executed: %s%s%s%s",
                     dev->id, search->host,
                     pcmk_exec_status_str(result->execution_status),
                     ((result->exit_reason == NULL)? "" : " ("),
                     ((result->exit_reason == NULL)? "" : result->exit_reason),
                     ((result->exit_reason == NULL)? "" : ")"));
        } else {
            crm_warn("Assuming %s cannot fence %s "
                     "because list returned error code %d",
                     dev->id, search->host, result->exit_status);
        }

        /* Fall back to pcmk_host_check="status" if the user didn't explicitly
         * specify "dynamic-list".
         */
        if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
            crm_notice("Switching to pcmk_host_check='status' for %s", dev->id);
            g_hash_table_replace(dev->params, strdup(PCMK_STONITH_HOST_CHECK),
                                 strdup("status"));
        }
    }

    if (dev->targets != NULL) {
        // Look the host up by its alias if one is configured
        const char *lookup_name = g_hash_table_lookup(dev->aliases,
                                                      search->host);

        if (lookup_name == NULL) {
            lookup_name = search->host;
        }
        if (pcmk__str_in_list(lookup_name, dev->targets, pcmk__str_casei)) {
            listed = TRUE;
        }
    }

    search_devices_record_result(search, dev->id, listed);
}
/*!
 * \internal
 * \brief Check whether one parameter table has entries another does not match
 *
 * Keys beginning with "CRM_meta" and the key "crm_feature_set" are ignored.
 * Values are compared case-insensitively.
 *
 * \param[in] first   Table whose entries are checked
 * \param[in] second  Table to look the entries of \p first up in
 *
 * \return 1 if any checked key of \p first is missing from \p second or has a
 *         different value there, otherwise 0
 */
static int
device_params_diff(GHashTable *first, GHashTable *second) {
    char *key = NULL;
    char *value = NULL;
    GHashTableIter gIter;

    g_hash_table_iter_init(&gIter, first);
    while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
        const char *other_value = NULL;

        // Only the start of the key needs to be examined for the prefix
        if (g_str_has_prefix(key, "CRM_meta")
            || (strcmp(key, "crm_feature_set") == 0)) {
            continue;
        }

        other_value = g_hash_table_lookup(second, key);
        if ((other_value == NULL)
            || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
            // other_value may be NULL, which must not be passed to %s
            crm_trace("Different value for %s: %s != %s",
                      key, pcmk__s(other_value, "(null)"), value);
            return 1;
        }
    }

    return 0;
}
/*!
 * \internal
 * \brief Checks to see if an identical device already exists in the device_list
 *
 * A registered device is considered identical if it has the same ID, the same
 * agent (compared case-insensitively), and parameters that do not differ in
 * either direction according to device_params_diff().
 *
 * \param[in] device  Device to look for
 *
 * \return Matching registered device, or NULL if there is none
 */
static stonith_device_t *
device_has_duplicate(const stonith_device_t *device)
{
    stonith_device_t *existing = g_hash_table_lookup(device_list, device->id);

    if (existing == NULL) {
        crm_trace("No match for %s", device->id);
        return NULL;
    }

    if (!pcmk__str_eq(existing->agent, device->agent, pcmk__str_casei)) {
        crm_trace("Different agent: %s != %s", existing->agent, device->agent);
        return NULL;
    }

    /* Use calculate_operation_digest() here? */
    if (device_params_diff(device->params, existing->params)
        || device_params_diff(existing->params, device->params)) {
        return NULL;
    }

    crm_trace("Match");
    return existing;
}
/*!
 * \internal
 * \brief Register a fence device described by XML in the global device list
 *
 * Watchdog devices (identified by ID or agent) are validated first and are
 * only registered when this node does watchdog fencing. For any other device,
 * an identical existing entry is reused; otherwise the new device replaces any
 * entry with the same ID.
 *
 * \param[in] dev       XML describing the device
 * \param[in] from_cib  Whether the registration comes from the CIB (TRUE) or
 *                      from an API client (FALSE)
 *
 * \return pcmk_ok on success (including when a watchdog device is skipped
 *         because this node does not do watchdog fencing), -ENOMEM if the
 *         device could not be built, or -ENODEV if a watchdog device is
 *         rejected
 */
int
stonith_device_register(xmlNode *dev, gboolean from_cib)
{
stonith_device_t *dup = NULL;
stonith_device_t *device = build_device_from_xml(dev);
guint ndevices = 0;
int rv = pcmk_ok;
CRM_CHECK(device != NULL, return -ENOMEM);
/* do we have a watchdog-device? */
// The do { ... } while (0) body runs once: an accepted watchdog device breaks
// out to normal registration, every other path frees the device and returns
if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
if (stonith_watchdog_timeout_ms <= 0) {
crm_err("Ignoring watchdog fence device without "
"stonith-watchdog-timeout set.");
rv = -ENODEV;
/* fall through to cleanup & return */
} else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
crm_err("Ignoring watchdog fence device with unknown "
"agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
device->agent?device->agent:"");
rv = -ENODEV;
/* fall through to cleanup & return */
} else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
pcmk__str_none)) {
crm_err("Ignoring watchdog fence device "
"named %s !='"STONITH_WATCHDOG_ID"'.",
device->id?device->id:"");
rv = -ENODEV;
/* fall through to cleanup & return */
} else {
if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
pcmk__str_none)) {
/* this either has an empty list or the targets
configured for watchdog-fencing
*/
// Ownership of the target list moves to stonith_watchdog_targets
g_list_free_full(stonith_watchdog_targets, free);
stonith_watchdog_targets = device->targets;
device->targets = NULL;
}
if (node_does_watchdog_fencing(stonith_our_uname)) {
// Restrict the device to fencing only the local node
g_list_free_full(device->targets, free);
device->targets = stonith__parse_targets(stonith_our_uname);
g_hash_table_replace(device->params,
strdup(PCMK_STONITH_HOST_LIST),
strdup(stonith_our_uname));
/* proceed as with any other stonith-device */
break;
}
crm_debug("Skip registration of watchdog fence device on node not in host-list.");
/* cleanup and fall through to more cleanup and return */
device->targets = NULL;
stonith_device_remove(device->id, from_cib);
}
free_device(device);
return rv;
} while (0);
dup = device_has_duplicate(device);
if (dup) {
ndevices = g_hash_table_size(device_list);
crm_debug("Device '%s' already in device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
// Discard the newly built device and keep using the registered one
free_device(device);
device = dup;
dup = g_hash_table_lookup(device_list, device->id);
dup->dirty = FALSE;
} else {
stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
if (from_cib && old && old->api_registered) {
/* If the cib is writing over an entry that is shared with a stonith client,
* copy any pending ops that currently exist on the old entry to the new one.
* Otherwise the pending ops will be reported as failures
*/
crm_info("Overwriting existing entry for %s from CIB", device->id);
device->pending_ops = old->pending_ops;
device->api_registered = TRUE;
old->pending_ops = NULL;
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
}
g_hash_table_replace(device_list, device->id, device);
ndevices = g_hash_table_size(device_list);
crm_notice("Added '%s' to device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
}
// Record which source (CIB or API client) registered the device
if (from_cib) {
device->cib_registered = TRUE;
} else {
device->api_registered = TRUE;
}
return pcmk_ok;
}
void
stonith_device_remove(const char *id, bool from_cib)
{
stonith_device_t *device = g_hash_table_lookup(device_list, id);
guint ndevices = 0;
if (!device) {
ndevices = g_hash_table_size(device_list);
crm_info("Device '%s' not found (%d active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
return;
}
if (from_cib) {
device->cib_registered = FALSE;
} else {
device->verified = FALSE;
device->api_registered = FALSE;
}
if (!device->cib_registered && !device->api_registered) {
g_hash_table_remove(device_list, id);
ndevices = g_hash_table_size(device_list);
crm_info("Removed '%s' from device list (%d active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
} else {
crm_trace("Not removing '%s' from device list (%d active) because "
"still registered via:%s%s",
id, g_hash_table_size(device_list),
(device->cib_registered? " cib" : ""),
(device->api_registered? " api" : ""));
}
}
/*!
 * \internal
 * \brief Return the number of stonith levels registered for a node
 *
 * \param[in] tp  Node's topology table entry
 *
 * \return Number of non-NULL levels in topology entry
 * \note This function is used only for log messages.
 */
static int
count_active_levels(const stonith_topology_t *tp)
{
    int n_levels = 0;

    for (int level = 0; level < ST_LEVEL_MAX; level++) {
        n_levels += (tp->levels[level] != NULL)? 1 : 0;
    }
    return n_levels;
}
/*!
 * \internal
 * \brief Free a topology table entry, including the device list of each level
 *
 * \param[in,out] data  Topology entry to free (stonith_topology_t *)
 */
static void
free_topology_entry(gpointer data)
{
    stonith_topology_t *entry = data;

    for (int level = 0; level < ST_LEVEL_MAX; level++) {
        GList *level_devices = entry->levels[level];

        if (level_devices != NULL) {
            g_list_free_full(level_devices, free);
        }
    }

    free(entry->target);
    free(entry->target_value);
    free(entry->target_pattern);
    free(entry->target_attribute);
    free(entry);
}
/*!
 * \internal
 * \brief Destroy the global fencing topology table, if it exists
 */
void
free_topology_list(void)
{
    if (topology == NULL) {
        return;
    }
    g_hash_table_destroy(topology);
    topology = NULL;
}
/*!
 * \internal
 * \brief Create the global fencing topology table if it does not exist yet
 *
 * Table values are released with free_topology_entry(); no key destructor is
 * set.
 */
void
init_topology_list(void)
{
    if (topology != NULL) {
        return;
    }
    topology = pcmk__strkey_table(NULL, free_topology_entry);
}
char *
stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
{
if (mode == fenced_target_by_unknown) {
mode = unpack_level_kind(level);
}
switch (mode) {
case fenced_target_by_name:
return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
case fenced_target_by_pattern:
return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
case fenced_target_by_attribute:
return crm_strdup_printf("%s=%s",
crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE),
crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE));
default:
return crm_strdup_printf("unknown-%s", ID(level));
}
}
/*!
* \internal
* \brief Parse target identification from topology level XML
*
* \param[in] level Topology level XML to parse
*
* \return How to identify target of \p level
*/
static enum fenced_target_by
unpack_level_kind(const xmlNode *level)
{
if (crm_element_value(level, XML_ATTR_STONITH_TARGET) != NULL) {
return fenced_target_by_name;
}
if (crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN) != NULL) {
return fenced_target_by_pattern;
}
if (!stand_alone /* if standalone, there's no attribute manager */
&& (crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) != NULL)
&& (crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) != NULL)) {
return fenced_target_by_attribute;
}
return fenced_target_by_unknown;
}
/*!
 * \internal
 * \brief Split a comma-separated string of device IDs into a key/value list
 *
 * Every substring between commas (including empty ones) is added as a value
 * with a NULL key, in order of appearance.
 *
 * \param[in] devices  Comma-separated device IDs (may be NULL)
 *
 * \return List of device IDs, or NULL if \p devices is NULL
 */
static stonith_key_value_t *
parse_device_list(const char *devices)
{
    stonith_key_value_t *parsed = NULL;
    const char *token_start = devices;

    if (devices == NULL) {
        return parsed;
    }

    for (const char *pos = devices; ; pos++) {
        if ((*pos == ',') || (*pos == '\0')) {
            char *token = strndup(token_start, pos - token_start);

            parsed = stonith_key_value_add(parsed, NULL, token);
            free(token);

            if (*pos == '\0') {
                // The terminator ends the final token
                break;
            }
            token_start = pos + 1;
        }
    }

    return parsed;
}
/*!
 * \internal
 * \brief Unpack essential information from topology request XML
 *
 * If no level element is found, the mode is reported as
 * fenced_target_by_unknown, the target as NULL, the level number as 0, and the
 * description as "missing".
 *
 * \param[in]  xml     Request XML to search
 * \param[out] mode    If not NULL, where to store level kind
 * \param[out] target  If not NULL, where to store representation of target
 * \param[out] id      If not NULL, where to store level number
 * \param[out] desc    If not NULL, where to store log-friendly level description
 *
 * \return Topology level XML from within \p xml, or NULL if not found
 * \note The caller is responsible for freeing \p *target and \p *desc if set.
 */
static xmlNode *
unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
                     int *id, char **desc)
{
    enum fenced_target_by level_kind = fenced_target_by_unknown;
    char *level_target = NULL;
    int level_index = 0;

    /* The level element can be the top element or lower. If top level, don't
     * search by xpath, because it might give multiple hits if the XML is the
     * CIB.
     */
    if ((xml != NULL)
        && !pcmk__str_eq(TYPE(xml), XML_TAG_FENCING_LEVEL, pcmk__str_none)) {
        xml = get_xpath_object("//" XML_TAG_FENCING_LEVEL, xml, LOG_WARNING);
    }

    if (xml != NULL) {
        level_kind = unpack_level_kind(xml);
        level_target = stonith_level_key(xml, level_kind);
        crm_element_value_int(xml, XML_ATTR_STONITH_INDEX, &level_index);
        if (desc != NULL) {
            *desc = crm_strdup_printf("%s[%d]", level_target, level_index);
        }

    } else if (desc != NULL) {
        *desc = crm_strdup_printf("missing");
    }

    if (mode != NULL) {
        *mode = level_kind;
    }
    if (id != NULL) {
        *id = level_index;
    }

    if (target == NULL) {
        // Nobody wants the target string, so release it here
        free(level_target);
    } else {
        *target = level_target;
    }

    return xml;
}
/*!
* \internal
* \brief Register a fencing topology level for a target
*
* Given an XML request specifying the target name, level index, and device IDs
* for the level, this will create an entry for the target in the global topology
* table if one does not already exist, then append the specified device IDs to
* the entry's device list for the specified level.
*
* \param[in] msg XML request for STONITH level registration
* \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
* \param[out] result Where to set result of registration
*/
void
fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
{
    int id = 0;
    xmlNode *level;
    enum fenced_target_by mode;
    char *target;
    stonith_topology_t *tp;
    stonith_key_value_t *dIter = NULL;
    stonith_key_value_t *devices = NULL;

    CRM_CHECK((msg != NULL) && (result != NULL), return);

    /* Unpack the request. On success, target is a newly allocated string that
     * every exit path below must either free or hand over to a topology entry.
     */
    level = unpack_level_request(msg, &mode, &target, &id, desc);
    if (level == NULL) {
        fenced_set_protocol_error(result);
        return;
    }

    // Ensure an ID was given (even the client API adds an ID)
    if (pcmk__str_empty(ID(level))) {
        crm_warn("Ignoring registration for topology level without ID");
        free(target);
        crm_log_xml_trace(level, "Bad level");
        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
                            "Topology level is invalid without ID");
        return;
    }

    // Ensure a valid target was specified
    if (mode == fenced_target_by_unknown) {
        crm_warn("Ignoring registration for topology level '%s' "
                 "without valid target", ID(level));
        free(target);
        crm_log_xml_trace(level, "Bad level");
        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
                            "Invalid target for topology level '%s'",
                            ID(level));
        return;
    }

    // Ensure level ID is in allowed range
    // (unlike unregistration, level 0 is rejected here)
    if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
        crm_warn("Ignoring topology registration for %s with invalid level %d",
                 target, id);
        free(target);
        crm_log_xml_trace(level, "Bad level");
        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
                            "Invalid level number '%s' for topology level '%s'",
                            pcmk__s(crm_element_value(level,
                                                      XML_ATTR_STONITH_INDEX),
                                    ""),
                            ID(level));
        return;
    }

    /* Find or create topology table entry */
    tp = g_hash_table_lookup(topology, target);
    if (tp == NULL) {
        tp = calloc(1, sizeof(stonith_topology_t));
        if (tp == NULL) {
            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
                             strerror(ENOMEM));
            free(target);
            return;
        }
        tp->kind = mode;
        // The new entry takes over the target string, which is also its key
        tp->target = target;
        tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
        tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
        tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
        g_hash_table_replace(topology, tp->target, tp);
        crm_trace("Added %s (%d) to the topology (%d active entries)",
                  target, (int) mode, g_hash_table_size(topology));
    } else {
        // An entry already exists; from here on, use tp->target instead
        free(target);
    }

    // Devices are appended, so an already populated level keeps its old ones
    if (tp->levels[id] != NULL) {
        crm_info("Adding to the existing %s[%d] topology entry",
                 tp->target, id);
    }

    // Store a private copy of each device ID listed in the request
    devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
    for (dIter = devices; dIter; dIter = dIter->next) {
        const char *device = dIter->value;
        crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
        tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
    }
    stonith_key_value_freeall(devices, 1, 1);

    {
        int nlevels = count_active_levels(tp);
        crm_info("Target %s has %d active fencing level%s",
                 tp->target, nlevels, pcmk__plural_s(nlevels));
    }
    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
/*!
* \internal
* \brief Unregister a fencing topology level for a target
*
* Given an XML request specifying the target name and level index (or 0 for all
* levels), this will remove any corresponding entry for the target from the
* global topology table.
*
* \param[in] msg XML request for STONITH level unregistration
* \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
* \param[out] result Where to set result of unregistration
*/
void
fenced_unregister_level(xmlNode *msg, char **desc,
                        pcmk__action_result_t *result)
{
    int level_index = -1;
    char *key = NULL;
    xmlNode *level_xml = NULL;
    stonith_topology_t *entry = NULL;

    CRM_CHECK(result != NULL, return);

    // The target kind is not needed for removal, only the key and level number
    level_xml = unpack_level_request(msg, NULL, &key, &level_index, desc);
    if (level_xml == NULL) {
        fenced_set_protocol_error(result);
        return;
    }

    // Reject level numbers outside the allowed range (0 selects all levels)
    if ((level_index >= ST_LEVEL_MAX) || (level_index < 0)) {
        crm_warn("Ignoring topology unregistration for %s with invalid level %d",
                 key, level_index);
        free(key);
        crm_log_xml_trace(level_xml, "Bad level");
        pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
                            "Invalid level number '%s' for topology level %s",
                            pcmk__s(crm_element_value(level_xml,
                                                      XML_ATTR_STONITH_INDEX),
                                    "<null>"),
                            // Client API doesn't add ID to unregistration XML
                            pcmk__s(ID(level_xml), ""));
        return;
    }

    entry = g_hash_table_lookup(topology, key);

    if (entry == NULL) {
        // Nothing registered for this target; still reported as success below
        guint remaining = g_hash_table_size(topology);

        crm_info("No fencing topology found for %s (%d active %s)",
                 key, remaining,
                 pcmk__plural_alt(remaining, "entry", "entries"));

    } else if ((level_index == 0) && g_hash_table_remove(topology, key)) {
        // Level 0 drops the target's whole table entry
        guint remaining = g_hash_table_size(topology);

        crm_info("Removed all fencing topology entries related to %s "
                 "(%d active %s remaining)", key, remaining,
                 pcmk__plural_alt(remaining, "entry", "entries"));

    } else if (entry->levels[level_index] != NULL) {
        // Drop just the requested level's device list
        guint active;

        g_list_free_full(entry->levels[level_index], free);
        entry->levels[level_index] = NULL;
        active = count_active_levels(entry);
        crm_info("Removed level %d from fencing topology for %s "
                 "(%d active level%s remaining)",
                 level_index, key, active, pcmk__plural_s(active));
    }

    free(key);
    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
/*!
 * \internal
 * \brief Join a list of strings into one newly allocated string
 *
 * \param[in] list                  List whose elements are C strings
 * \param[in] delim                 Separator placed between elements (NULL is
 *                                  treated as an empty separator)
 * \param[in] terminate_with_delim  If TRUE and the list is not empty, also
 *                                  append \p delim after the last element
 *
 * \return Newly allocated string (empty if \p list is empty), or NULL if
 *         memory allocation failed
 * \note The caller is responsible for freeing the result with free().
 */
static char *
list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
{
    /* Normalize the separator once, so a NULL delimiter can never reach the
     * copy code (the size calculation already treated it as empty)
     */
    const char *sep = (delim == NULL)? "" : delim;
    const size_t sep_len = strlen(sep);
    size_t alloc_size = 1; // Room for the terminating NUL
    char *rv = NULL;
    char *pos = NULL;

    // First pass: total the space needed for elements and separators
    for (GList *iter = list; iter != NULL; iter = iter->next) {
        const char *value = (const char *) iter->data;

        if (iter != list) {
            alloc_size += sep_len;
        }
        if (value != NULL) { // A NULL element contributes an empty string
            alloc_size += strlen(value);
        }
    }
    if ((list != NULL) && terminate_with_delim) {
        alloc_size += sep_len;
    }

    // calloc() zero-fills, so the result is always NUL-terminated
    rv = calloc(alloc_size, sizeof(char));
    if (rv == NULL) {
        return NULL;
    }

    // Second pass: copy elements, each one after the first led by a separator
    pos = rv;
    for (GList *iter = list; iter != NULL; iter = iter->next) {
        const char *value = (const char *) iter->data;

        if (iter != list) {
            memcpy(pos, sep, sep_len);
            pos += sep_len;
        }
        if (value != NULL) {
            const size_t value_len = strlen(value);

            memcpy(pos, value, value_len);
            pos += value_len;
        }
    }
    if ((list != NULL) && terminate_with_delim) {
        memcpy(pos, sep, sep_len);
    }
    return rv;
}
/*!
* \internal
* \brief Execute a fence agent action directly (and asynchronously)
*
* Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
* directly on a specified device. Only list, monitor, and status actions are
* expected to use this call, though it should work with any agent command.
*
* \param[in] msg Request XML specifying action
* \param[out] result Where to store result of action
*
* \note If the action is monitor, the device must be registered via the API
* (CIB registration is not sufficient), because monitor should not be
* possible unless the device is "started" (API registered).
*/
static void
execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
{
    // Locate the device ID and the requested action anywhere in the request
    xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
    xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
    const char *id = crm_element_value(dev, F_STONITH_DEVICE);
    const char *action = crm_element_value(op, F_STONITH_ACTION);
    async_command_t *cmd = NULL;
    stonith_device_t *device = NULL;

    // Both the device and the action are mandatory
    if ((id == NULL) || (action == NULL)) {
        crm_info("Malformed API action request: device %s, action %s",
                 (id? id : "not specified"),
                 (action? action : "not specified"));
        fenced_set_protocol_error(result);
        return;
    }

    if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
        // Watchdog agent actions are implemented internally
        if (stonith_watchdog_timeout_ms <= 0) {
            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
                             "Watchdog fence device not configured");
            return;
        } else if (pcmk__str_eq(action, "list", pcmk__str_none)) {
            // Output is the watchdog targets, one per newline-terminated line
            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
            pcmk__set_result_output(result,
                                    list_to_string(stonith_watchdog_targets,
                                                   "\n", TRUE),
                                    NULL);
            return;
        } else if (pcmk__str_eq(action, "monitor", pcmk__str_none)) {
            // Nothing to run: a configured watchdog device always monitors OK
            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
            return;
        }
        // Any other watchdog action falls through to the regular device lookup
    }

    device = g_hash_table_lookup(device_list, id);
    if (device == NULL) {
        crm_info("Ignoring API '%s' action request because device %s not found",
                 action, id);
        pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
                            "'%s' not found", id);
        return;
    } else if (!device->api_registered && !strcmp(action, "monitor")) {
        // Monitors may run only on "started" (API-registered) devices
        crm_info("Ignoring API '%s' action request because device %s not active",
                 action, id);
        pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
                            "'%s' not active", id);
        return;
    }

    cmd = create_async_command(msg);
    if (cmd == NULL) {
        crm_log_xml_warn(msg, "invalid");
        fenced_set_protocol_error(result);
        return;
    }

    // The command is only scheduled here, so report the result as pending
    schedule_stonith_command(cmd, device);
    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
/*!
 * \internal
 * \brief Record one device's answer to a capable-device search
 *
 * Count the reply and, if the device can fence the target (and supports the
 * action the search is restricted to, if any), add a copy of its ID to the
 * search's list of capable devices. Once all needed replies have arrived,
 * invoke the search's callback with the list and free the search.
 *
 * \param[in,out] search     Search that the reply belongs to
 * \param[in]     device     ID of replying device (may be NULL)
 * \param[in]     can_fence  Whether the device is able to fence the target
 *
 * \note \p search may have been freed by the time this returns, so callers
 *       must not use it afterward.
 */
static void
search_devices_record_result(struct device_search_s *search, const char *device,
                             gboolean can_fence)
{
    CRM_CHECK(search != NULL, return);

    search->replies_received++;

    if (can_fence && (device != NULL)) {
        bool usable = true;

        if (search->support_action_only != st_device_supports_none) {
            stonith_device_t *dev = g_hash_table_lookup(device_list, device);

            if ((dev != NULL)
                && !pcmk_is_set(dev->flags, search->support_action_only)) {
                /* Skip this device, but do not return early: its reply has
                 * been counted, and if it was the last one outstanding, the
                 * search must still be completed below. Otherwise the
                 * callback would never run and the search would leak.
                 */
                usable = false;
            }
        }
        if (usable) {
            search->capable = g_list_append(search->capable, strdup(device));
        }
    }

    if (search->replies_needed == search->replies_received) {
        guint ndevices = g_list_length(search->capable);

        crm_debug("Search found %d device%s that can perform '%s' targeting %s",
                  ndevices, pcmk__plural_s(ndevices),
                  (search->action? search->action : "unknown action"),
                  (search->host? search->host : "any node"));

        // The capable list is handed to the callback; only the search is freed
        search->callback(search->capable, search->user_data);
        free(search->host);
        free(search->action);
        free(search);
    }
}
/*!
* \internal
* \brief Check whether the local host is allowed to execute a fencing action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Hostname of fence target
* \param[in] allow_suicide Whether self-fencing is allowed for this operation
*
* \return TRUE if local host is allowed to execute action, FALSE otherwise
*/
static gboolean
localhost_is_eligible(const stonith_device_t *device, const char *action,
                      const char *target, gboolean allow_suicide)
{
    const gboolean target_is_local = pcmk__str_eq(target, stonith_our_uname,
                                                  pcmk__str_casei);

    // Does the device list this action among its on-target actions?
    const gboolean on_target_action =
        (device != NULL) && (action != NULL)
        && (device->on_target_actions != NULL)
        && (strstr((const char*) device->on_target_actions->str,
                   action) != NULL);

    if (on_target_action) {
        // On-target actions may be executed only by the target itself
        if (target_is_local) {
            return TRUE;
        }
        crm_trace("Operation '%s' using %s can only be executed for local "
                  "host, not %s", action, device->id, target);
        return FALSE;
    }

    // Any other action against ourselves needs self-fencing to be allowed
    if (target_is_local && !allow_suicide) {
        crm_trace("'%s' operation does not support self-fencing", action);
        return FALSE;
    }
    return TRUE;
}
/*!
* \internal
* \brief Check if local node is allowed to execute (possibly remapped) action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Node name of fence target
* \param[in] allow_self Whether self-fencing is allowed for this operation
*
* \return true if local node is allowed to execute \p action or any actions it
* might be remapped to, otherwise false
*/
static bool
localhost_is_eligible_with_remap(const stonith_device_t *device,
                                 const char *action, const char *target,
                                 gboolean allow_self)
{
    // The action exactly as requested is the first thing to try
    if (localhost_is_eligible(device, action, target, allow_self)) {
        return true;
    }

    // Only "reboot" has potential remaps
    if (!pcmk__str_eq(action, "reboot", pcmk__str_none)) {
        return false;
    }

    /* A disallowed "reboot" may still be carried out as "off" followed by
     * "on", so being allowed either of those is enough; the disallowed
     * actions get reported with the results. Self-fencing is never allowed
     * for a remapped "on", because the target is off at that point.
     */
    return localhost_is_eligible(device, "off", target, allow_self)
           || localhost_is_eligible(device, "on", target, FALSE);
}
/*!
 * \internal
 * \brief Determine whether a device can fence a search's target
 *
 * The answer is either recorded right away via search_devices_record_result()
 * or, when the device's host check requires running a "list" or "status"
 * command, left to that command's callback.
 *
 * \param[in,out] dev     Fence device to check
 * \param[in,out] search  Search in progress (supplies the action and target)
 */
static void
can_fence_host_with_device(stonith_device_t *dev,
                           struct device_search_s *search)
{
    gboolean can = FALSE;
    // check_type doubles as the reason given in the log message at the end
    const char *check_type = "Internal bug";
    const char *target = NULL;
    const char *alias = NULL;
    const char *dev_id = "Unspecified device";
    const char *action = (search == NULL)? NULL : search->action;

    /* NOTE(review): if search itself is NULL, action is NULL too, so this
     * jumps to the label below, where search is still passed on to
     * search_devices_record_result() -- confirm callers never pass NULL.
     */
    CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results);

    if (dev->id != NULL) {
        dev_id = dev->id;
    }

    // A search without a target matches every device
    target = search->host;
    if (target == NULL) {
        can = TRUE;
        check_type = "No target";
        goto search_report_results;
    }

    /* Answer immediately if the device does not support the action
     * or the local node is not allowed to perform it
     */
    if (pcmk__str_eq(action, "on", pcmk__str_none)
        && !pcmk_is_set(dev->flags, st_device_supports_on)) {
        check_type = "Agent does not support 'on'";
        goto search_report_results;
    } else if (!localhost_is_eligible_with_remap(dev, action, target,
                                                 search->allow_suicide)) {
        check_type = "This node is not allowed to execute action";
        goto search_report_results;
    }

    // Check eligibility as specified by pcmk_host_check
    check_type = target_list_type(dev);
    alias = g_hash_table_lookup(dev->aliases, target);

    if (pcmk__str_eq(check_type, PCMK__VALUE_NONE, pcmk__str_casei)) {
        // No host check configured: the device is taken to fence any target
        can = TRUE;

    } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) {
        // Eligible if listed by name, or if a host map gives the target an alias
        if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
            can = TRUE;
        } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
                   && g_hash_table_lookup(dev->aliases, target)) {
            can = TRUE;
        }

    } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) {
        time_t now = time(NULL);

        // Refresh the target list if there is none or it is over 60s old
        if (dev->targets == NULL || dev->targets_age + 60 < now) {
            int device_timeout = get_action_timeout(dev, "list", search->per_device_timeout);

            if (device_timeout > search->per_device_timeout) {
                crm_notice("Since the pcmk_list_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may occur",
                           device_timeout, dev_id, search->per_device_timeout);
            }
            crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
                      check_type, dev_id, target, action);
            schedule_internal_command(__func__, dev, "list", NULL,
                                      search->per_device_timeout, search, dynamic_list_search_cb);
            /* we'll respond to this search request async in the cb */
            return;
        }

        // The existing list is recent enough; match by alias if there is one
        if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
                              pcmk__str_casei)) {
            can = TRUE;
        }

    } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) {
        int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);

        if (device_timeout > search->per_device_timeout) {
            crm_notice("Since the pcmk_status_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may occur",
                       device_timeout, dev_id, search->per_device_timeout);
        }
        crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
                  check_type, dev_id, target, action);
        schedule_internal_command(__func__, dev, "status", target,
                                  search->per_device_timeout, search, status_search_cb);
        /* we'll respond to this search request async in the cb */
        return;

    } else {
        crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
        check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
    }

search_report_results:
    // Log the verdict, then count it as this device's reply to the search
    crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
             dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"),
             pcmk__s(target, "unspecified target"),
             (alias == NULL)? "" : " (as '", pcmk__s(alias, ""),
             (alias == NULL)? "" : "')", check_type);
    search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
}
/*!
 * \internal
 * \brief Hash table iterator that checks one device for a search
 *
 * \param[in]     key        Ignored
 * \param[in,out] value      Fence device (stonith_device_t *) to check
 * \param[in,out] user_data  Search in progress (struct device_search_s *)
 */
static void
search_devices(gpointer key, gpointer value, gpointer user_data)
{
    can_fence_host_with_device((stonith_device_t *) value,
                               (struct device_search_s *) user_data);
}
#define DEFAULT_QUERY_TIMEOUT 20
/*!
 * \internal
 * \brief Start a search for devices capable of executing a fence action
 *
 * \param[in]     host                 Target of the action (NULL for any node)
 * \param[in]     action               Fence action to search for
 * \param[in]     timeout              Timeout to use for each device
 * \param[in]     suicide              Whether self-fencing is allowed
 * \param[in,out] user_data            Passed through to \p callback
 * \param[in]     callback             Called with the list of capable devices
 * \param[in]     support_action_only  Device support flag that capable devices
 *                                     must have set (st_device_supports_none
 *                                     for no restriction)
 */
static void
get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
                    void (*callback) (GList * devices, void *user_data), uint32_t support_action_only)
{
    struct device_search_s *request = NULL;
    const guint device_count = g_hash_table_size(device_list);

    // With no devices at all, the answer is immediate
    if (device_count == 0) {
        callback(NULL, user_data);
        return;
    }

    request = calloc(1, sizeof(struct device_search_s));
    if (request == NULL) {
        crm_crit("Cannot search for capable fence devices: %s",
                 strerror(ENOMEM));
        callback(NULL, user_data);
        return;
    }

    /* We are guaranteed this many replies, even if a device is
     * unregistered while the search is in progress.
     */
    request->replies_needed = device_count;

    request->support_action_only = support_action_only;
    request->user_data = user_data;
    request->callback = callback;
    request->allow_suicide = suicide;
    request->per_device_timeout = timeout;
    pcmk__str_update(&request->host, host);
    pcmk__str_update(&request->action, action);

    // Log before iterating: the search is freed once the last reply arrives
    crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
              device_count, pcmk__plural_s(device_count),
              pcmk__s(request->action, "unknown action"),
              pcmk__s(request->host, "any node"));

    g_hash_table_foreach(device_list, search_devices, request);
}
/* State of a device query, carried as user data to
 * stonith_query_capable_device_cb(), which frees it along with its members
 */
struct st_query_data {
    xmlNode *reply;     // Reply XML that the query results get attached to
    char *remote_peer;  // Passed to stonith_send_reply() together with the client
    char *client_id;    // ID used to look up the requesting client (may be NULL)
    char *target;       // Fence target (NULL if the query wants device parameters)
    char *action;       // Fence action that was asked about
    int call_options;   // Call options (checked for st_opt_allow_suicide)
};
/*!
* \internal
* \brief Add action-specific attributes to query reply XML
*
* \param[in,out] xml XML to add attributes to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
*/
static void
add_action_specific_attributes(xmlNode *xml, const char *action,
                               const stonith_device_t *device,
                               const char *target)
{
    int action_specific_timeout;
    int delay_max;
    int delay_base;

    CRM_CHECK(xml && action && device, return);

    // Flag the device as required for this action if appropriate
    if (is_action_required(action, device)) {
        crm_trace("Action '%s' is required using %s", action, device->id);
        crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
    }

    // A timeout of 0 means no action-specific timeout, so nothing is added
    action_specific_timeout = get_action_timeout(device, action, 0);
    if (action_specific_timeout) {
        crm_trace("Action '%s' has timeout %dms using %s",
                  action, action_specific_timeout, device->id);
        crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
    }

    /* Delays are logged with a seconds suffix and added to the XML unscaled
     * (no division by 1000).
     */
    delay_max = get_action_delay_max(device, action);
    if (delay_max > 0) {
        crm_trace("Action '%s' has maximum random delay %ds using %s",
                  action, delay_max, device->id);
        crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max);
    }

    delay_base = get_action_delay_base(device, action, target);
    if (delay_base > 0) {
        crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base);
    }

    // Summarize whichever combination of delays is in effect
    if ((delay_max > 0) && (delay_base == 0)) {
        crm_trace("Action '%s' has maximum random delay %ds using %s",
                  action, delay_max, device->id);
    } else if ((delay_max == 0) && (delay_base > 0)) {
        crm_trace("Action '%s' has a static delay of %ds using %s",
                  action, delay_base, device->id);
    } else if ((delay_max > 0) && (delay_base > 0)) {
        crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen "
                  "maximum delay of %ds using %s",
                  action, delay_base, delay_max, device->id);
    }
}
/*!
* \internal
* \brief Add "disallowed" attribute to query reply XML if appropriate
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device,
               const char *target, gboolean allow_suicide)
{
    // Nothing to mark when the local host may execute the action
    if (localhost_is_eligible(device, action, target, allow_suicide)) {
        return;
    }

    crm_trace("Action '%s' using %s is disallowed for local host",
              action, device->id);
    pcmk__xe_set_bool_attr(xml, F_STONITH_ACTION_DISALLOWED, true);
}
/*!
* \internal
* \brief Add child element with action-specific values to query reply XML
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_action_reply(xmlNode *xml, const char *action,
                 const stonith_device_t *device, const char *target,
                 gboolean allow_suicide)
{
    // Each action gets its own child element, identified by the action name
    xmlNode *action_xml = create_xml_node(xml, F_STONITH_ACTION);

    crm_xml_add(action_xml, XML_ATTR_ID, action);

    // Fill in the action's values, then note whether it is disallowed locally
    add_action_specific_attributes(action_xml, action, device, target);
    add_disallowed(action_xml, action, device, target, allow_suicide);
}
/*!
 * \internal
 * \brief Reply to a device query once the capable devices are known
 *
 * Pack the capable devices and their action-specific values into XML, attach
 * that to the query's reply, and send the reply.
 *
 * \param[in,out] devices    List of capable device IDs (freed here)
 * \param[in,out] user_data  Query data (struct st_query_data *, freed here)
 */
static void
stonith_query_capable_device_cb(GList * devices, void *user_data)
{
    struct st_query_data *query = user_data;
    int available_devices = 0;
    xmlNode *dev = NULL;
    xmlNode *list = NULL;
    GList *lpc = NULL;
    pcmk__client_t *client = NULL;

    // Skip the reply if the requesting client is gone and no peer is recorded
    if (query->client_id != NULL) {
        client = pcmk__find_client_by_id(query->client_id);
        if ((client == NULL) && (query->remote_peer == NULL)) {
            crm_trace("Skipping reply to %s: no longer a client",
                      query->client_id);
            goto done;
        }
    }

    /* Pack the results into XML */
    list = create_xml_node(NULL, __func__);
    crm_xml_add(list, F_STONITH_TARGET, query->target);
    for (lpc = devices; lpc != NULL; lpc = lpc->next) {
        stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
        const char *action = query->action;

        if (!device) {
            /* It is possible the device got unregistered while
             * determining who can fence the target */
            continue;
        }

        available_devices++;

        // Basic description of the device
        dev = create_xml_node(list, F_STONITH_DEVICE);
        crm_xml_add(dev, XML_ATTR_ID, device->id);
        crm_xml_add(dev, "namespace", device->namespace);
        crm_xml_add(dev, "agent", device->agent);
        crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
        crm_xml_add_int(dev, F_STONITH_DEVICE_SUPPORT_FLAGS, device->flags);

        /* If the originating fencer wants to reboot the node, and we have a
         * capable device that doesn't support "reboot", remap to "off" instead.
         */
        if (!pcmk_is_set(device->flags, st_device_supports_reboot)
            && pcmk__str_eq(query->action, "reboot", pcmk__str_none)) {
            crm_trace("%s doesn't support reboot, using values for off instead",
                      device->id);
            action = "off";
        }

        /* Add action-specific values if available */
        add_action_specific_attributes(dev, action, device, query->target);
        if (pcmk__str_eq(query->action, "reboot", pcmk__str_none)) {
            /* A "reboot" *might* get remapped to "off" then "on", so after
             * sending the "reboot"-specific values in the main element, we add
             * sub-elements for "off" and "on" values.
             *
             * We short-circuited earlier if "reboot", "off" and "on" are all
             * disallowed for the local host. However if only one or two are
             * disallowed, we send back the results and mark which ones are
             * disallowed. If "reboot" is disallowed, this might cause problems
             * with older fencer versions, which won't check for it. Older
             * versions will ignore "off" and "on", so they are not a problem.
             */
            add_disallowed(dev, action, device, query->target,
                           pcmk_is_set(query->call_options, st_opt_allow_suicide));
            add_action_reply(dev, "off", device, query->target,
                             pcmk_is_set(query->call_options, st_opt_allow_suicide));
            // Self-fencing is never allowed for the "on" half of a remap
            add_action_reply(dev, "on", device, query->target, FALSE);
        }

        /* A query without a target wants device parameters */
        if (query->target == NULL) {
            xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);

            g_hash_table_foreach(device->params, hash2field, attrs);
        }
    }

    // Only devices that are still registered are counted
    crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices);
    if (query->target) {
        crm_debug("Found %d matching device%s for target '%s'",
                  available_devices, pcmk__plural_s(available_devices),
                  query->target);
    } else {
        crm_debug("%d device%s installed",
                  available_devices, pcmk__plural_s(available_devices));
    }

    if (list != NULL) {
        crm_log_xml_trace(list, "Add query results");
        add_message_xml(query->reply, F_STONITH_CALLDATA, list);
    }
    stonith_send_reply(query->reply, query->call_options, query->remote_peer,
                       client);

done:
    // Whether or not a reply was sent, the query and device list end here
    free_xml(query->reply);
    free(query->remote_peer);
    free(query->client_id);
    free(query->target);
    free(query->action);
    free(query);
    free_xml(list);
    g_list_free_full(devices, free);
}
/*!
* \internal
* \brief Log the result of an asynchronous command
*
* \param[in] cmd Command the result is for
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] next Alternate device that will be tried if command failed
* \param[in] op_merged Whether this command was merged with an earlier one
*/
static void
log_async_result(const async_command_t *cmd,
                 const pcmk__action_result_t *result,
                 int pid, const char *next, bool op_merged)
{
    int log_level = LOG_ERR;
    int output_log_level = LOG_NEVER;
    guint devices_remaining = g_list_length(cmd->next_device_iter);
    GString *msg = g_string_sized_new(80); // Reasonable starting size

    /* NOTE(review): result is dereferenced below without a NULL check --
     * presumably callers always supply one; confirm.
     */

    // Choose log levels appropriately if we have a result
    if (pcmk__result_ok(result)) {
        // Success: notice if a target was involved, otherwise just debug
        log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
        if ((result->action_stdout != NULL)
            && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) {
            output_log_level = LOG_DEBUG;
        }
        // Nothing will be retried after a success, so don't mention a device
        next = NULL;
    } else {
        // Failure: error if a target was involved, otherwise notice
        log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
        if ((result->action_stdout != NULL)
            && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) {
            output_log_level = LOG_WARNING;
        }
    }

    // Build the log message piece by piece
    pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
    if (pid != 0) {
        g_string_append_printf(msg, "[%d] ", pid);
    }
    if (cmd->target != NULL) {
        pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
    }
    if (cmd->device != NULL) {
        pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
    }

    // Add exit status or execution status as appropriate
    if (result->execution_status == PCMK_EXEC_DONE) {
        g_string_append_printf(msg, "returned %d", result->exit_status);
    } else {
        pcmk__g_strcat(msg, "could not be executed: ",
                       pcmk_exec_status_str(result->execution_status), NULL);
    }

    // Add exit reason and next device if appropriate
    if (result->exit_reason != NULL) {
        pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
    }
    if (next != NULL) {
        pcmk__g_strcat(msg, ", retrying with ", next, NULL);
    }
    if (devices_remaining > 0) {
        g_string_append_printf(msg, " (%u device%s remaining)",
                               (unsigned int) devices_remaining,
                               pcmk__plural_s(devices_remaining));
    }
    g_string_append_printf(msg, " " CRM_XS " %scall %d from %s",
                           (op_merged? "merged " : ""), cmd->id,
                           cmd->client_name);

    // Log the result
    do_crm_log(log_level, "%s", msg->str);
    g_string_free(msg, TRUE);

    // Log the output (which may have multiple lines), if appropriate
    if (output_log_level != LOG_NEVER) {
        // Prefix the output with "device[pid]"
        char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);

        crm_log_output(output_log_level, prefix, result->action_stdout);
        free(prefix);
    }
}
/*!
* \internal
* \brief Reply to requester after asynchronous command completion
*
* \param[in] cmd Command that completed
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] merged If true, command was merged with another, not executed
*/
static void
send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
                 int pid, bool merged)
{
    xmlNode *reply = NULL;
    pcmk__client_t *client = NULL;

    CRM_CHECK((cmd != NULL) && (result != NULL), return);

    // The result is logged even if it turns out nobody is left to reply to
    log_async_result(cmd, result, pid, NULL, merged);

    // Skip the reply if the requesting client is gone and no origin is recorded
    if (cmd->client != NULL) {
        client = pcmk__find_client_by_id(cmd->client);
        if ((client == NULL) && (cmd->origin == NULL)) {
            crm_trace("Skipping reply to %s: no longer a client", cmd->client);
            return;
        }
    }

    reply = construct_async_reply(cmd, result);
    if (merged) {
        pcmk__xe_set_bool_attr(reply, F_STONITH_MERGED, true);
    }

    if (!stand_alone && pcmk__is_fencing_action(cmd->action)
        && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
        /* The target was also the originator, so broadcast the result on its
         * behalf (since it will be unable to).
         */
        crm_trace("Broadcast '%s' result for %s (target was also originator)",
                  cmd->action, cmd->target);
        crm_xml_add(reply, F_SUBTYPE, "broadcast");
        crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
        send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
    } else {
        // Reply only to the originator
        stonith_send_reply(reply, cmd->options, cmd->origin, client);
    }

    crm_log_xml_trace(reply, "Reply");
    free_xml(reply);

    // In stand-alone mode, also send fence and history notifications from here
    if (stand_alone) {
        /* Do notification with a clean data object */
        xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);

        stonith__xe_set_result(notify_data, result);
        crm_xml_add(notify_data, F_STONITH_TARGET, cmd->target);
        crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
        crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
        crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
        crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
        crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);

        fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
    }
}
/*!
 * \internal
 * \brief Remove a command from its device's list of pending operations
 *
 * \param[in,out] cmd  Command to cancel
 */
static void
cancel_stonith_command(async_command_t * cmd)
{
    stonith_device_t *device = cmd_device(cmd);

    // Without a device, there is nothing the command could be pending on
    if (device == NULL) {
        return;
    }

    crm_trace("Cancel scheduled '%s' action using %s",
              cmd->action, device->id);
    device->pending_ops = g_list_remove(device->pending_ops, cmd);
}
/*!
* \internal
* \brief Cancel and reply to any duplicates of a just-completed operation
*
* Check whether any fencing operations are scheduled to do the same thing as
* one that just succeeded. If so, rather than performing the same operation
* twice, return the result of this operation for all matching pending commands.
*
* \param[in,out] cmd Fencing operation that just succeeded
* \param[in] result Result of \p cmd
* \param[in] pid If nonzero, process ID of agent invocation (for logs)
*
* \note Duplicate merging will do the right thing for either type of remapped
* reboot. If the executing fencer remapped an unsupported reboot to off,
* then cmd->action will be "reboot" and will be merged with any other
* reboot requests. If the originating fencer remapped a topology reboot
* to off then on, we will get here once with cmd->action "off" and once
* with "on", and they will be merged separately with similar requests.
*/
static void
reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
                    int pid)
{
    GList *iter = cmd_list;

    while (iter != NULL) {
        GList *entry = iter;
        async_command_t *candidate = entry->data;

        // Advance now, because the current entry may be deleted below
        iter = entry->next;

        if (candidate == cmd) {
            continue;
        }

        /* To count as a duplicate, a pending operation must come from a
         * different client connection ...
         */
        if (pcmk__str_eq(cmd->client, candidate->client, pcmk__str_casei)) {
            continue;
        }

        /* ... and have the same target, the same fencing action, and the
         * same device scheduled to execute the action.
         */
        if (!pcmk__str_eq(cmd->target, candidate->target, pcmk__str_casei)
            || !pcmk__str_eq(cmd->action, candidate->action, pcmk__str_none)
            || !pcmk__str_eq(cmd->device, candidate->device, pcmk__str_casei)) {
            continue;
        }

        crm_notice("Merging fencing action '%s'%s%s originating from "
                   "client %s with identical fencing request from client %s",
                   candidate->action,
                   (candidate->target == NULL)? "" : " targeting ",
                   pcmk__s(candidate->target, ""), candidate->client_name,
                   cmd->client_name);

        // Stop tracking the duplicate, send its result, and cancel it
        cmd_list = g_list_remove_link(cmd_list, entry);
        send_async_reply(candidate, result, pid, true);
        cancel_stonith_command(candidate);

        free_async_command(candidate);
        g_list_free_1(entry);
    }
}
/*!
* \internal
* \brief Return the next required device (if any) for an operation
*
* \param[in,out] cmd Fencing operation that just succeeded
*
* \return Next device required for action if any, otherwise NULL
*/
static stonith_device_t *
next_required_device(async_command_t *cmd)
{
    GList *iter = cmd->next_device_iter;

    while (iter != NULL) {
        stonith_device_t *candidate = g_hash_table_lookup(device_list,
                                                          iter->data);

        iter = iter->next;

        /* This is only called for successful actions, so it's OK to skip
         * non-required devices.
         */
        if (is_action_required(cmd->action, candidate)) {
            // Resume after this device the next time around
            cmd->next_device_iter = iter;
            return candidate;
        }
    }
    return NULL;
}
static void
st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
stonith_device_t *next_device = NULL;
CRM_CHECK(cmd != NULL, return);
device = cmd_device(cmd);
cmd->active_on = NULL;
/* The device is ready to do something else now */
if (device) {
if (!device->verified && pcmk__result_ok(result) &&
(pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
device->verified = TRUE;
}
mainloop_set_trigger(device->work);
}
if (pcmk__result_ok(result)) {
next_device = next_required_device(cmd);
} else if ((cmd->next_device_iter != NULL)
&& !is_action_required(cmd->action, device)) {
/* if this device didn't work out, see if there are any others we can try.
* if the failed device was 'required', we can't pick another device. */
next_device = g_hash_table_lookup(device_list,
cmd->next_device_iter->data);
cmd->next_device_iter = cmd->next_device_iter->next;
}
if (next_device == NULL) {
send_async_reply(cmd, result, pid, false);
if (pcmk__result_ok(result)) {
reply_to_duplicates(cmd, result, pid);
}
free_async_command(cmd);
} else { // This operation requires more fencing
log_async_result(cmd, result, pid, next_device->id, false);
schedule_stonith_command(cmd, next_device);
}
}
/*!
 * \internal
 * \brief Compare two fence devices by priority (GCompareFunc for sorting)
 *
 * The list that stonith_fence_get_devices_cb() sorts with this comparator
 * holds device ID strings (each element is looked up in \c device_list and
 * released with free()), not device objects. Each ID is therefore resolved to
 * its device before the priorities are compared, rather than being cast to a
 * device directly.
 *
 * \param[in] a  ID of first device
 * \param[in] b  ID of second device
 *
 * \return Negative value if \p a should sort first (higher priority),
 *         positive value if \p b should sort first, or 0 if equal
 */
static gint
sort_device_priority(gconstpointer a, gconstpointer b)
{
    const stonith_device_t *dev_a = NULL;
    const stonith_device_t *dev_b = NULL;

    if (a != NULL) {
        dev_a = g_hash_table_lookup(device_list, a);
    }
    if (b != NULL) {
        dev_b = g_hash_table_lookup(device_list, b);
    }

    // IDs without a registered device sort after all known devices
    if ((dev_a == NULL) || (dev_b == NULL)) {
        if (dev_a != NULL) {
            return -1;
        }
        if (dev_b != NULL) {
            return 1;
        }
        return 0;
    }

    if (dev_a->priority > dev_b->priority) {
        return -1;
    } else if (dev_a->priority < dev_b->priority) {
        return 1;
    }
    return 0;
}
/* Callback invoked with the devices found for a fencing command's target.
 *
 * devices:   list of matching devices (elements are used as keys into
 *            device_list and are released with free())
 * user_data: the async_command_t the search was performed for
 *
 * If no usable device is found, a PCMK_EXEC_NO_FENCE_DEVICE result is sent
 * and both the command and the list are freed. Otherwise the command takes
 * ownership of the list and the first device is scheduled.
 */
static void
stonith_fence_get_devices_cb(GList * devices, void *user_data)
{
    async_command_t *cmd = user_data;
    stonith_device_t *device = NULL;
    guint ndevices = g_list_length(devices);

    // ndevices is unsigned, so it must be formatted with %u rather than %d
    crm_info("Found %u matching device%s for target '%s'",
             ndevices, pcmk__plural_s(ndevices), cmd->target);

    if (devices != NULL) {
        /* Order based on priority */
        devices = g_list_sort(devices, sort_device_priority);
        device = g_hash_table_lookup(device_list, devices->data);
    }

    if (device == NULL) { // No device found
        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;

        pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
                            "No device configured for target '%s'",
                            cmd->target);
        send_async_reply(cmd, &result, 0, false);
        pcmk__reset_result(&result);
        free_async_command(cmd);
        g_list_free_full(devices, free);

    } else { // Device found, schedule it for fencing
        cmd->device_list = devices;
        cmd->next_device_iter = devices->next;
        schedule_stonith_command(cmd, device);
    }
}
/*!
 * \internal
 * \brief Execute a fence action via the local node
 *
 * \param[in]  msg     Fencing request
 * \param[out] result  Where to store result of fence action
 *
 * \note If the request cannot be turned into a command, \p result is set to a
 *       protocol error. If a specific device is requested but not found,
 *       \p result is set to PCMK_EXEC_NO_FENCE_DEVICE. Otherwise the action
 *       is scheduled (or a device search is started) and \p result is set to
 *       PCMK_EXEC_PENDING.
 */
static void
fence_locally(xmlNode *msg, pcmk__action_result_t *result)
{
    const char *device_id = NULL;
    stonith_device_t *device = NULL;
    async_command_t *cmd = NULL;
    xmlNode *dev = NULL;

    CRM_CHECK((msg != NULL) && (result != NULL), return);

    dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);

    cmd = create_async_command(msg);
    if (cmd == NULL) {
        crm_log_xml_warn(msg, "invalid");
        fenced_set_protocol_error(result);
        return;
    }

    device_id = crm_element_value(dev, F_STONITH_DEVICE);
    if (device_id != NULL) {
        // A specific device was requested
        device = g_hash_table_lookup(device_list, device_id);
        if (device == NULL) {
            crm_err("Requested device '%s' is not available", device_id);
            pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
                                "Requested device '%s' not found", device_id);

            /* The command will never be scheduled, so release it here
             * instead of leaking it.
             */
            free_async_command(cmd);
            return;
        }
        schedule_stonith_command(cmd, device);

    } else {
        // No device requested, so search for devices capable of the action
        const char *host = crm_element_value(dev, F_STONITH_TARGET);

        if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
            // The target was given as a node ID; map it to a node name
            int nodeid = 0;
            crm_node_t *node = NULL;

            pcmk__scan_min_int(host, &nodeid, 0);
            node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
            if (node != NULL) {
                host = node->uname;
            }
        }

        /* If we get to here, then self-fencing is implicitly allowed */
        get_capable_devices(host, cmd->action, cmd->default_timeout,
                            TRUE, cmd, stonith_fence_get_devices_cb,
                            fenced_support_flag(cmd->action));
    }

    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
/*!
 * \internal
 * \brief Create the XML reply for a fencing request
 *
 * \param[in] request  Request being answered (may be NULL)
 * \param[in] data     If not NULL, attached to the reply as call data
 * \param[in] result   Full result of the fencing operation
 *
 * \return Newly created XML reply
 * \note The caller is responsible for freeing the result.
 * \note construct_async_reply() is similar, but takes its values from an
 *       async_command_t, whereas this function takes them from the request.
 */
xmlNode *
fenced_construct_reply(const xmlNode *request, xmlNode *data,
                       const pcmk__action_result_t *result)
{
    // Request attributes that are copied into the reply
    const char *names[] = {
        F_STONITH_OPERATION,
        F_STONITH_CALLID,
        F_STONITH_CLIENTID,
        F_STONITH_CLIENTNAME,
        F_STONITH_REMOTE_OP_ID,
        F_STONITH_CALLOPTS
    };
    xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY);

    crm_xml_add(reply, "st_origin", __func__);
    crm_xml_add(reply, F_TYPE, T_STONITH_NG);
    stonith__xe_set_result(reply, result);

    if (request == NULL) {
        /* Most likely the operation was initiated before we came up, so we
         * lack the information needed to give clients a full result.
         *
         * @TODO Maybe synchronize this information at start-up?
         */
        crm_warn("Missing request information for client notifications for "
                 "operation with result '%s' (initiated before we came up?)",
                 pcmk_exec_status_str(result->execution_status));
        return reply;
    }

    for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
        crm_xml_add(reply, names[lpc],
                    crm_element_value(request, names[lpc]));
    }

    if (data != NULL) {
        add_message_xml(reply, F_STONITH_CALLDATA, data);
    }
    return reply;
}
/*!
 * \internal
 * \brief Create the XML reply for an asynchronous fencing command
 *
 * \param[in] cmd     Fencing command being answered
 * \param[in] result  Result of the command
 *
 * \return Newly created XML reply, populated from \p cmd and \p result
 */
static xmlNode *
construct_async_reply(const async_command_t *cmd,
                      const pcmk__action_result_t *result)
{
    xmlNode *xml = create_xml_node(NULL, T_STONITH_REPLY);

    // Fixed header fields
    crm_xml_add(xml, "st_origin", __func__);
    crm_xml_add(xml, F_TYPE, T_STONITH_NG);

    // Values copied from the command
    crm_xml_add(xml, F_STONITH_OPERATION, cmd->op);
    crm_xml_add(xml, F_STONITH_DEVICE, cmd->device);
    crm_xml_add(xml, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
    crm_xml_add(xml, F_STONITH_CLIENTID, cmd->client);
    crm_xml_add(xml, F_STONITH_CLIENTNAME, cmd->client_name);
    crm_xml_add(xml, F_STONITH_TARGET, cmd->target);

    // NOTE(review): the action field is filled from cmd->op, not cmd->action
    // -- confirm whether that is intended
    crm_xml_add(xml, F_STONITH_ACTION, cmd->op);
    crm_xml_add(xml, F_STONITH_ORIGIN, cmd->origin);
    crm_xml_add_int(xml, F_STONITH_CALLID, cmd->id);
    crm_xml_add_int(xml, F_STONITH_CALLOPTS, cmd->options);

    stonith__xe_set_result(xml, result);
    return xml;
}
/* Return true only if the peer is non-NULL, has a node name, and has the
 * cluster process flag (crm_get_cluster_proc()) set in its process mask.
 */
bool fencing_peer_active(crm_node_t *peer)
{
    return (peer != NULL)
           && (peer->uname != NULL)
           && pcmk_is_set(peer->processes, crm_get_cluster_proc());
}
/* Record the current time, as obtained from
 * qb_util_timespec_from_epoch_get(), as the completion time of a remote
 * fencing operation (seconds and nanoseconds stored separately).
 */
void
set_fencing_completed(remote_fencing_op_t *op)
{
    struct timespec now;

    qb_util_timespec_from_epoch_get(&now);
    op->completed = now.tv_sec;
    op->completed_nsec = now.tv_nsec;
}
/*!
 * \internal
 * \brief Find another node to do the fencing if the target is the local node
 *
 * \param[in] target  Node that must be fenced
 *
 * \return Name of an active peer (other than \p target) that should fence
 *         \p target, or NULL if \p target is not the local node or no such
 *         peer exists
 */
static const char *
check_alternate_host(const char *target)
{
    GHashTableIter peers;
    crm_node_t *peer = NULL;

    // Only requests to fence ourselves need to be handed to someone else
    if (!pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
        return NULL;
    }

    g_hash_table_iter_init(&peers, crm_peer_cache);
    while (g_hash_table_iter_next(&peers, NULL, (void **)&peer)) {
        if (!fencing_peer_active(peer)) {
            continue;
        }
        if (pcmk__str_eq(peer->uname, target, pcmk__str_casei)) {
            continue;
        }
        crm_notice("Forwarding self-fencing request to %s",
                   peer->uname);
        return peer->uname;
    }

    crm_warn("Will handle own fencing because no peer can");
    return NULL;
}
/*!
 * \internal
 * \brief Deliver a reply to a CPG peer or to an IPC client
 *
 * \param[in]     reply         XML reply to send
 * \param[in]     call_options  Send synchronously if st_opt_sync_call is set
 * \param[in]     remote_peer   If not NULL, name of peer node to send CPG reply
 * \param[in,out] client        If not NULL, client to send IPC reply
 *
 * \note A peer name takes precedence: the IPC client is used only when
 *       \p remote_peer is NULL.
 */
static void
stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer,
                   pcmk__client_t *client)
{
    CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
              return);

    if (remote_peer != NULL) {
        send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng,
                             reply, FALSE);
        return;
    }
    do_local_reply(reply, client, call_options);
}
/* Delete the relay operation referenced by a request, if any.
 *
 * The relay operation is looked up in stonith_remote_op_list by the request's
 * F_STONITH_REMOTE_OP_ID_RELAY value, and is deleted only when the request's
 * target is the local node. Before deletion, it is also dropped from the
 * duplicates list of every other tracked operation, so that no such list
 * keeps a pointer to an operation that is no longer in the table.
 */
static void
remove_relay_op(xmlNode * request)
{
    xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE);
    const char *relay_op_id = NULL;
    const char *op_id = NULL;
    const char *client_name = NULL;
    const char *target = NULL;
    remote_fencing_op_t *relay_op = NULL;

    if (dev) {
        target = crm_element_value(dev, F_STONITH_TARGET);
    }

    relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY);
    op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
    client_name = crm_element_value(request, F_STONITH_CLIENTNAME);

    /* Delete RELAY operation. */
    if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
        relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);

        if (relay_op) {
            GHashTableIter iter;
            remote_fencing_op_t *list_op = NULL;

            g_hash_table_iter_init(&iter, stonith_remote_op_list);

            /* If the operation to be deleted is registered as a duplicate of
             * another operation, delete that registration. The removal must
             * be applied to the list that holds the registration
             * (list_op->duplicates), not to the relay op's own list.
             * g_list_remove() does nothing if the element is not present.
             */
            while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
                if (list_op != relay_op) {
                    list_op->duplicates = g_list_remove(list_op->duplicates,
                                                        relay_op);
                }
            }

            crm_debug("Deleting relay op %s ('%s'%s%s for %s), "
                      "replaced by op %s ('%s'%s%s for %s)",
                      relay_op->id, relay_op->action,
                      (relay_op->target == NULL)? "" : " targeting ",
                      pcmk__s(relay_op->target, ""),
                      relay_op->client_name, op_id, relay_op->action,
                      (target == NULL)? "" : " targeting ", pcmk__s(target, ""),
                      client_name);

            g_hash_table_remove(stonith_remote_op_list, relay_op_id);
        }
    }
}
/*!
 * \internal
 * \brief Check whether an API request was sent by a privileged user
 *
 * API commands related to fencing configuration may be done only by privileged
 * IPC users (i.e. root or hacluster), because all other users should go through
 * the CIB to have ACLs applied. If no client was given, this is a peer request,
 * which is always allowed.
 *
 * \param[in] c   IPC client that sent request (or NULL if sent by CPG peer)
 * \param[in] op  Requested API operation (for logging only)
 *
 * \return true if sender is peer or privileged client, otherwise false
 */
static inline bool
is_privileged(const pcmk__client_t *c, const char *op)
{
    if ((c != NULL) && !pcmk_is_set(c->flags, pcmk__client_privileged)) {
        crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
                 pcmk__s(op, ""), pcmk__client_name(c));
        return false;
    }
    return true;
}
// CRM_OP_REGISTER
/* Answer a registration request with a reply that carries the operation name
 * and the requesting IPC client's ID. The reply is flagged to reuse the
 * original request's options.
 */
static xmlNode *
handle_register_request(pcmk__request_t *request)
{
    xmlNode *ack = create_xml_node(NULL, "reply");

    CRM_ASSERT(request->ipc_client != NULL);

    crm_xml_add(ack, F_STONITH_OPERATION, CRM_OP_REGISTER);
    crm_xml_add(ack, F_STONITH_CLIENTID, request->ipc_client->id);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    pcmk__set_request_flags(request, pcmk__request_reuse_options);
    return ack;
}
// STONITH_OP_EXEC
/* Run a fence agent action for the request. No reply is built while the
 * action is still pending; otherwise the reply carries the recorded result.
 */
static xmlNode *
handle_agent_request(pcmk__request_t *request)
{
    execute_agent_action(request->xml, &request->result);

    if (request->result.execution_status != PCMK_EXEC_PENDING) {
        return fenced_construct_reply(request->xml, NULL, &request->result);
    }
    return NULL;
}
// STONITH_OP_TIMEOUT_UPDATE
/* Pass the timeout carried by the request, together with its client ID and
 * call ID, to do_stonith_async_timeout_update(). This never produces a reply.
 */
static xmlNode *
handle_update_timeout_request(pcmk__request_t *request)
{
    int new_timeout = 0;
    const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
    const char *call_id = crm_element_value(request->xml, F_STONITH_CALLID);

    crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &new_timeout);
    do_stonith_async_timeout_update(client_id, call_id, new_timeout);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
// STONITH_OP_QUERY
/* Start a search for devices able to perform the queried action on the
 * queried target. The reply is prepared here but handed to
 * stonith_query_capable_device_cb() via the query data, so this function
 * itself always returns NULL.
 */
static xmlNode *
handle_query_request(pcmk__request_t *request)
{
    const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
    const char *target = NULL;
    const char *action = NULL;
    struct st_query_data *query = NULL;
    xmlNode *dev = NULL;
    int timeout = 0;

    if (request->peer != NULL) {
        // Record it for the future notification
        create_remote_stonith_op(client_id, request->xml, TRUE);
    }

    /* Delete the DC node RELAY operation. */
    remove_relay_op(request->xml);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

    dev = get_xpath_object("//@" F_STONITH_ACTION, request->xml, LOG_NEVER);
    if (dev != NULL) {
        if (pcmk__str_eq(crm_element_value(dev, F_STONITH_DEVICE),
                         "manual_ack", pcmk__str_casei)) {
            return NULL; // No query or reply necessary
        }
        target = crm_element_value(dev, F_STONITH_TARGET);
        action = crm_element_value(dev, F_STONITH_ACTION);
    }

    crm_log_xml_trace(request->xml, "Query");

    // Collect everything the device-search callback needs
    query = calloc(1, sizeof(struct st_query_data));
    CRM_ASSERT(query != NULL);

    query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
    pcmk__str_update(&query->remote_peer, request->peer);
    pcmk__str_update(&query->client_id, client_id);
    pcmk__str_update(&query->target, target);
    pcmk__str_update(&query->action, action);
    query->call_options = request->call_options;

    crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &timeout);
    get_capable_devices(target, action, timeout,
                        pcmk_is_set(query->call_options, st_opt_allow_suicide),
                        query, stonith_query_capable_device_cb,
                        st_device_supports_none);
    return NULL;
}
// T_STONITH_NOTIFY
/* Turn notification callbacks on and/or off for the requesting IPC client,
 * according to the activate/deactivate attributes of the request, and
 * acknowledge the request.
 */
static xmlNode *
handle_notify_request(pcmk__request_t *request)
{
    const char *to_enable = NULL;
    const char *to_disable = NULL;

    CRM_ASSERT(request->ipc_client != NULL);

    to_enable = crm_element_value(request->xml, F_STONITH_NOTIFY_ACTIVATE);
    if (to_enable != NULL) {
        crm_debug("Enabling %s callbacks for client %s",
                  to_enable, pcmk__request_origin(request));
        pcmk__set_client_flags(request->ipc_client,
                               get_stonith_flag(to_enable));
    }

    to_disable = crm_element_value(request->xml, F_STONITH_NOTIFY_DEACTIVATE);
    if (to_disable != NULL) {
        crm_debug("Disabling %s callbacks for client %s",
                  to_disable, pcmk__request_origin(request));
        pcmk__clear_client_flags(request->ipc_client,
                                 get_stonith_flag(to_disable));
    }

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    pcmk__set_request_flags(request, pcmk__request_reuse_options);

    return pcmk__ipc_create_ack(request->ipc_flags, "ack", NULL, CRM_EX_OK);
}
// STONITH_OP_RELAY
/* Start a remote fencing operation for a forwarded fencing request. A reply
 * is returned only if the operation could not be initiated; otherwise the
 * result is left pending and NULL is returned.
 */
static xmlNode *
handle_relay_request(pcmk__request_t *request)
{
    xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml,
                                    LOG_TRACE);

    crm_notice("Received forwarded fencing request from "
               "%s %s to fence (%s) peer %s",
               pcmk__request_origin_type(request),
               pcmk__request_origin(request),
               crm_element_value(dev, F_STONITH_ACTION),
               crm_element_value(dev, F_STONITH_TARGET));

    if (initiate_remote_stonith_op(NULL, request->xml, FALSE) != NULL) {
        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
        return NULL;
    }

    fenced_set_protocol_error(&request->result);
    return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_FENCE
/* Handle a request to fence a target.
 *
 * Depending on the request, the fencing is executed locally, treated as a
 * manual confirmation, relayed to another node, or started as a remote
 * fencing operation. A reply is returned unless the recorded result is
 * PCMK_EXEC_PENDING, in which case NULL is returned.
 */
static xmlNode *
handle_fence_request(pcmk__request_t *request)
{
// Requests from a peer, or any request in stand-alone mode, are executed here
if ((request->peer != NULL) || stand_alone) {
fence_locally(request->xml, &request->result);
// Manual confirmation: map the handler's return code to a result
} else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
switch (fenced_handle_manual_confirmation(request->ipc_client,
request->xml)) {
case pcmk_rc_ok:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
break;
case EINPROGRESS:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
break;
default:
fenced_set_protocol_error(&request->result);
break;
}
} else {
const char *alternate_host = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml,
LOG_TRACE);
const char *target = crm_element_value(dev, F_STONITH_TARGET);
const char *action = crm_element_value(dev, F_STONITH_ACTION);
const char *device = crm_element_value(dev, F_STONITH_DEVICE);
if (request->ipc_client != NULL) {
int tolerance = 0;
crm_notice("Client %s wants to fence (%s) %s using %s",
pcmk__request_origin(request), action,
target, (device? device : "any device"));
crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
// If the tolerance check passes, reply with success without fencing
if (stonith_check_fence_tolerance(tolerance, target, action)) {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
return fenced_construct_reply(request->xml, NULL,
&request->result);
}
// Non-NULL only if the target is this node and an active peer exists
alternate_host = check_alternate_host(target);
} else {
crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
request->peer, action, target,
(device == NULL)? "(any)" : device);
}
// alternate_host can be non-NULL only when ipc_client is non-NULL (set above)
if (alternate_host != NULL) {
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
// Fall back to the client ID in the request XML if the client has no ID
if (request->ipc_client->id == 0) {
client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
} else {
client_id = request->ipc_client->id;
}
/* Create a duplicate fencing operation to relay with the client ID.
* When a query response is received, this operation should be
* deleted to avoid keeping the duplicate around.
*/
op = create_remote_stonith_op(client_id, request->xml, FALSE);
// Only after the op is created is the request XML rewritten as a relay
crm_xml_add(request->xml, F_STONITH_OPERATION, STONITH_OP_RELAY);
crm_xml_add(request->xml, F_STONITH_CLIENTID,
request->ipc_client->id);
crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id);
send_cluster_message(crm_get_peer(0, alternate_host),
crm_msg_stonith_ng, request->xml, FALSE);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
} else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
FALSE) == NULL) {
fenced_set_protocol_error(&request->result);
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
}
}
// A pending result gets no reply from this function
if (request->result.execution_status == PCMK_EXEC_PENDING) {
return NULL;
}
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_FENCE_HISTORY
/* Process a fence history request. A reply carrying the history data is
 * built unless the request has st_opt_discard_reply set.
 */
static xmlNode *
handle_history_request(pcmk__request_t *request)
{
    xmlNode *history = NULL;
    xmlNode *reply = NULL;

    stonith_fence_history(request->xml, &history, request->peer,
                          request->call_options);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

    /* When the local node broadcasts its history, it sets
     * st_opt_discard_reply and doesn't need a reply.
     */
    reply = pcmk_is_set(request->call_options, st_opt_discard_reply)? NULL
            : fenced_construct_reply(request->xml, history, &request->result);

    free_xml(history);
    return reply;
}
// STONITH_OP_DEVICE_ADD
/* Register the fence device described in the request, if the sender is
 * allowed to do so. A device notification is sent and a reply is built in
 * either case.
 */
static xmlNode *
handle_device_add_request(pcmk__request_t *request)
{
    const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
    xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml,
                                    LOG_ERR);
    const char *dev_id = NULL;

    if (!is_privileged(request->ipc_client, op)) {
        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
                         PCMK_EXEC_INVALID,
                         "Unprivileged users must register device via CIB");
    } else {
        int rc = stonith_device_register(dev, FALSE);

        if (rc == pcmk_ok) {
            pcmk__set_result(&request->result, CRM_EX_OK,
                             stonith__legacy2status(rc), NULL);
        } else {
            pcmk__set_result(&request->result, CRM_EX_ERROR,
                             stonith__legacy2status(rc), pcmk_strerror(rc));
        }
    }

    if (dev != NULL) {
        dev_id = ID(dev);
    }
    fenced_send_device_notification(op, &request->result, dev_id);
    return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_DEVICE_DEL
/* Remove the fence device named in the request, if the sender is allowed to
 * do so. A device notification is sent and a reply is built in either case.
 */
static xmlNode *
handle_device_delete_request(pcmk__request_t *request)
{
    xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml,
                                    LOG_ERR);
    const char *device_id = crm_element_value(dev, XML_ATTR_ID);
    const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);

    if (!is_privileged(request->ipc_client, op)) {
        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
                         PCMK_EXEC_INVALID,
                         "Unprivileged users must delete device via CIB");
    } else {
        stonith_device_remove(device_id, false);
        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    }

    fenced_send_device_notification(op, &request->result, device_id);
    return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_ADD
/* Register the fencing level described in the request, if the sender is
 * allowed to do so. A level notification is sent and a reply is built in
 * either case.
 */
static xmlNode *
handle_level_add_request(pcmk__request_t *request)
{
    const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
    char *level_desc = NULL;

    if (!is_privileged(request->ipc_client, op)) {
        // Unpack only to obtain the description used in the notification
        unpack_level_request(request->xml, NULL, NULL, NULL, &level_desc);
        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
                         PCMK_EXEC_INVALID,
                         "Unprivileged users must add level via CIB");
    } else {
        fenced_register_level(request->xml, &level_desc, &request->result);
    }

    fenced_send_level_notification(op, &request->result, level_desc);
    free(level_desc);
    return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_DEL
/* Unregister the fencing level described in the request, if the sender is
 * allowed to do so. A level notification is sent and a reply is built in
 * either case.
 */
static xmlNode *
handle_level_delete_request(pcmk__request_t *request)
{
    const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
    char *level_desc = NULL;

    if (!is_privileged(request->ipc_client, op)) {
        // Unpack only to obtain the description used in the notification
        unpack_level_request(request->xml, NULL, NULL, NULL, &level_desc);
        pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
                         PCMK_EXEC_INVALID,
                         "Unprivileged users must delete level via CIB");
    } else {
        fenced_unregister_level(request->xml, &level_desc, &request->result);
    }

    fenced_send_level_notification(op, &request->result, level_desc);
    free(level_desc);
    return fenced_construct_reply(request->xml, NULL, &request->result);
}
// CRM_OP_RM_NODE_CACHE
/* Pass the node ID and node name carried by the request to
 * reap_crm_member(). This never produces a reply.
 */
static xmlNode *
handle_cache_request(pcmk__request_t *request)
{
    const char *node_name = NULL;
    int id = 0;

    crm_element_value_int(request->xml, XML_ATTR_ID, &id);
    node_name = crm_element_value(request->xml, XML_ATTR_UNAME);
    reap_crm_member(id, node_name);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
/* Fallback handler for unrecognized operations: log the request, record a
 * protocol error as its result, and build a reply from that result.
 */
static xmlNode *
handle_unknown_request(pcmk__request_t *request)
{
    const char *origin_type = pcmk__request_origin_type(request);
    const char *origin = pcmk__request_origin(request);

    crm_err("Unknown IPC request %s from %s %s",
            request->op, origin_type, origin);

    pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
                        "Unknown IPC request type '%s' (bug?)", request->op);

    return fenced_construct_reply(request->xml, NULL, &request->result);
}
static void
fenced_register_handlers(void)
{
pcmk__server_command_t handlers[] = {
{ CRM_OP_REGISTER, handle_register_request },
{ STONITH_OP_EXEC, handle_agent_request },
{ STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
{ STONITH_OP_QUERY, handle_query_request },
{ T_STONITH_NOTIFY, handle_notify_request },
{ STONITH_OP_RELAY, handle_relay_request },
{ STONITH_OP_FENCE, handle_fence_request },
{ STONITH_OP_FENCE_HISTORY, handle_history_request },
{ STONITH_OP_DEVICE_ADD, handle_device_add_request },
{ STONITH_OP_DEVICE_DEL, handle_device_delete_request },
{ STONITH_OP_LEVEL_ADD, handle_level_add_request },
{ STONITH_OP_LEVEL_DEL, handle_level_delete_request },
{ CRM_OP_RM_NODE_CACHE, handle_cache_request },
{ NULL, handle_unknown_request },
};
fenced_handlers = pcmk__register_handlers(handlers);
}
/* Destroy the request handler table, if one exists, and reset the pointer. */
void
fenced_unregister_handlers(void)
{
    if (fenced_handlers == NULL) {
        return;
    }
    g_hash_table_destroy(fenced_handlers);
    fenced_handlers = NULL;
}
/* Dispatch a request to its registered handler (creating the handler table
 * on first use), send any reply the handler returned, and log the outcome.
 */
static void
handle_request(pcmk__request_t *request)
{
    const char *reason = NULL;
    xmlNode *reply = NULL;

    if (fenced_handlers == NULL) {
        fenced_register_handlers();
    }

    reply = pcmk__process_request(request, fenced_handlers);

    if (reply != NULL) {
        if ((request->ipc_client == NULL)
            || !pcmk_is_set(request->flags, pcmk__request_reuse_options)) {

            stonith_send_reply(reply, request->call_options,
                               request->peer, request->ipc_client);

        } else {
            /* Certain IPC-only commands must reuse the call options from the
             * original request rather than the ones set by stonith_send_reply()
             * -> do_local_reply().
             */
            pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
                               request->ipc_flags);
            request->ipc_client->request_id = 0;
        }
        free_xml(reply);
    }

    // The exit reason, when there is one, is logged in parentheses
    reason = request->result.exit_reason;
    crm_debug("Processed %s request from %s %s: %s%s%s%s",
              request->op, pcmk__request_origin_type(request),
              pcmk__request_origin(request),
              pcmk_exec_status_str(request->result.execution_status),
              (reason == NULL)? "" : " (",
              (reason == NULL)? "" : reason,
              (reason == NULL)? "" : ")");
}
/* Process a reply message according to its operation: query replies and
 * notify/fence replies are passed to their processing functions, and any
 * other reply is logged and ignored.
 */
static void
handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
{
    // Copy, because request might be freed before we want to log this
    char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
    bool recognized = true;

    if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
        process_remote_stonith_query(request);

    } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) {
        fenced_process_fencing_reply(request);

    } else {
        recognized = false;
    }

    if (recognized) {
        crm_debug("Processed %s reply from %s %s",
                  op, ((client == NULL)? "peer" : "client"),
                  ((client == NULL)? remote_peer : pcmk__client_name(client)));
    } else {
        crm_err("Ignoring unknown %s reply from %s %s",
                pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"),
                ((client == NULL)? remote_peer : pcmk__client_name(client)));
        crm_log_xml_warn(request, "UnknownOp");
    }

    free(op);
}
/*!
 * \internal
 * \brief Process a message received from an IPC client or a CPG peer
 *
 * A message containing a reply element is handled as a reply; anything else
 * is handled as a request.
 *
 * \param[in,out] client       If not NULL, IPC client that sent message
 * \param[in]     id           If from IPC client, IPC message ID
 * \param[in]     flags        Message flags
 * \param[in,out] message      Message XML
 * \param[in]     remote_peer  If not NULL, CPG peer that sent message
 */
void
stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
                xmlNode *message, const char *remote_peer)
{
    bool is_reply = false;
    int call_options = st_opt_none;

    CRM_CHECK(message != NULL, return);

    is_reply = (get_xpath_object("//" T_STONITH_REPLY, message,
                                 LOG_NEVER) != NULL);

    crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options);
    crm_debug("Processing %ssynchronous %s %s %u from %s %s",
              pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
              crm_element_value(message, F_STONITH_OPERATION),
              (is_reply? "reply" : "request"), id,
              ((client == NULL)? "peer" : "client"),
              ((client == NULL)? remote_peer : pcmk__client_name(client)));

    // A synchronous call from a client must match the client's request ID
    if (pcmk_is_set(call_options, st_opt_sync_call)) {
        CRM_ASSERT(client == NULL || client->request_id == id);
    }

    if (!is_reply) {
        pcmk__request_t request = {
            .ipc_client     = client,
            .ipc_id         = id,
            .ipc_flags      = flags,
            .peer           = remote_peer,
            .xml            = message,
            .call_options   = call_options,
            .result         = PCMK__UNKNOWN_RESULT,
        };

        request.op = crm_element_value_copy(request.xml, F_STONITH_OPERATION);
        CRM_CHECK(request.op != NULL, return);

        if (pcmk_is_set(request.call_options, st_opt_sync_call)) {
            pcmk__set_request_flags(&request, pcmk__request_sync);
        }

        handle_request(&request);
        pcmk__reset_request(&request);

    } else {
        handle_reply(client, message, remote_peer);
    }
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Nov 23, 4:10 PM (17 h, 6 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018865
Default Alt Text
(193 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment