diff --git a/cts/cts-exec.in b/cts/cts-exec.in
index 3b6d56afbc..8cc203fcb9 100644
--- a/cts/cts-exec.in
+++ b/cts/cts-exec.in
@@ -1,1219 +1,1219 @@
#!@PYTHON@
""" Regression tests for Pacemaker's pacemaker-execd
"""
# Pacemaker targets compatibility with Python 2.7 and 3.2+
from __future__ import print_function, unicode_literals, absolute_import, division
__copyright__ = "Copyright 2012-2019 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import io
import os
import stat
import sys
import subprocess
import shlex
import shutil
import time
# Where to find test binaries
# Prefer the source tree if available
BUILD_DIR = "@abs_top_builddir@"
TEST_DIR = sys.path[0]
SBIN_DIR = "@sbindir@"
# File permissions for executable scripts we create
EXECMODE = stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
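# (equivalent to mode 0o555: read and execute for owner, group, and others)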
# These values must be kept in sync with include/crm/crm.h
class CrmExit(object):
OK = 0
ERROR = 1
INVALID_PARAM = 2
UNIMPLEMENT_FEATURE = 3
INSUFFICIENT_PRIV = 4
NOT_INSTALLED = 5
NOT_CONFIGURED = 6
NOT_RUNNING = 7
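# The codes 64-78 below mirror the sysexits.h conventions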
USAGE = 64
DATAERR = 65
NOINPUT = 66
NOUSER = 67
NOHOST = 68
UNAVAILABLE = 69
SOFTWARE = 70
OSERR = 71
OSFILE = 72
CANTCREAT = 73
IOERR = 74
TEMPFAIL = 75
PROTOCOL = 76
NOPERM = 77
CONFIG = 78
FATAL = 100
PANIC = 101
DISCONNECT = 102
SOLO = 103
DIGEST = 104
NOSUCH = 105
QUORUM = 106
UNSAFE = 107
EXISTS = 108
MULTIPLE = 109
OLD = 110
TIMEOUT = 124
MAX = 255
def update_path():
""" Set the PATH environment variable appropriately for the tests """
new_path = os.environ['PATH']
if os.path.exists("%s/cts-exec.in" % TEST_DIR):
print("Running tests from the source tree: %s (%s)" % (BUILD_DIR, TEST_DIR))
# For pacemaker-execd, cts-exec-helper, and pacemaker-remoted
new_path = "%s/daemons/execd:%s" % (BUILD_DIR, new_path)
new_path = "%s/tools:%s" % (BUILD_DIR, new_path) # For crm_resource
# For pacemaker-fenced
new_path = "%s/daemons/fenced:%s" % (BUILD_DIR, new_path)
# For cts-support
new_path = "%s/cts:%s" % (BUILD_DIR, new_path)
else:
print("Running tests from the install tree: @CRM_DAEMON_DIR@ (not %s)" % TEST_DIR)
# For cts-exec-helper, cts-support, pacemaker-execd, pacemaker-fenced,
# and pacemaker-remoted
new_path = "@CRM_DAEMON_DIR@:%s" % (new_path)
print('Using PATH="{}"'.format(new_path))
os.environ['PATH'] = new_path
def pipe_output(pipes, stdout=True, stderr=False):
""" Wrapper to get text output from pipes regardless of Python version """
output = ""
pipe_outputs = pipes.communicate()
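# communicate() returns text strings on Python 2 but bytes on Python 3,
# so on Python 3 the output must be decoded before concatenation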
if sys.version_info < (3,):
if stdout:
output = output + pipe_outputs[0]
if stderr:
output = output + pipe_outputs[1]
else:
if stdout:
output = output + pipe_outputs[0].decode(sys.stdout.encoding)
if stderr:
output = output + pipe_outputs[1].decode(sys.stderr.encoding)
return output
def output_from_command(command):
""" Run a command, and return its standard output. """
test = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE)
test.wait()
return pipe_output(test).split("\n")
class TestError(Exception):
""" Base class for exceptions in this module """
pass
class ExitCodeError(TestError):
""" Exception raised when command exit status is unexpected """
def __init__(self, exit_code):
self.exit_code = exit_code
def __str__(self):
return repr(self.exit_code)
class OutputNotFoundError(TestError):
""" Exception raised when command output does not contain wanted string """
def __init__(self, output):
self.output = output
def __str__(self):
return repr(self.output)
class OutputFoundError(TestError):
""" Exception raised when command output contains unwanted string """
def __init__(self, output):
self.output = output
def __str__(self):
return repr(self.output)
class Test(object):
""" Executor for a single pacemaker-execd regression test """
def __init__(self, name, description, verbose=0, tls=0):
self.name = name
self.description = description
self.cmds = []
if tls:
self.daemon_location = "pacemaker-remoted"
else:
self.daemon_location = "pacemaker-execd"
self.test_tool_location = "cts-exec-helper"
self.verbose = verbose
self.tls = tls
self.result_txt = ""
self.cmd_tool_output = ""
self.result_exitcode = CrmExit.OK
self.execd_process = None
self.stonith_process = None
self.executed = 0
def __new_cmd(self, cmd, args, exitcode, stdout_match="", no_wait=0, stdout_negative_match="", kill=None):
""" Add a command to be executed as part of this test """
if self.verbose and cmd == self.test_tool_location:
args = args + " -V "
if (cmd == self.test_tool_location) and self.tls:
args = args + " -S "
self.cmds.append(
{
"cmd" : cmd,
"kill" : kill,
"args" : args,
"expected_exitcode" : exitcode,
"stdout_match" : stdout_match,
"stdout_negative_match" : stdout_negative_match,
"no_wait" : no_wait,
"cmd_output" : "",
}
)
def start_environment(self):
""" Prepare the host for running a test """
### make sure we are in full control here ###
cmd = shlex.split("killall -q -9 pacemaker-fenced lt-pacemaker-fenced pacemaker-execd lt-pacemaker-execd cts-exec-helper lt-cts-exec-helper pacemaker-remoted")
test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
test.wait()
additional_args = ""
if self.tls == 0:
self.stonith_process = subprocess.Popen(shlex.split("pacemaker-fenced -s"))
if self.verbose:
additional_args = additional_args + " -V"
self.execd_process = subprocess.Popen(shlex.split("%s %s -l /tmp/pacemaker-execd-regression.log"
% (self.daemon_location, additional_args)))
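# A short pause here gives the newly launched daemon time to initialize
# before the test commands try to connect to it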
time.sleep(1)
def clean_environment(self):
""" Clean up the host after running a test """
if self.execd_process:
self.execd_process.terminate()
self.execd_process.wait()
if self.verbose:
print("Daemon output")
logfile = io.open('/tmp/pacemaker-execd-regression.log', 'rt', errors='replace')
for line in logfile:
print(line.strip().encode('utf-8', 'replace'))
os.remove('/tmp/pacemaker-execd-regression.log')
if self.stonith_process:
self.stonith_process.terminate()
self.stonith_process.wait()
self.execd_process = None
self.stonith_process = None
def add_sys_cmd(self, cmd, args):
""" Add a simple command to be executed as part of this test """
self.__new_cmd(cmd, args, CrmExit.OK, "")
def add_cmd_check_stdout(self, args, match, no_match=""):
""" Add a command with expected output to be executed as part of this test """
self.__new_cmd(self.test_tool_location, args, CrmExit.OK, match, 0, no_match)
def add_cmd(self, args):
""" Add a cts-exec-helper command to be executed as part of this test """
self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "")
def add_cmd_and_kill(self, kill_proc, args):
""" Add a cts-exec-helper command and system command to be executed as part of this test """
self.__new_cmd(self.test_tool_location, args, CrmExit.OK, "", kill=kill_proc)
def add_expected_fail_cmd(self, args, exitcode=CrmExit.ERROR):
""" Add a cts-exec-helper command to be executed as part of this test and expected to fail """
self.__new_cmd(self.test_tool_location, args, exitcode, "")
def get_exitcode(self):
""" Return the exit status of the last test execution """
return self.result_exitcode
def print_result(self, filler):
""" Print the result of the last test execution """
print("%s%s" % (filler, self.result_txt))
def run_cmd(self, args):
""" Execute a command as part of this test """
cmd = shlex.split(args['args'])
cmd.insert(0, args['cmd'])
if self.verbose:
print("\n\nRunning: "+" ".join(cmd))
test = subprocess.Popen(cmd, stdout=subprocess.PIPE)
if args['kill']:
if self.verbose:
print("Also running: "+args['kill'])
### Typically, the kill argument is used to detect some sort of
### failure. Without yielding for a few seconds here, the process
### launched earlier that is listening for the failure may not have
### time to connect to pacemaker-execd.
time.sleep(2)
subprocess.Popen(shlex.split(args['kill']))
if args['no_wait'] == 0:
test.wait()
else:
return CrmExit.OK
output = pipe_output(test)
args['cmd_output'] = output
if test.returncode != args['expected_exitcode']:
raise ExitCodeError(test.returncode)
if args['stdout_match'] != "" and output.count(args['stdout_match']) == 0:
raise OutputNotFoundError(output)
if args['stdout_negative_match'] != "" and output.count(args['stdout_negative_match']) != 0:
raise OutputFoundError(output)
def set_error(self, step, cmd):
""" Record failure of this test """
msg = "FAILURE - '%s' failed at step %d. Command: %s %s"
self.result_txt = msg % (self.name, step, cmd['cmd'], cmd['args'])
self.result_exitcode = CrmExit.ERROR
def run(self):
""" Execute this test. """
res = 0
i = 1
if self.tls and self.name.count("stonith") != 0:
self.result_txt = "SKIPPED - '%s' - disabled when testing pacemaker_remote" % (self.name)
print(self.result_txt)
return res
self.start_environment()
if self.verbose:
print("\n--- START TEST - %s" % self.name)
self.result_txt = "SUCCESS - '%s'" % (self.name)
self.result_exitcode = CrmExit.OK
for cmd in self.cmds:
try:
self.run_cmd(cmd)
except ExitCodeError as e:
print(cmd['cmd_output'])
print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode']))
self.set_error(i, cmd)
break
except OutputNotFoundError as e:
print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e))
self.set_error(i, cmd)
break
except OutputFoundError as e:
print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e))
self.set_error(i, cmd)
break
if self.verbose:
print(cmd['cmd_output'].strip())
print("Step %d SUCCESS" % (i))
i = i + 1
self.clean_environment()
print(self.result_txt)
if self.verbose:
print("--- END TEST - %s\n" % self.name)
self.executed = 1
return res
class Tests(object):
""" Collection of all pacemaker-execd regression tests """
def __init__(self, verbose=0, tls=0):
self.tests = []
self.verbose = verbose
self.tls = tls
self.rsc_classes = output_from_command("crm_resource --list-standards")
self.rsc_classes = self.rsc_classes[:-1] # Strip trailing empty line
self.installed_files = []
self.action_timeout = " -t 9000 "
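# Default action timeout passed to cts-exec-helper via -t (in milliseconds)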
if self.tls:
self.rsc_classes.remove("stonith")
if "systemd" in self.rsc_classes:
try:
# This code doesn't need this import, but pacemaker-cts-dummyd
# does, so ensure the dependency is available rather than cause
# all systemd tests to fail.
import systemd.daemon
except ImportError:
print("Python systemd bindings not found.")
print("The tests for systemd class are not going to be run.")
self.rsc_classes.remove("systemd")
print("Testing resource classes", repr(self.rsc_classes))
self.common_cmds = {
"ocf_reg_line" : "-c register_rsc -r ocf_test_rsc "+self.action_timeout+" -C ocf -P pacemaker -T Dummy",
"ocf_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"",
"ocf_unreg_line" : "-c unregister_rsc -r \"ocf_test_rsc\" "+self.action_timeout,
"ocf_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:ocf_test_rsc action:none rc:ok op_status:complete\"",
"ocf_start_line" : "-c exec -r \"ocf_test_rsc\" -a \"start\" "+self.action_timeout,
"ocf_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:start rc:ok op_status:complete\" ",
"ocf_stop_line" : "-c exec -r \"ocf_test_rsc\" -a \"stop\" "+self.action_timeout,
"ocf_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:stop rc:ok op_status:complete\" ",
"ocf_monitor_line" : '-c exec -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout,
"ocf_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout,
"ocf_cancel_line" : '-c cancel -r ocf_test_rsc -a monitor -i 2s ' + self.action_timeout,
"ocf_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:ocf_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
"systemd_reg_line" : "-c register_rsc -r systemd_test_rsc " +
self.action_timeout +
" -C systemd -T pacemaker-cts-dummyd@3",
"systemd_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"",
"systemd_unreg_line" : "-c unregister_rsc -r \"systemd_test_rsc\" "+self.action_timeout,
"systemd_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:systemd_test_rsc action:none rc:ok op_status:complete\"",
"systemd_start_line" : "-c exec -r \"systemd_test_rsc\" -a \"start\" "+self.action_timeout,
"systemd_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:start rc:ok op_status:complete\" ",
"systemd_stop_line" : "-c exec -r \"systemd_test_rsc\" -a \"stop\" "+self.action_timeout,
"systemd_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:stop rc:ok op_status:complete\" ",
"systemd_monitor_line" : '-c exec -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout,
"systemd_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:complete\" -t 15000 ",
"systemd_cancel_line" : '-c cancel -r systemd_test_rsc -a monitor -i 2s ' + self.action_timeout,
"systemd_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:systemd_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
"upstart_reg_line" : "-c register_rsc -r upstart_test_rsc "+self.action_timeout+" -C upstart -T pacemaker-cts-dummyd",
"upstart_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"",
"upstart_unreg_line" : "-c unregister_rsc -r \"upstart_test_rsc\" "+self.action_timeout,
"upstart_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:upstart_test_rsc action:none rc:ok op_status:complete\"",
"upstart_start_line" : "-c exec -r \"upstart_test_rsc\" -a \"start\" "+self.action_timeout,
"upstart_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:start rc:ok op_status:complete\" ",
"upstart_stop_line" : "-c exec -r \"upstart_test_rsc\" -a \"stop\" "+self.action_timeout,
"upstart_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:stop rc:ok op_status:complete\" ",
"upstart_monitor_line" : '-c exec -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout,
"upstart_monitor_event" : '-l "NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:complete" -t 15000',
"upstart_cancel_line" : '-c cancel -r upstart_test_rsc -a monitor -i 2s ' + self.action_timeout,
"upstart_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:upstart_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
"service_reg_line" : "-c register_rsc -r service_test_rsc "+self.action_timeout+" -C service -T LSBDummy",
"service_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:service_test_rsc action:none rc:ok op_status:complete\"",
"service_unreg_line" : "-c unregister_rsc -r \"service_test_rsc\" "+self.action_timeout,
"service_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:service_test_rsc action:none rc:ok op_status:complete\"",
"service_start_line" : "-c exec -r \"service_test_rsc\" -a \"start\" "+self.action_timeout,
"service_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:start rc:ok op_status:complete\" ",
"service_stop_line" : "-c exec -r \"service_test_rsc\" -a \"stop\" "+self.action_timeout,
"service_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:stop rc:ok op_status:complete\" ",
"service_monitor_line" : '-c exec -r service_test_rsc -a monitor -i 2s ' + self.action_timeout,
"service_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout,
"service_cancel_line" : '-c cancel -r service_test_rsc -a monitor -i 2s ' + self.action_timeout,
"service_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:service_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
"lsb_reg_line" : "-c register_rsc -r lsb_test_rsc "+self.action_timeout+" -C lsb -T LSBDummy",
"lsb_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\" ",
"lsb_unreg_line" : "-c unregister_rsc -r \"lsb_test_rsc\" "+self.action_timeout,
"lsb_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:lsb_test_rsc action:none rc:ok op_status:complete\"",
"lsb_start_line" : "-c exec -r \"lsb_test_rsc\" -a \"start\" "+self.action_timeout,
"lsb_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:start rc:ok op_status:complete\" ",
"lsb_stop_line" : "-c exec -r \"lsb_test_rsc\" -a \"stop\" "+self.action_timeout,
"lsb_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:stop rc:ok op_status:complete\" ",
"lsb_monitor_line" : '-c exec -r lsb_test_rsc -a status -i 2s ' + self.action_timeout,
"lsb_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:complete\" "+self.action_timeout,
"lsb_cancel_line" : '-c cancel -r lsb_test_rsc -a status -i 2s ' + self.action_timeout,
"lsb_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:lsb_test_rsc action:status rc:ok op_status:Cancelled\" ",
"stonith_reg_line" : "-c register_rsc -r stonith_test_rsc " + self.action_timeout +
" -C stonith -P pacemaker -T fence_dummy",
"stonith_reg_event" : "-l \"NEW_EVENT event_type:register rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\" ",
"stonith_unreg_line" : "-c unregister_rsc -r \"stonith_test_rsc\" "+self.action_timeout,
"stonith_unreg_event" : "-l \"NEW_EVENT event_type:unregister rsc_id:stonith_test_rsc action:none rc:ok op_status:complete\"",
"stonith_start_line" : '-c exec -r stonith_test_rsc -a start ' + self.action_timeout,
"stonith_start_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:start rc:ok op_status:complete\" ",
"stonith_stop_line" : "-c exec -r \"stonith_test_rsc\" -a \"stop\" "+self.action_timeout,
"stonith_stop_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:stop rc:ok op_status:complete\" ",
"stonith_monitor_line" : '-c exec -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout,
"stonith_monitor_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout,
"stonith_cancel_line" : '-c cancel -r stonith_test_rsc -a monitor -i 2s ' + self.action_timeout,
"stonith_cancel_event" : "-l \"NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:Cancelled\" ",
}
def new_test(self, name, description):
""" Create a named test """
test = Test(name, description, self.verbose, self.tls)
self.tests.append(test)
return test
def setup_test_environment(self):
""" Prepare the host before executing any tests """
os.system("service pacemaker_remote stop")
self.cleanup_test_environment()
if self.tls and not os.path.isfile("/etc/pacemaker/authkey"):
print("Installing /etc/pacemaker/authkey ...")
os.system("mkdir -p /etc/pacemaker")
os.system("dd if=/dev/urandom of=/etc/pacemaker/authkey bs=4096 count=1")
self.installed_files.append("/etc/pacemaker/authkey")
# If we're in build directory, install agents if not already installed
if os.path.exists("%s/cts/cts-exec.in" % BUILD_DIR):
if not os.path.exists("@OCF_RA_DIR@/pacemaker"):
# @TODO remember which components were created and remove them
os.makedirs("@OCF_RA_DIR@/pacemaker", 0o755)
for agent in ["Dummy", "Stateful", "ping"]:
agent_source = "%s/extra/resources/%s" % (BUILD_DIR, agent)
agent_dest = "@OCF_RA_DIR@/pacemaker/%s" % (agent)
if not os.path.exists(agent_dest):
print("Installing %s ..." % (agent_dest))
shutil.copyfile(agent_source, agent_dest)
os.chmod(agent_dest, EXECMODE)
self.installed_files.append(agent_dest)
subprocess.call(["cts-support", "install"])
def cleanup_test_environment(self):
""" Clean up the host after executing desired tests """
for installed_file in self.installed_files:
print("Removing %s ..." % (installed_file))
os.remove(installed_file)
subprocess.call(["cts-support", "uninstall"])
def build_generic_tests(self):
""" Register tests that apply to all resource classes """
common_cmds = self.common_cmds
### register/unregister tests ###
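# For example, for the ocf class the registration step below expands to
# roughly the following cts-exec-helper invocation (exact flags may vary
# with verbosity and TLS options):
#   cts-exec-helper -c register_rsc -r ocf_test_rsc -t 9000 -C ocf -P pacemaker -T Dummy \
#     -l "NEW_EVENT event_type:register rsc_id:ocf_test_rsc action:none rc:ok op_status:complete"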
for rsc in self.rsc_classes:
test = self.new_test("generic_registration_%s" % (rsc),
"Simple resource registration test for %s standard" % (rsc))
test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)])
test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)])
### start/stop tests ###
for rsc in self.rsc_classes:
test = self.new_test("generic_start_stop_%s" % (rsc), "Simple start and stop test for %s standard" % (rsc))
test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)])
test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)])
test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)])
test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)])
### monitor cancel test ###
for rsc in self.rsc_classes:
test = self.new_test("generic_monitor_cancel_%s" % (rsc),
"Simple monitor cancel test for %s standard" % (rsc))
test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)])
test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)])
test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)])
### If this fails, that means the monitor may not be getting rescheduled ####
test.add_cmd(common_cmds["%s_monitor_event" % (rsc)])
### If this fails, that means the monitor may not be getting rescheduled ####
test.add_cmd(common_cmds["%s_monitor_event" % (rsc)])
test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)])
### If this happens the monitor did not actually cancel correctly. ###
test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT)
### If this happens the monitor did not actually cancel correctly. ###
test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT)
test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)])
test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)])
### monitor duplicate test ###
for rsc in self.rsc_classes:
test = self.new_test("generic_monitor_duplicate_%s" % (rsc),
"Test creation and canceling of duplicate monitors for %s standard" % (rsc))
test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)])
test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)])
test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)])
### If this fails, that means the monitor may not be getting rescheduled ####
test.add_cmd(common_cmds["%s_monitor_event" % (rsc)])
### If this fails, that means the monitor may not be getting rescheduled ####
test.add_cmd(common_cmds["%s_monitor_event" % (rsc)])
# Add the duplicate monitors
test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)])
test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)])
test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)])
test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)])
# verify we still get update events
### If this fails, that means the monitor may not be getting rescheduled ####
test.add_cmd(common_cmds["%s_monitor_event" % (rsc)])
# cancel the monitor, if the duplicate merged with the original, we should no longer see monitor updates
test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)])
### If this happens the monitor did not actually cancel correctly. ###
test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT)
### If this happens the monitor did not actually cancel correctly. ###
test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT)
test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)])
test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)])
### stop implies cancel test ###
for rsc in self.rsc_classes:
test = self.new_test("generic_stop_implies_cancel_%s" % (rsc),
"Verify stopping a resource implies cancel of recurring ops for %s standard" % (rsc))
test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)])
test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)])
test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)])
### If this fails, that means the monitor may not be getting rescheduled ####
test.add_cmd(common_cmds["%s_monitor_event" % (rsc)])
### If this fails, that means the monitor may not be getting rescheduled ####
test.add_cmd(common_cmds["%s_monitor_event" % (rsc)])
test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)])
### If this happens the monitor did not actually cancel correctly. ###
test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT)
### If this happens the monitor did not actually cancel correctly. ###
test.add_expected_fail_cmd(common_cmds["%s_monitor_event" % (rsc)], CrmExit.TIMEOUT)
test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)])
def build_multi_rsc_tests(self):
""" Register complex tests that involve managing multiple resouces of different types """
common_cmds = self.common_cmds
# Do not use service and systemd at the same time; they refer to the same resource.
### register start monitor stop unregister resources of each type at the same time. ###
test = self.new_test("multi_rsc_start_stop_all",
"Start, monitor, and stop resources of multiple types and classes")
for rsc in self.rsc_classes:
test.add_cmd(common_cmds["%s_reg_line" % (rsc)] + " " + common_cmds["%s_reg_event" % (rsc)])
for rsc in self.rsc_classes:
test.add_cmd(common_cmds["%s_start_line" % (rsc)] + " " + common_cmds["%s_start_event" % (rsc)])
for rsc in self.rsc_classes:
test.add_cmd(common_cmds["%s_monitor_line" % (rsc)] + " " + common_cmds["%s_monitor_event" % (rsc)])
for rsc in self.rsc_classes:
### If this fails, that means the monitor is not being rescheduled ####
test.add_cmd(common_cmds["%s_monitor_event" % (rsc)])
for rsc in self.rsc_classes:
test.add_cmd(common_cmds["%s_cancel_line" % (rsc)] + " " + common_cmds["%s_cancel_event" % (rsc)])
for rsc in self.rsc_classes:
test.add_cmd(common_cmds["%s_stop_line" % (rsc)] + " " + common_cmds["%s_stop_event" % (rsc)])
for rsc in self.rsc_classes:
test.add_cmd(common_cmds["%s_unreg_line" % (rsc)] + " " + common_cmds["%s_unreg_event" % (rsc)])
def build_negative_tests(self):
""" Register tests related to how pacemaker-execd handles failures """
### ocf start timeout test ###
test = self.new_test("ocf_start_timeout", "Force start timeout to occur, verify start failure.")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "
+ self.action_timeout +
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
# -t must be less than self.action_timeout
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" -k \"op_sleep\" -v \"5\" -t 1000 -w")
test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" '
+ self.action_timeout)
test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ")
test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout +
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### stonith start timeout test ###
test = self.new_test("stonith_start_timeout", "Force start timeout to occur, verify start failure.")
test.add_cmd('-c register_rsc -r test_rsc ' +
'-C stonith -P pacemaker -T fence_dummy ' +
self.action_timeout +
'-l "NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete"')
test.add_cmd('-c exec -r test_rsc -a start -k monitor_delay -v 30 ' +
'-t 1000 -w') # -t must be less than self.action_timeout
- test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:Timed Out" '
+ test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:OCF_TIMEOUT op_status:Timed Out" '
+ self.action_timeout)
test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ")
test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout +
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### stonith component fail ###
common_cmds = self.common_cmds
test = self.new_test("stonith_component_fail", "Kill stonith component after pacemaker-execd connects")
test.add_cmd(common_cmds["stonith_reg_line"] + " " + common_cmds["stonith_reg_event"])
test.add_cmd(common_cmds["stonith_start_line"] + " " + common_cmds["stonith_start_event"])
test.add_cmd('-c exec -r stonith_test_rsc -a monitor -i 600s '
'-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:ok op_status:complete" '
+ self.action_timeout)
test.add_cmd_and_kill("killall -9 -q pacemaker-fenced lt-pacemaker-fenced",
'-l "NEW_EVENT event_type:exec_complete rsc_id:stonith_test_rsc action:monitor rc:unknown error op_status:error" -t 15000')
test.add_cmd(common_cmds["stonith_unreg_line"] + " " + common_cmds["stonith_unreg_event"])
### monitor fail for ocf resources ###
test = self.new_test("monitor_fail_ocf", "Force ocf monitor to fail, verify failure is reported.")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "
+ self.action_timeout +
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" " + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd('-c exec -r test_rsc -a monitor -i 1s '
+ self.action_timeout +
'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"')
test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"'
+ self.action_timeout)
test.add_cmd('-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"'
+ self.action_timeout)
test.add_cmd_and_kill("rm -f @localstatedir@/run/Dummy-test_rsc.state",
'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete" ' + self.action_timeout)
test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s ' + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "
+ self.action_timeout, CrmExit.TIMEOUT)
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "
+ self.action_timeout, CrmExit.TIMEOUT)
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "
+ self.action_timeout +
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### verify notify changes only for monitor operation. ###
test = self.new_test("monitor_changes_only", "Verify when flag is set, only monitor changes are notified.")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+" -o "
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd('-c exec -r test_rsc -a monitor -i 1s '
+ self.action_timeout +
' -o -l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete" ')
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT)
test.add_cmd_and_kill('rm -f @localstatedir@/run/Dummy-test_rsc.state', '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout)
test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s'
+ self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT)
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT)
test.add_cmd('-c unregister_rsc -r "test_rsc" ' + self.action_timeout +
'-l "NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete"')
### monitor fail for systemd resource ###
if "systemd" in self.rsc_classes:
test = self.new_test("monitor_fail_systemd", "Force systemd monitor to fail, verify failure is reported..")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T pacemaker-cts-dummyd@3 " +
self.action_timeout +
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd('-c exec -r test_rsc -a monitor -i 1s '
+ self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
test.add_cmd_and_kill("killall -9 -q pacemaker-cts-dummyd",
'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout)
test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s' + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT)
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT)
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### monitor fail for upstart resource ###
if "upstart" in self.rsc_classes:
test = self.new_test("monitor_fail_upstart", "Force upstart monitor to fail, verify failure is reported..")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T pacemaker-cts-dummyd "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd('-c exec -r test_rsc -a monitor -i 1s ' + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
test.add_cmd_and_kill('killall -9 -q dd', '-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete"' + self.action_timeout)
test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s'
+ self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT)
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT)
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### Cancel non-existent operation on a resource ###
test = self.new_test("cancel_non_existent_op", "Attempt to cancel the wrong monitor operation, verify expected failure")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd('-c exec -r test_rsc -a monitor -i 1s '
+ self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout)
### interval is wrong, should fail
test.add_expected_fail_cmd('-c cancel -r test_rsc -a monitor -i 2s' + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
### action name is wrong, should fail
test.add_expected_fail_cmd('-c cancel -r test_rsc -a stop -i 1s' + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:not running op_status:Cancelled\" ")
test.add_cmd("-c unregister_rsc -r \"test_rsc\" " + self.action_timeout +
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### Attempt to invoke non-existent rsc id ###
test = self.new_test("invoke_non_existent_rsc", "Attempt to perform operations on a non-existent rsc id.")
test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:unknown error op_status:complete\" ")
test.add_expected_fail_cmd("-c exec -r test_rsc -a stop "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ")
test.add_expected_fail_cmd('-c exec -r test_rsc -a monitor -i 6s '
+ self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" ")
test.add_expected_fail_cmd("-c cancel -r test_rsc -a start "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ")
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### Register and start a resource that doesn't exist, systemd ###
if "systemd" in self.rsc_classes:
test = self.new_test("start_uninstalled_systemd", "Register uninstalled systemd agent, try to start, verify expected failure")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C systemd -T this_is_fake1234 "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ")
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
if "upstart" in self.rsc_classes:
test = self.new_test("start_uninstalled_upstart", "Register uninstalled upstart agent, try to start, verify expected failure")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C upstart -T this_is_fake1234 "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ")
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### Register and start a resource that doesn't exist, ocf ###
test = self.new_test("start_uninstalled_ocf", "Register uninstalled ocf agent, try to start, verify expected failure.")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pacemaker -T this_is_fake1234 "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ")
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### Register ocf with non-existent provider ###
test = self.new_test("start_ocf_bad_provider", "Register ocf agent with a non-existent provider, verify expected failure.")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C ocf -P pancakes -T Dummy "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:not installed op_status:Not installed\" ")
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### Register ocf with empty provider field ###
test = self.new_test("start_ocf_no_provider", "Register ocf agent with a no provider, verify expected failure.")
test.add_expected_fail_cmd("-c register_rsc -r \"test_rsc\" -C ocf -T Dummy "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_expected_fail_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Error\" ")
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
def build_stress_tests(self):
""" Register stress tests """
timeout = "-t 20000"
iterations = 25
test = self.new_test("ocf_stress", "Verify OCF agent handling works under load")
for i in range(iterations):
test.add_cmd("-c register_rsc -r rsc_%s %s -C ocf -P heartbeat -T Dummy -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i))
test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i))
test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s '
'-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i))
for i in range(iterations):
test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i))
test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i))
if "systemd" in self.rsc_classes:
test = self.new_test("systemd_stress", "Verify systemd dbus connection works under load")
for i in range(iterations):
test.add_cmd("-c register_rsc -r rsc_%s %s -C systemd -T pacemaker-cts-dummyd@3 -l \"NEW_EVENT event_type:register rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i))
test.add_cmd("-c exec -r rsc_%s -a start %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:start rc:ok op_status:complete\"" % (i, timeout, i))
test.add_cmd('-c exec -r rsc_%s -a monitor %s -i 1s '
'-l "NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:monitor rc:ok op_status:complete"' % (i, timeout, i))
for i in range(iterations):
test.add_cmd("-c exec -r rsc_%s -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:rsc_%s action:stop rc:ok op_status:complete\"" % (i, timeout, i))
test.add_cmd("-c unregister_rsc -r rsc_%s %s -l \"NEW_EVENT event_type:unregister rsc_id:rsc_%s action:none rc:ok op_status:complete\"" % (i, timeout, i))
iterations = 9
timeout = "-t 30000"
### Verify recurring op in-flight collision is handled in series properly
test = self.new_test("rsc_inflight_collision", "Verify recurring ops do not collide with other operations for the same rsc.")
test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy "
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout)
test.add_cmd("-c exec -r test_rsc -a start %s -k op_sleep -v 1 -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\"" % (timeout))
for i in range(iterations):
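# Each iteration uses a slightly different interval (1000ms, 1001ms, ...)
# so every recurring monitor is a distinct operation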
test.add_cmd('-c exec -r test_rsc -a monitor %s -i 100%dms '
'-k op_sleep -v 2 '
'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"' % (timeout, i))
test.add_cmd("-c exec -r test_rsc -a stop %s -l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\"" % (timeout))
test.add_cmd("-c unregister_rsc -r test_rsc %s -l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\"" % (timeout))
def build_custom_tests(self):
""" Register tests that target specific cases """
### verify resource temporary folder is created and used by OCF agents. ###
test = self.new_test("rsc_tmp_dir", "Verify creation and use of rsc temporary state directory")
test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@")
test.add_cmd("-c register_rsc -r test_rsc -P heartbeat -C ocf -T Dummy "
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout)
test.add_cmd("-c exec -r test_rsc -a start -t 4000")
test.add_sys_cmd("ls", "-al @CRM_RSCTMP_DIR@")
test.add_sys_cmd("ls", "@CRM_RSCTMP_DIR@/Dummy-test_rsc.state")
test.add_cmd("-c exec -r test_rsc -a stop -t 4000")
test.add_cmd("-c unregister_rsc -r test_rsc "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### start delay then stop test ###
test = self.new_test("start_delay", "Verify start delay works as expected.")
test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy "
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout)
test.add_cmd("-c exec -r test_rsc -s 6000 -a start -w -t 6000")
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 2000", CrmExit.TIMEOUT)
test.add_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 6000")
test.add_cmd("-c exec -r test_rsc -a stop " + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:stop rc:ok op_status:complete\" ")
test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout +
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### start delay, but cancel before it gets a chance to start. ###
test = self.new_test("start_delay_cancel", "Using start_delay, start a rsc, but cancel the start op before execution.")
test.add_cmd("-c register_rsc -r test_rsc -P pacemaker -C ocf -T Dummy "
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" "+self.action_timeout)
test.add_cmd("-c exec -r test_rsc -s 5000 -a start -w -t 4000")
test.add_cmd("-c cancel -r test_rsc -a start " + self.action_timeout +
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:Cancelled\" ")
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" -t 5000", CrmExit.TIMEOUT)
test.add_cmd("-c unregister_rsc -r test_rsc " + self.action_timeout +
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### Register a bunch of resources, verify we can get info on them ###
test = self.new_test("verify_get_rsc_info", "Register multiple resources, verify retrieval of rsc info.")
if "systemd" in self.rsc_classes:
test.add_cmd("-c register_rsc -r rsc1 -C systemd -T pacemaker-cts-dummyd@3 "+self.action_timeout)
test.add_cmd("-c get_rsc_info -r rsc1 ")
test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout)
test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ")
if "upstart" in self.rsc_classes:
test.add_cmd("-c register_rsc -r rsc1 -C upstart -T pacemaker-cts-dummyd "+self.action_timeout)
test.add_cmd("-c get_rsc_info -r rsc1 ")
test.add_cmd("-c unregister_rsc -r rsc1 "+self.action_timeout)
test.add_expected_fail_cmd("-c get_rsc_info -r rsc1 ")
test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout)
test.add_cmd("-c get_rsc_info -r rsc2 ")
test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout)
test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ")
### Register duplicate, verify only one entry exists and can still be removed.
test = self.new_test("duplicate_registration", "Register resource multiple times, verify only one entry exists and can be removed.")
test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout)
test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy")
test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Dummy -P pacemaker "+self.action_timeout)
test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Dummy")
test.add_cmd("-c register_rsc -r rsc2 -C ocf -T Stateful -P pacemaker "+self.action_timeout)
test.add_cmd_check_stdout("-c get_rsc_info -r rsc2 ", "id:rsc2 class:ocf provider:pacemaker type:Stateful")
test.add_cmd("-c unregister_rsc -r rsc2 "+self.action_timeout)
test.add_expected_fail_cmd("-c get_rsc_info -r rsc2 ")
### verify the option to only send notification to the original client. ###
test = self.new_test("notify_orig_client_only", "Verify option to only send notifications to the client originating the action.")
test.add_cmd("-c register_rsc -r \"test_rsc\" -C \"ocf\" -P \"pacemaker\" -T \"Dummy\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:register rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
test.add_cmd("-c exec -r \"test_rsc\" -a \"start\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:start rc:ok op_status:complete\" ")
test.add_cmd('-c exec -r \"test_rsc\" -a \"monitor\" -i 1s '
+ self.action_timeout + ' -n '
'-l "NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete"')
# this will fail because the monitor notifications should only go to the original caller, which no longer exists.
test.add_expected_fail_cmd("-l \"NEW_EVENT event_type:exec_complete rsc_id:test_rsc action:monitor rc:ok op_status:complete\" "+self.action_timeout, CrmExit.TIMEOUT)
test.add_cmd('-c cancel -r test_rsc -a monitor -i 1s -t 6000 ')
test.add_cmd("-c unregister_rsc -r \"test_rsc\" "+self.action_timeout+
"-l \"NEW_EVENT event_type:unregister rsc_id:test_rsc action:none rc:ok op_status:complete\" ")
### get metadata ###
test = self.new_test("get_ocf_metadata", "Retrieve metadata for a resource")
test.add_cmd_check_stdout("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Dummy\"",
"resource-agent name=\"Dummy\"")
test.add_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"Stateful\"")
test.add_expected_fail_cmd("-c metadata -P \"pacemaker\" -T \"Stateful\"")
test.add_expected_fail_cmd("-c metadata -C \"ocf\" -P \"pacemaker\" -T \"fake_agent\"")
### get metadata ###
test = self.new_test("get_lsb_metadata", "Retrieve metadata for a resource")
test.add_cmd_check_stdout("-c metadata -C \"lsb\" -T \"LSBDummy\"",
"resource-agent name='LSBDummy'")
### get stonith metadata ###
test = self.new_test("get_stonith_metadata", "Retrieve stonith metadata for a resource")
test.add_cmd_check_stdout("-c metadata -C \"stonith\" -P \"pacemaker\" -T \"fence_dummy\"",
"resource-agent name=\"fence_dummy\"")
### get metadata ###
if "systemd" in self.rsc_classes:
test = self.new_test("get_systemd_metadata", "Retrieve metadata for a resource")
test.add_cmd_check_stdout("-c metadata -C \"systemd\" -T \"pacemaker-cts-dummyd@\"",
"resource-agent name=\"pacemaker-cts-dummyd@\"")
### get metadata ###
if "upstart" in self.rsc_classes:
test = self.new_test("get_upstart_metadata", "Retrieve metadata for a resource")
test.add_cmd_check_stdout("-c metadata -C \"upstart\" -T \"pacemaker-cts-dummyd\"",
"resource-agent name=\"pacemaker-cts-dummyd\"")
### get ocf providers ###
test = self.new_test("list_ocf_providers",
"Retrieve list of available resource providers, verifies pacemaker is a provider.")
test.add_cmd_check_stdout("-c list_ocf_providers ", "pacemaker")
test.add_cmd_check_stdout("-c list_ocf_providers -T ping", "pacemaker")
### Verify agents only exist in their lists ###
test = self.new_test("verify_agent_lists", "Verify the agent lists contain the right data.")
test.add_cmd_check_stdout("-c list_agents ", "Stateful") ### ocf ###
test.add_cmd_check_stdout("-c list_agents -C ocf", "Stateful")
test.add_cmd_check_stdout("-c list_agents -C lsb", "", "Stateful") ### should not exist
test.add_cmd_check_stdout("-c list_agents -C service", "", "Stateful") ### should not exist
test.add_cmd_check_stdout("-c list_agents ", "LSBDummy") ### init.d ###
test.add_cmd_check_stdout("-c list_agents -C lsb", "LSBDummy")
test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy")
test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist
test.add_cmd_check_stdout("-c list_agents -C ocf", "", "pacemaker-cts-dummyd@") ### should not exist
test.add_cmd_check_stdout("-c list_agents -C lsb", "", "fence_dummy") ### should not exist
test.add_cmd_check_stdout("-c list_agents -C service", "", "fence_dummy") ### should not exist
test.add_cmd_check_stdout("-c list_agents -C ocf", "", "fence_dummy") ### should not exist
if "systemd" in self.rsc_classes:
test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd@") ### systemd ###
test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy")
test.add_cmd_check_stdout("-c list_agents -C systemd", "", "Stateful") ### should not exist
test.add_cmd_check_stdout("-c list_agents -C systemd", "pacemaker-cts-dummyd@")
test.add_cmd_check_stdout("-c list_agents -C systemd", "", "fence_dummy") ### should not exist
if "upstart" in self.rsc_classes:
test.add_cmd_check_stdout("-c list_agents ", "pacemaker-cts-dummyd") ### upstart ###
test.add_cmd_check_stdout("-c list_agents -C service", "LSBDummy")
test.add_cmd_check_stdout("-c list_agents -C upstart", "", "Stateful") ### should not exist
test.add_cmd_check_stdout("-c list_agents -C upstart", "pacemaker-cts-dummyd")
test.add_cmd_check_stdout("-c list_agents -C upstart", "", "fence_dummy") ### should not exist
if "stonith" in self.rsc_classes:
test.add_cmd_check_stdout("-c list_agents -C stonith", "fence_dummy") ### stonith ###
test.add_cmd_check_stdout("-c list_agents -C stonith", "", "pacemaker-cts-dummyd@") ### should not exist
test.add_cmd_check_stdout("-c list_agents -C stonith", "", "Stateful") ### should not exist
test.add_cmd_check_stdout("-c list_agents ", "fence_dummy")
def print_list(self):
""" List all registered tests """
print("\n==== %d TESTS FOUND ====" % (len(self.tests)))
print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION"))
print("%35s - %s" % ("--------------------", "--------------------"))
for test in self.tests:
print("%35s - %s" % (test.name, test.description))
print("==== END OF LIST ====\n")
def run_single(self, name):
""" Run a single named test """
for test in self.tests:
if test.name == name:
test.run()
break
def run_tests_matching(self, pattern):
""" Run all tests whose name matches a pattern """
for test in self.tests:
if test.name.count(pattern) != 0:
test.run()
def run_tests(self):
""" Run all tests """
for test in self.tests:
test.run()
def exit(self):
""" Exit (with error status code if any test failed) """
for test in self.tests:
if test.executed == 0:
continue
if test.get_exitcode() != CrmExit.OK:
sys.exit(CrmExit.ERROR)
sys.exit(CrmExit.OK)
def print_results(self):
""" Print summary of results of executed tests """
failures = 0
success = 0
print("\n\n======= FINAL RESULTS ==========")
print("\n--- FAILURE RESULTS:")
for test in self.tests:
if test.executed == 0:
continue
if test.get_exitcode() != CrmExit.OK:
failures = failures + 1
test.print_result(" ")
else:
success = success + 1
if failures == 0:
print(" None")
print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures))
class TestOptions(object):
""" Option handler """
def __init__(self):
self.options = {}
self.options['list-tests'] = 0
self.options['run-all'] = 1
self.options['run-only'] = ""
self.options['run-only-pattern'] = ""
self.options['verbose'] = 0
self.options['invalid-arg'] = ""
self.options['show-usage'] = 0
self.options['pacemaker-remote'] = 0
def build_options(self, argv):
""" Set options based on command-line arguments """
args = argv[1:]
skip = 0
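# 'skip' is set when the next argument has already been consumed as an option value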
for i in range(0, len(args)):
if skip:
skip = 0
continue
elif args[i] == "-h" or args[i] == "--help":
self.options['show-usage'] = 1
elif args[i] == "-l" or args[i] == "--list-tests":
self.options['list-tests'] = 1
elif args[i] == "-V" or args[i] == "--verbose":
self.options['verbose'] = 1
elif args[i] == "-R" or args[i] == "--pacemaker-remote":
self.options['pacemaker-remote'] = 1
elif args[i] == "-r" or args[i] == "--run-only":
self.options['run-only'] = args[i+1]
skip = 1
elif args[i] == "-p" or args[i] == "--run-only-pattern":
self.options['run-only-pattern'] = args[i+1]
skip = 1
def show_usage(self):
""" Show command usage """
print("usage: " + sys.argv[0] + " [options]")
print("If no options are provided, all tests will run")
print("Options:")
print("\t [--help | -h] Show usage")
print("\t [--list-tests | -l] Print out all registered tests.")
print("\t [--run-only | -r 'testname'] Run a specific test")
print("\t [--verbose | -V] Verbose output")
print("\t [--pacemaker-remote | -R Test pacemaker-remoted binary instead of pacemaker-execd")
print("\t [--run-only-pattern | -p 'string'] Run only tests containing the string value")
print("\n\tExample: Run only the test 'start_stop'")
print("\t\t " + sys.argv[0] + " --run-only start_stop")
print("\n\tExample: Run only the tests with the string 'systemd' present in them")
print("\t\t " + sys.argv[0] + " --run-only-pattern systemd")
def main(argv):
""" Run pacemaker-execd regression tests as specified by arguments """
update_path()
opts = TestOptions()
opts.build_options(argv)
tests = Tests(opts.options['verbose'], opts.options['pacemaker-remote'])
tests.build_generic_tests()
tests.build_multi_rsc_tests()
tests.build_negative_tests()
tests.build_custom_tests()
tests.build_stress_tests()
tests.setup_test_environment()
print("Starting ...")
if opts.options['list-tests']:
tests.print_list()
elif opts.options['show-usage']:
opts.show_usage()
elif opts.options['run-only-pattern'] != "":
tests.run_tests_matching(opts.options['run-only-pattern'])
tests.print_results()
elif opts.options['run-only'] != "":
tests.run_single(opts.options['run-only'])
tests.print_results()
else:
tests.run_tests()
tests.print_results()
tests.cleanup_test_environment()
tests.exit()
if __name__ == "__main__":
main(sys.argv)
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
index 1718dced41..6dc591f8da 100644
--- a/daemons/execd/execd_commands.c
+++ b/daemons/execd/execd_commands.c
@@ -1,1684 +1,1832 @@
/*
- * Copyright 2012-2018 David Vossel <davidvossel@gmail.com>
+ * Copyright 2012-2019 David Vossel <davidvossel@gmail.com>
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/msg_xml.h>
#include "pacemaker-execd.h"
#ifdef HAVE_SYS_TIMEB_H
# include <sys/timeb.h>
#endif
#define EXIT_REASON_MAX_LEN 128
GHashTable *rsc_list = NULL;
typedef struct lrmd_cmd_s {
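/* State for one executor command (a single resource action); timeout,
 * start_delay, and interval_ms are all in milliseconds. */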
int timeout;
guint interval_ms;
int start_delay;
int timeout_orig;
int call_id;
int exec_rc;
int lrmd_op_status;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
int service_flags;
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *real_action;
char *exit_reason;
char *output;
char *userdata_str;
#ifdef HAVE_SYS_TIMEB_H
/* Recurring and systemd operations may involve more than one executor
* command per operation, so they need info about the original and the most
* recent.
*/
struct timeb t_first_run; /* Timestamp of when op first ran */
struct timeb t_run; /* Timestamp of when op most recently ran */
struct timeb t_first_queue; /* Timestamp of when op first was queued */
struct timeb t_queue; /* Timestamp of when op most recently was queued */
struct timeb t_rcchange; /* Timestamp of last rc change */
#endif
int first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
static void
log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
{
char pid_str[32] = { 0, };
int log_level = LOG_INFO;
if (cmd->last_pid) {
snprintf(pid_str, 32, "%d", cmd->last_pid);
}
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
#ifdef HAVE_SYS_TIMEB_H
do_crm_log(log_level,
"finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms",
cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str,
cmd->exec_rc, exec_time, queue_time);
#else
do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d",
cmd->rsc_id,
cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc);
#endif
}
static void
log_execute(lrmd_cmd_t * cmd)
{
int log_level = LOG_INFO;
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
cmd->rsc_id, cmd->action, cmd->call_id);
}
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
if (safe_str_eq(action, "monitor") &&
is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
return "status";
}
return action;
}
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
return rsc;
}
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode * msg, crm_client_t * client)
{
int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
cmd->call_opts = call_options;
cmd->client_id = strdup(client->id);
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->timeout_orig = cmd->timeout;
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) {
crm_debug("Setting flag to leave pid group on timeout and only kill action pid for " CRM_OP_FMT,
cmd->rsc_id, cmd->action, cmd->interval_ms);
cmd->service_flags |= SVC_ACTION_LEAVE_GROUP;
}
return cmd;
}
static void
-free_lrmd_cmd(lrmd_cmd_t * cmd)
+stop_recurring_timer(lrmd_cmd_t *cmd)
{
- if (cmd->stonith_recurring_id) {
- g_source_remove(cmd->stonith_recurring_id);
+ if (cmd) {
+ if (cmd->stonith_recurring_id) {
+ g_source_remove(cmd->stonith_recurring_id);
+ }
+ cmd->stonith_recurring_id = 0;
}
+}
+
+static void
+free_lrmd_cmd(lrmd_cmd_t * cmd)
+{
+ stop_recurring_timer(cmd);
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
free(cmd->origin);
free(cmd->action);
free(cmd->real_action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->output);
free(cmd->exit_reason);
free(cmd->client_id);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc;
cmd->stonith_recurring_id = 0;
if (!cmd->rsc_id) {
return FALSE;
}
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
if (cmd->t_first_queue.time == 0) {
cmd->t_first_queue = cmd->t_queue;
}
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
+static inline void
+start_recurring_timer(lrmd_cmd_t *cmd)
+{
+ if (cmd && (cmd->interval_ms > 0)) {
+ cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
+ stonith_recurring_op_helper,
+ cmd);
+ }
+}
+
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
static gboolean
merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
GListPtr gIter = NULL;
lrmd_cmd_t * dup = NULL;
gboolean dup_pending = FALSE;
if (cmd->interval_ms == 0) {
return 0;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
dup = gIter->data;
if (safe_str_eq(cmd->action, dup->action)
&& (cmd->interval_ms == dup->interval_ms)) {
dup_pending = TRUE;
goto merge_dup;
}
}
/* If dup is in the recurring_ops list, it has already executed and is in the
* interval loop; we can't just remove it in this case. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
dup = gIter->data;
if (safe_str_eq(cmd->action, dup->action)
&& (cmd->interval_ms == dup->interval_ms)) {
goto merge_dup;
}
}
return FALSE;
merge_dup:
/* This should not occur. If it does, we need to investigate how something
* like this is possible in the controller.
*/
crm_warn("Duplicate recurring op entry detected (" CRM_OP_FMT "), merging with previous op entry",
rsc->rsc_id,
normalize_action_name(rsc, dup->action),
dup->interval_ms);
/* merge */
dup->first_notify_sent = 0;
free(dup->userdata_str);
dup->userdata_str = cmd->userdata_str;
cmd->userdata_str = NULL;
dup->call_id = cmd->call_id;
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* if we are waiting for the next interval, kick it off now */
if (dup_pending == TRUE) {
- g_source_remove(cmd->stonith_recurring_id);
- cmd->stonith_recurring_id = 0;
+ stop_recurring_timer(cmd);
stonith_recurring_op_helper(cmd);
}
} else if (dup_pending == FALSE) {
/* if we've already handed this to the service lib, kick off an early execution */
services_action_kick(rsc->rsc_id,
normalize_action_name(rsc, dup->action),
dup->interval_ms);
}
free_lrmd_cmd(cmd);
return TRUE;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
gboolean dup_processed = FALSE;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
dup_processed = merge_recurring_duplicate(rsc, cmd);
if (dup_processed) {
/* duplicate recurring cmd found, cmds merged */
return;
}
/* The controller expects the executor to automatically cancel
* recurring operations before a resource stops.
*/
if (safe_str_eq(cmd->action, "stop")) {
cancel_all_recurring(rsc, NULL);
}
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
if (cmd->t_first_queue.time == 0) {
cmd->t_first_queue = cmd->t_queue;
}
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static xmlNode *
create_lrmd_reply(const char *origin, int rc, int call_id)
{
xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, origin);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
return reply;
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
crm_client_t *client = value;
int rc;
if (client == NULL) {
crm_err("Asked to send event to NULL client");
return;
} else if (client->name == NULL) {
crm_trace("Asked to send event to client with no name");
return;
}
rc = lrmd_server_send_notify(client, update_msg);
if ((rc <= 0) && (rc != -ENOTCONN)) {
crm_warn("Could not notify client %s/%s: %s " CRM_XS " rc=%d",
client->name, client->id,
(rc? pcmk_strerror(rc) : "no data sent"), rc);
}
}
#ifdef HAVE_SYS_TIMEB_H
/*!
* \internal
* \brief Return difference between two times in milliseconds
*
* \param[in] now More recent time (or NULL to use current time)
* \param[in] old Earlier time
*
* \return milliseconds difference (or 0 if old is NULL or has time zero)
*/
static int
time_diff_ms(struct timeb *now, struct timeb *old)
{
struct timeb local_now = { 0, };
if (now == NULL) {
ftime(&local_now);
now = &local_now;
}
if ((old == NULL) || (old->time == 0)) {
return 0;
}
return difftime(now->time, old->time) * 1000 + now->millitm - old->millitm;
}
/*!
* \internal
* \brief Reset a command's operation times to their original values.
*
* Reset a command's run and queued timestamps to the timestamps of the original
* command, so we report the entire time since then and not just the time since
* the most recent command (for recurring and systemd operations).
*
* \param[in] cmd Executor command object to reset
*
* \note It's not obvious what the queued time should be for a systemd
* start/stop operation, which might go like this:
* initial command queued 5ms, runs 3s
* monitor command queued 10ms, runs 10s
* monitor command queued 10ms, runs 10s
* Is the queued time for that operation 5ms, 10ms or 25ms? The current
* implementation will report 5ms. If it's 25ms, then we need to
* subtract 20ms from the total exec time so as not to count it twice.
* We can implement that later if it matters to anyone ...
*/
static void
cmd_original_times(lrmd_cmd_t * cmd)
{
cmd->t_run = cmd->t_first_run;
cmd->t_queue = cmd->t_first_queue;
}
#endif
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
int exec_time = 0;
int queue_time = 0;
xmlNode *notify = NULL;
#ifdef HAVE_SYS_TIMEB_H
exec_time = time_diff_ms(NULL, &cmd->t_run);
queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue);
#endif
log_finished(cmd, exec_time, queue_time);
/* If the first notify result for a cmd has already been sent earlier, and the
* option to send notifies only on result changes is set, check whether the
* new result is the same as the last one. If so, suppress this update. */
if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
if (cmd->last_notify_rc == cmd->exec_rc &&
cmd->last_notify_op_status == cmd->lrmd_op_status) {
/* only send changes */
return;
}
}
cmd->first_notify_sent = 1;
cmd->last_notify_rc = cmd->exec_rc;
cmd->last_notify_op_status = cmd->lrmd_op_status;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
#ifdef HAVE_SYS_TIMEB_H
crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time);
crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time);
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
if(cmd->real_action) {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
} else {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
}
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason);
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2smartfield((gpointer) key, (gpointer) value, args);
}
}
if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
crm_client_t *client = crm_client_get_by_id(cmd->client_id);
if (client) {
send_client_notify(client->id, client, notify);
}
} else if (client_connections != NULL) {
g_hash_table_foreach(client_connections, send_client_notify, notify);
}
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode * request)
{
if (client_connections != NULL) {
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
g_hash_table_foreach(client_connections, send_client_notify, notify);
free_xml(notify);
}
}
static void
cmd_reset(lrmd_cmd_t * cmd)
{
cmd->lrmd_op_status = 0;
cmd->last_pid = 0;
#ifdef HAVE_SYS_TIMEB_H
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
#endif
free(cmd->exit_reason);
cmd->exit_reason = NULL;
free(cmd->output);
cmd->output = NULL;
}
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
rsc ? rsc->active : NULL, cmd);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
/* reset original timeout so client notification has correct information */
cmd->timeout = cmd->timeout_orig;
send_cmd_complete_notify(cmd);
if (cmd->interval_ms && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval_ms == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd_reset(cmd);
}
}
static int
ocf2uniform_rc(int rc)
{
if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
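/* Map a fencer API return code to a standard OCF exit code for a given action */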
static int
stonith2uniform_rc(const char *action, int rc)
{
- if (rc == -ENODEV) {
- if (safe_str_eq(action, "stop")) {
+ switch (rc) {
+ case pcmk_ok:
rc = PCMK_OCF_OK;
- } else if (safe_str_eq(action, "start")) {
- rc = PCMK_OCF_NOT_INSTALLED;
- } else {
- rc = PCMK_OCF_NOT_RUNNING;
- }
- } else if (rc != 0) {
- rc = PCMK_OCF_UNKNOWN_ERROR;
+ break;
+
+ case -ENODEV:
+ /* This should be possible only for probes in practice, but
+ * interpret for all actions to be safe.
+ */
+ if (safe_str_eq(action, "monitor")) {
+ rc = PCMK_OCF_NOT_RUNNING;
+ } else if (safe_str_eq(action, "stop")) {
+ rc = PCMK_OCF_OK;
+ } else {
+ rc = PCMK_OCF_NOT_INSTALLED;
+ }
+ break;
+
+ case -EOPNOTSUPP:
+ rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
+ break;
+
+ case -ETIME:
+ case -ETIMEDOUT:
+ rc = PCMK_OCF_TIMEOUT;
+ break;
+
+ default:
+ rc = PCMK_OCF_UNKNOWN_ERROR;
+ break;
}
return rc;
}
#if SUPPORT_NAGIOS
static int
nagios2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
switch (rc) {
case NAGIOS_STATE_OK:
return PCMK_OCF_OK;
case NAGIOS_INSUFFICIENT_PRIV:
return PCMK_OCF_INSUFFICIENT_PRIV;
case NAGIOS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case NAGIOS_STATE_WARNING:
case NAGIOS_STATE_CRITICAL:
case NAGIOS_STATE_UNKNOWN:
case NAGIOS_STATE_DEPENDENT:
default:
return PCMK_OCF_UNKNOWN_ERROR;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
#endif
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) {
return ocf2uniform_rc(rc);
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
return stonith2uniform_rc(action, rc);
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) {
return rc;
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) {
return rc;
#if SUPPORT_NAGIOS
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) {
return nagios2uniform_rc(action, rc);
#endif
} else {
return services_get_ocf_exitcode(action, rc);
}
}
static int
action_get_uniform_rc(svc_action_t * action)
{
lrmd_cmd_t *cmd = action->cb_data;
return get_uniform_rc(action->standard, cmd->action, action->rc);
}
void
notify_of_new_client(crm_client_t *new_client)
{
crm_client_t *client = NULL;
GHashTableIter iter;
xmlNode *notify = NULL;
char *key = NULL;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
g_hash_table_iter_init(&iter, client_connections);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) {
if (safe_str_eq(client->id, new_client->id)) {
continue;
}
send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify);
}
free_xml(notify);
}
static char *
parse_exit_reason(const char *output)
{
const char *cur = NULL;
const char *last = NULL;
static int cookie_len = 0;
char *eol = NULL;
size_t reason_len = EXIT_REASON_MAX_LEN;
if (output == NULL) {
return NULL;
}
if (!cookie_len) {
cookie_len = strlen(PCMK_OCF_REASON_PREFIX);
}
cur = strstr(output, PCMK_OCF_REASON_PREFIX);
for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) {
/* skip over the cookie delimiter string */
cur += cookie_len;
last = cur;
}
if (last == NULL) {
return NULL;
}
// Truncate everything after a new line, and limit reason string size
eol = strchr(last, '\n');
if (eol) {
reason_len = QB_MIN(reason_len, eol - last);
}
return strndup(last, reason_len);
}
void
client_disconnect_cleanup(const char *client_id)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (rsc->call_opts & lrmd_opt_drop_recurring) {
/* This client is disconnecting, drop any recurring operations
* it may have initiated on the resource */
cancel_all_recurring(rsc, client_id);
}
}
}
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
const char *rclass = NULL;
bool goagain = false;
if (!cmd) {
crm_err("Completed executor action (%s) does not match any known operations",
action->id);
return;
}
#ifdef HAVE_SYS_TIMEB_H
if (cmd->exec_rc != action->rc) {
ftime(&cmd->t_rcchange);
}
#endif
cmd->last_pid = action->pid;
cmd->exec_rc = action_get_uniform_rc(action);
cmd->lrmd_op_status = action->status;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) {
rclass = resources_find_service_class(rsc->type);
} else if(rsc) {
rclass = rsc->class;
}
if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) {
if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) {
/* systemd I curse thee!
*
* systemd returns from start actions after the start _begins_
* not after it completes.
*
* So we have to jump through a few hoops so that we don't
* report 'complete' to the rest of pacemaker until, you know,
* it's actually done.
*/
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) {
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if(cmd->real_action) {
/* Ok, so this is the follow up monitor action to check if start actually completed */
if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) {
goagain = true;
} else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) {
goagain = true;
} else {
#ifdef HAVE_SYS_TIMEB_H
int time_sum = time_diff_ms(NULL, &cmd->t_first_run);
int timeout_left = cmd->timeout_orig - time_sum;
crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc);
cmd_original_times(cmd);
#endif
if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_NOT_RUNNING && safe_str_eq(cmd->real_action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
}
}
}
}
#if SUPPORT_NAGIOS
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) {
if (safe_str_eq(cmd->action, "monitor") &&
(cmd->interval_ms == 0) && cmd->exec_rc == PCMK_OCF_OK) {
/* Successfully executed --version for the nagios plugin */
cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) {
goagain = true;
}
}
#endif
/* Wrapping this section in ifdef implies that systemd resources are not
* fully supported on platforms without sys/timeb.h. Since timeb is
* obsolete, we should eventually prefer a clock_gettime() implementation
* (wrapped in its own ifdef) with timeb as a fallback.
*/
if(goagain) {
#ifdef HAVE_SYS_TIMEB_H
int time_sum = time_diff_ms(NULL, &cmd->t_first_run);
int timeout_left = cmd->timeout_orig - time_sum;
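/* Re-check after roughly a tenth of the original timeout, dropping to half of
 * the time remaining when less than that is left, and never more than 2s */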
int delay = cmd->timeout_orig / 10;
if(delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}
delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
if(cmd->exec_rc == PCMK_OCF_OK) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
} else if(cmd->exec_rc == PCMK_OCF_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
} else {
crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay);
}
cmd_reset(cmd);
if(rsc) {
rsc->active = NULL;
}
schedule_lrmd_cmd(rsc, cmd);
/* Don't finalize cmd, we're not done with it yet */
return;
} else {
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left);
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
cmd->exec_rc = PCMK_OCF_TIMEOUT;
cmd_original_times(cmd);
}
#endif
}
if (action->stderr_data) {
cmd->output = strdup(action->stderr_data);
cmd->exit_reason = parse_exit_reason(action->stderr_data);
} else if (action->stdout_data) {
cmd->output = strdup(action->stdout_data);
}
cmd_finalize(cmd, rsc);
}
-static void
-stonith_action_complete(lrmd_cmd_t * cmd, int rc)
+/*!
+ * \internal
+ * \brief Determine operation status of a stonith operation
+ *
+ * Non-stonith resource operations get their operation status directly from the
+ * service library, but the fencer does not have an equivalent, so we must infer
+ * an operation status from the fencer API's return code.
+ *
+ * \param[in] action Name of action performed on stonith resource
+ * \param[in] interval_ms Action interval
+ * \param[in] rc Action result from fencer
+ *
+ * \return Operation status corresponding to fencer API return code
+ */
+static int
+stonith_rc2status(const char *action, guint interval_ms, int rc)
{
- bool recurring = (cmd->interval_ms > 0);
- lrmd_rsc_t *rsc = NULL;
+ int status = PCMK_LRM_OP_DONE;
- cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, rc);
+ switch (rc) {
+ case pcmk_ok:
+ break;
+
+ case -EOPNOTSUPP:
+ case -EPROTONOSUPPORT:
+ status = PCMK_LRM_OP_NOTSUPPORTED;
+ break;
+
+ case -ETIME:
+ case -ETIMEDOUT:
+ status = PCMK_LRM_OP_TIMEOUT;
+ break;
+
+ case -ENOTCONN:
+ case -ECOMM:
+ // Couldn't talk to fencer
+ status = PCMK_LRM_OP_ERROR;
+ break;
+
+ case -ENODEV:
+ // The device is not registered with the fencer
+
+ if (safe_str_neq(action, "monitor")) {
+ status = PCMK_LRM_OP_ERROR;
+
+ } else if (interval_ms > 0) {
+ /* If we get here, the fencer somehow lost the registration of a
+ * previously active device (possibly due to crash and respawn). In
+ * that case, we need to indicate that the recurring monitor needs
+ * to be cancelled.
+ */
+ status = PCMK_LRM_OP_CANCELLED;
+ }
+ break;
- rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
+ default:
+ break;
+ }
+ return status;
+}
- if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) {
- recurring = FALSE;
- /* do nothing */
+static void
+stonith_action_complete(lrmd_cmd_t * cmd, int rc)
+{
+ // This can be NULL if resource was removed before command completed
+ lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
- } else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) {
- // The device is not registered with the fencer
+ cmd->exec_rc = stonith2uniform_rc(cmd->action, rc);
- if (recurring) {
- /* If we get here, the fencer somehow lost the registration of a
- * previously active device (possibly due to crash and respawn). In
- * that case, we need to indicate that the recurring monitor needs
- * to be cancelled.
- */
- cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
- recurring = FALSE;
- } else {
- cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
- }
- cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
-
- } else if (rc) {
- /* Attempt to map return codes to op status if possible */
- switch (rc) {
- case -EPROTONOSUPPORT:
- cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED;
- break;
- case -ETIME:
- cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
- break;
- default:
- /* TODO: This looks wrong. Status should be _DONE and exec_rc set to an error */
- cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
- }
- } else {
- /* command successful */
- cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
- if (safe_str_eq(cmd->action, "start") && rsc) {
- rsc->stonith_started = 1;
+ /* This function may be called with status already set to cancelled, if a
+ * pending action was aborted. Otherwise, we need to determine status from
+ * the fencer return code.
+ */
+ if (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED) {
+ cmd->lrmd_op_status = stonith_rc2status(cmd->action, cmd->interval_ms,
+ rc);
+
+ // Certain successful actions change the known state of the resource
+ if (rsc && (cmd->exec_rc == PCMK_OCF_OK)) {
+ if (safe_str_eq(cmd->action, "start")) {
+ rsc->stonith_started = 1;
+ } else if (safe_str_eq(cmd->action, "stop")) {
+ rsc->stonith_started = 0;
+ }
}
}
- if (recurring && rsc) {
- if (cmd->stonith_recurring_id) {
- g_source_remove(cmd->stonith_recurring_id);
- }
- cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
- stonith_recurring_op_helper,
- cmd);
+ /* The recurring timer should not be running at this point in any case, but
+ * as a failsafe, stop it if it is.
+ */
+ stop_recurring_timer(cmd);
+
+ /* Reschedule this command if appropriate. If a recurring command is *not*
+ * rescheduled, its status must be PCMK_LRM_OP_CANCELLED, otherwise it will
+ * not be removed from recurring_ops by cmd_finalize().
+ */
+ if (rsc && (cmd->interval_ms > 0)
+ && (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED)) {
+ start_recurring_timer(cmd);
}
cmd_finalize(cmd, rsc);
}
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
stonith_action_complete(data->userdata, data->rc);
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
if (rsc->active) {
cmd_list = g_list_append(cmd_list, rsc->active);
}
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
}
rsc->pending_ops = rsc->recurring_ops = NULL;
}
}
if (!cmd_list) {
return;
}
crm_err("Connection to fencer failed, finalizing %d pending operations",
g_list_length(cmd_list));
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
stonith_action_complete(cmd_iter->data, -ENOTCONN);
}
g_list_free(cmd_list);
}
+/*!
+ * \internal
+ * \brief Execute a stonith resource "start" action
+ *
+ * Start a stonith resource by registering it with the fencer.
+ * (Stonith agents don't have a start command.)
+ *
+ * \param[in] stonith_api Connection to fencer
+ * \param[in] rsc Stonith resource to start
+ * \param[in] cmd Start command to execute
+ *
+ * \return pcmk_ok on success, -errno otherwise
+ */
static int
-lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
- int rc = 0;
- int do_monitor = 0;
+ char *key = NULL;
+ char *value = NULL;
+ stonith_key_value_t *device_params = NULL;
+ int rc = pcmk_ok;
- stonith_t *stonith_api = get_stonith_connection();
+ // Convert command parameters to stonith API key/values
+ if (cmd->params) {
+ GHashTableIter iter;
- if (!stonith_api) {
- cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action,
- -ENOTCONN);
- cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
- cmd_finalize(cmd, rsc);
- return -EUNATCH;
+ g_hash_table_iter_init(&iter, cmd->params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ device_params = stonith_key_value_add(device_params, key, value);
+ }
}
- if (safe_str_eq(cmd->action, "start")) {
- char *key = NULL;
- char *value = NULL;
- stonith_key_value_t *device_params = NULL;
+ /* The fencer will automatically register devices via CIB notifications
+ * when the CIB changes, but to avoid a possible race condition between
+ * the fencer receiving the notification and the executor requesting that
+ * resource, the executor registers the device as well. The fencer knows how
+ * to handle duplicate registrations.
+ */
+ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
+ cmd->rsc_id, rsc->provider,
+ rsc->type, device_params);
- if (cmd->params) {
- GHashTableIter iter;
+ stonith_key_value_freeall(device_params, 1, 1);
+ return rc;
+}
- g_hash_table_iter_init(&iter, cmd->params);
- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
- device_params = stonith_key_value_add(device_params, key, value);
- }
- }
+/*!
+ * \internal
+ * \brief Execute a stonith resource "stop" action
+ *
+ * Stop a stonith resource by unregistering it with the fencer.
+ * (Stonith agents don't have a stop command.)
+ *
+ * \param[in] stonith_api Connection to fencer
+ * \param[in] rsc Stonith resource to stop
+ *
+ * \return pcmk_ok on success, -errno otherwise
+ */
+static inline int
+execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
+{
+ /* @TODO Failure would indicate a problem communicating with fencer;
+ * perhaps we should try reconnecting and retrying a few times?
+ */
+ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
+ rsc->rsc_id);
+}
+
+/*!
+ * \internal
+ * \brief Execute a one-time stonith resource "monitor" action
+ *
+ * Probe a stonith resource by checking whether we started it
+ *
+ * \param[in] rsc Stonith resource to probe
+ *
+ * \return pcmk_ok if started, -errno otherwise
+ */
+static inline int
+execd_stonith_probe(lrmd_rsc_t *rsc)
+{
+ return rsc->stonith_started? 0 : -ENODEV;
+}
+
+/*!
+ * \internal
+ * \brief Initiate a stonith resource agent "monitor" action
+ *
+ * \param[in] stonith_api Connection to fencer
+ * \param[in] rsc Stonith resource to monitor
+ * \param[in] cmd Monitor command being executed
+ *
+ * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
+ */
+static inline int
+execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
+{
+ int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
+ cmd->timeout / 1000);
- /* Stonith automatically registers devices from the IPC when changes
- * occur, but to avoid a possible race condition between stonith
- * receiving the IPC update and the executor requesting that resource,
- * the executor still registers the device as well. Stonith knows how to
- * handle duplicate device registrations correctly.
- */
- rc = stonith_api->cmds->register_device(stonith_api,
- st_opt_sync_call,
- cmd->rsc_id,
- rsc->provider, rsc->type, device_params);
-
- stonith_key_value_freeall(device_params, 1, 1);
+ rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
+ "lrmd_stonith_callback",
+ lrmd_stonith_callback);
+ if (rc == TRUE) {
+ rsc->active = cmd;
+ rc = pcmk_ok;
+ } else {
+ rc = -pcmk_err_generic;
+ }
+ return rc;
+}
+
+static void
+lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+{
+ int rc = 0;
+ bool do_monitor = FALSE;
+
+ stonith_t *stonith_api = get_stonith_connection();
+
+ if (!stonith_api) {
+ rc = -ENOTCONN;
+
+ } else if (safe_str_eq(cmd->action, "start")) {
+ rc = execd_stonith_start(stonith_api, rsc, cmd);
if (rc == 0) {
- do_monitor = 1;
+ do_monitor = TRUE;
}
+
} else if (safe_str_eq(cmd->action, "stop")) {
- rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id);
- rsc->stonith_started = 0;
+ rc = execd_stonith_stop(stonith_api, rsc);
+
} else if (safe_str_eq(cmd->action, "monitor")) {
if (cmd->interval_ms > 0) {
- do_monitor = 1;
+ do_monitor = TRUE;
} else {
- rc = rsc->stonith_started ? 0 : -ENODEV;
+ rc = execd_stonith_probe(rsc);
}
}
- if (!do_monitor) {
- goto cleanup_stonith_exec;
- }
-
- rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000);
-
- rc = stonith_api->cmds->register_callback(stonith_api,
- rc,
- 0,
- 0,
- cmd, "lrmd_stonith_callback", lrmd_stonith_callback);
-
- /* don't cleanup yet, we will find out the result of the monitor later */
- if (rc > 0) {
- rsc->active = cmd;
- return rc;
- } else if (rc == 0) {
- rc = -1;
+ if (do_monitor) {
+ rc = execd_stonith_monitor(stonith_api, rsc, cmd);
+ if (rc == pcmk_ok) {
+ // Don't clean up yet, we will find out result of the monitor later
+ return;
+ }
}
- cleanup_stonith_exec:
stonith_action_complete(cmd, rc);
- return rc;
}
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
CRM_ASSERT(rsc);
CRM_ASSERT(cmd);
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
#if SUPPORT_NAGIOS
/* Recurring operations are cancelled anyway for a stop operation */
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)
&& safe_str_eq(cmd->action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
goto exec_done;
}
#endif
params_copy = crm_str_table_dup(cmd->params);
action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms, cmd->timeout,
params_copy, cmd->service_flags);
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
action->cb_data = cmd;
/* 'cmd' may not be valid after this point if
* services_action_async() returned TRUE
*
* Upstart and systemd both synchronously determine monitor/status
* results and call action_complete (which may free 'cmd') if necessary.
*/
if (services_action_async(action, action_complete)) {
return TRUE;
}
cmd->exec_rc = action->rc;
if(action->status != PCMK_LRM_OP_DONE) {
cmd->lrmd_op_status = action->status;
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
services_action_free(action);
action = NULL;
exec_done:
cmd_finalize(cmd, rsc);
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t * rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace
("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef HAVE_SYS_TIMEB_H
if (cmd->t_first_run.time == 0) {
ftime(&cmd->t_first_run);
}
ftime(&cmd->t_run);
#endif
}
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
rsc->active = cmd; /* only one op at a time for a rsc */
if (cmd->interval_ms) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
log_execute(cmd);
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH);
gIter = rsc->pending_ops;
while (gIter != NULL) {
GListPtr next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
gIter = rsc->recurring_ops;
while (gIter != NULL) {
GListPtr next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
/* If a stonith command is in-flight, just mark it as cancelled;
* it is not safe to finalize/free the cmd until the stonith api
* says it has either completed or timed out.
*/
if (rsc->active != cmd) {
cmd_finalize(cmd, NULL);
}
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id,
normalize_action_name(rsc, cmd->action),
cmd->interval_ms);
}
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static xmlNode *
process_lrmd_signon(crm_client_t *client, xmlNode *request, int call_id)
{
xmlNode *reply = NULL;
int rc = pcmk_ok;
const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
crm_err("Cluster API version must be greater than or equal to %s, not %s",
LRMD_MIN_PROTOCOL_VERSION, protocol_version);
rc = -EPROTO;
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
if (crm_is_true(is_ipc_provider)) {
// This is a remote connection from a cluster node's controller
#ifdef SUPPORT_REMOTE
ipc_proxy_add_provider(client);
#endif
}
return reply;
}
static int
process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
safe_str_eq(rsc->class, dup->class) &&
safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) {
crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Added '%s' to the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
return rc;
}
static xmlNode *
process_lrmd_get_rsc_info(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
if (rsc_id == NULL) {
rc = -ENODEV;
} else {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
return reply;
}
static int
process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -ENODEV;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return pcmk_ok;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
crm_trace("Operation still in progress: %p", rsc->active);
rc = -EINPROGRESS;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
int call_id;
if (!rsc_id) {
return -EINVAL;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return -ENODEV;
}
cmd = create_lrmd_cmd(request, client);
call_id = cmd->call_id;
/* Don't reference cmd after handing it off to be scheduled.
* The cmd could get merged and freed. */
schedule_lrmd_cmd(rsc, cmd);
return call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, guint interval_ms)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action:
* 1. Check the pending ops list; if the action hasn't been handed off to the
* service library or the stonith recurring list, removing it there stops it.
* 2. If it isn't in the pending ops list, it is either a recurring op in the
* stonith recurring list or in the service library's recurring list;
* stop it there.
* 3. If it is not found in any list, the operation has either already executed
* (and is not recurring) or never existed.
*/
if (!rsc) {
return -ENODEV;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action)
&& (cmd->interval_ms == interval_ms)) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return pcmk_ok;
}
}
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith operations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action)
&& (cmd->interval_ms == interval_ms)) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
if (rsc->active != cmd) {
cmd_finalize(cmd, rsc);
}
return pcmk_ok;
}
}
} else if (services_action_cancel(rsc_id,
normalize_action_name(rsc, action),
interval_ms) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return pcmk_ok;
}
return -EOPNOTSUPP;
}
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
/* Note that a copy of each list is created when concat is called.
* This prevents odd behavior when cmd_list is iterated later on:
* cancel_op() may end up modifying the recurring_ops and pending_ops
* lists, and if we did not copy them, our cmd_list iteration could
* be corrupted. */
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
}
if (!cmd_list) {
return;
}
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
lrmd_cmd_t *cmd = cmd_iter->data;
if (cmd->interval_ms == 0) {
continue;
}
if (client_id && safe_str_neq(cmd->client_id, client_id)) {
continue;
}
cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
}
/* frees only the copied list data, not the cmds */
g_list_free(cmd_list);
}
static int
process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
guint interval_ms = 0;
crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
if (!rsc_id || !action) {
return -EINVAL;
}
return cancel_op(rsc_id, action, interval_ms);
}
static void
add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
{
xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
lrmd_cmd_t *cmd = item->data;
xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
(cmd->real_action? cmd->real_action : cmd->action));
crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
}
}
static xmlNode *
process_lrmd_get_recurring(xmlNode *request, int call_id)
{
int rc = pcmk_ok;
const char *rsc_id = NULL;
lrmd_rsc_t *rsc = NULL;
xmlNode *reply = NULL;
xmlNode *rsc_xml = NULL;
// Resource ID is optional
rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
if (rsc_xml) {
rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
}
if (rsc_xml) {
rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
}
// If resource ID is specified, resource must exist
if (rsc_id != NULL) {
rsc = g_hash_table_lookup(rsc_list, rsc_id);
if (rsc == NULL) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
}
}
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
// If resource ID is not specified, check all resources
if (rsc_id == NULL) {
GHashTableIter iter;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rsc)) {
add_recurring_op_xml(reply, rsc);
}
} else if (rsc) {
add_recurring_op_xml(reply, rsc);
}
return reply;
}
void
process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
xmlNode *reply = NULL;
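/* Dispatch on the requested operation; do_reply and do_notify control whether
 * a reply goes back to the requesting client and whether all clients are notified */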
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) {
#ifdef SUPPORT_REMOTE
ipc_proxy_forward_client(client, request);
#endif
do_reply = 1;
} else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
reply = process_lrmd_signon(client, request, call_id);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
reply = process_lrmd_get_rsc_info(request, call_id);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* don't notify anyone about failed un-registers */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
rc = process_lrmd_rsc_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
rc = process_lrmd_rsc_cancel(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) {
xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG);
CRM_LOG_ASSERT(data != NULL);
check_sbd_timeout(timeout);
} else if (crm_str_eq(op, LRMD_OP_ALERT_EXEC, TRUE)) {
rc = process_lrmd_alert_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_GET_RECURRING, TRUE)) {
reply = process_lrmd_get_recurring(request, call_id);
do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown %s from %s", op, client->name);
crm_log_xml_warn(request, "UnknownOp");
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
op, client->id, rc, do_reply, do_notify);
if (do_reply) {
int send_rc = pcmk_ok;
if (reply == NULL) {
reply = create_lrmd_reply(__FUNCTION__, rc, call_id);
}
send_rc = lrmd_server_send_reply(client, id, reply);
free_xml(reply);
if (send_rc < 0) {
crm_warn("Reply to client %s failed: %s " CRM_XS " %d",
client->name, pcmk_strerror(send_rc), send_rc);
}
}
if (do_notify) {
send_generic_notify(rc, request);
}
}
