diff --git a/cts/CTStests.py b/cts/CTStests.py
index be7fd7f323..8e171fc27c 100644
--- a/cts/CTStests.py
+++ b/cts/CTStests.py
@@ -1,3135 +1,3137 @@
""" Test-specific classes for Pacemaker's Cluster Test Suite (CTS)
"""
# Pacemaker targets compatibility with Python 2.7 and 3.2+
from __future__ import print_function, unicode_literals, absolute_import, division
__copyright__ = "Copyright 2000-2019 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
#
# SPECIAL NOTE:
#
# Tests may NOT implement any cluster-manager-specific code in them.
# EXTEND the ClusterManager object to provide the base capabilities
# the test needs if you need to do something that the current CM classes
# do not. Otherwise you screw up the whole point of the object structure
# in CTS.
#
# Thank you.
#
import os
import re
import time
import subprocess
import tempfile
from stat import *
from cts import CTS
from cts.CTSaudits import *
from cts.CTSvars import *
from cts.patterns import PatternSelector
from cts.logging import LogFactory
from cts.remote import RemoteFactory, input_wrapper
from cts.watcher import LogWatcher
from cts.environment import EnvFactory
AllTestClasses = [ ]
class CTSTest(object):
'''
A Cluster test.
We implement the basic set of properties and behaviors for a generic
cluster test.
Cluster tests track their own statistics.
We keep each of the kinds of counts we track as separate {name,value}
pairs.
'''
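    # The life cycle of a test, as driven by the CTS scheduler, is roughly:
    #     setup(node) -> __call__(node) -> audit() -> teardown(node)
    # with success()/failure()/skipped() updating self.Stats along the way.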
def __init__(self, cm):
#self.name="the unnamed test"
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
# if not issubclass(cm.__class__, ClusterManager):
# raise ValueError("Must be a ClusterManager object")
self.CM = cm
self.Env = EnvFactory().getInstance()
self.rsh = RemoteFactory().getInstance()
self.logger = LogFactory()
self.templates = PatternSelector(cm["Name"])
self.Audits = []
self.timeout = 120
self.passed = 1
self.is_loop = 0
self.is_unsafe = 0
self.is_docker_unsafe = 0
self.is_experimental = 0
self.is_container = 0
self.is_valgrind = 0
        self.benchmark = 0  # whether this test's runtime should be benchmarked
        self.timer = {}  # named timers (see set_timer/log_timer)
def log(self, args):
self.logger.log(args)
def debug(self, args):
self.logger.debug(args)
def has_key(self, key):
return key in self.Stats
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
if str(key) == "0":
raise ValueError("Bad call to 'foo in X', should reference 'foo in X.Stats' instead")
if key in self.Stats:
return self.Stats[key]
return None
def log_mark(self, msg):
self.debug("MARK: test %s %s %d" % (self.name,msg,time.time()))
return
    def get_timer(self, key="test"):
        try:
            return self.timer[key]
        except KeyError:
            return 0
def set_timer(self,key = "test"):
self.timer[key] = time.time()
return self.timer[key]
def log_timer(self,key = "test"):
elapsed = 0
if key in self.timer:
elapsed = time.time() - self.timer[key]
        s = self.name if key == "test" else "%s:%s" % (self.name, key)
self.debug("%s runtime: %.2f" % (s, elapsed))
del self.timer[key]
return elapsed
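    # The timers are typically used in matched pairs, e.g.:
    #     self.set_timer("fence")
    #     ...wait for the expected log patterns...
    #     self.log_timer("fence")   # logs "<name>:fence runtime: <secs>"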
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
        if name not in self.Stats:
self.Stats[name] = 0
self.Stats[name] = self.Stats[name]+1
# Reset the test passed boolean
if name == "calls":
self.passed = 1
def failure(self, reason="none"):
'''Increment the failure count'''
self.passed = 0
self.incr("failure")
self.logger.log(("Test %s" % self.name).ljust(35) + " FAILED: %s" % reason)
return None
def success(self):
'''Increment the success count'''
self.incr("success")
return 1
def skipped(self):
'''Increment the skipped count'''
self.incr("skipped")
return 1
def __call__(self, node):
'''Perform the given test'''
        raise ValueError("Abstract Class member (__call__)")
def audit(self):
passed = 1
if len(self.Audits) > 0:
for audit in self.Audits:
if not audit():
self.logger.log("Internal %s Audit %s FAILED." % (self.name, audit.name()))
self.incr("auditfail")
passed = 0
return passed
def setup(self, node):
'''Setup the given test'''
return self.success()
def teardown(self, node):
'''Tear down the given test'''
return self.success()
def create_watch(self, patterns, timeout, name=None):
if not name:
name = self.name
return LogWatcher(self.Env["LogFileName"], patterns, name, timeout, kind=self.Env["LogWatcher"], hosts=self.Env["nodes"])
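    # Watches follow a common pattern throughout these tests:
    #     watch = self.create_watch(patterns, timeout)
    #     watch.setwatch()       # begin watching from the current log position
    #     ...trigger the event under test...
    #     watch.lookforall()     # then inspect watch.unmatched for leftovers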
    def local_badnews(self, prefix, watch, local_ignore=None):
        errcount = 0
        if not prefix:
            prefix = "LocalBadNews:"
        ignorelist = []
        ignorelist.append(" CTS: ")
        ignorelist.append(prefix)
        ignorelist.extend(local_ignore or [])
while errcount < 100:
match = watch.look(0)
if match:
add_err = 1
for ignore in ignorelist:
if add_err == 1 and re.search(ignore, match):
add_err = 0
if add_err == 1:
self.logger.log(prefix + " " + match)
errcount = errcount + 1
else:
break
else:
self.logger.log("Too many errors!")
watch.end()
return errcount
def is_applicable(self):
return self.is_applicable_common()
def is_applicable_common(self):
'''Return TRUE if we are applicable in the current test configuration'''
#raise ValueError("Abstract Class member (is_applicable)")
if self.is_loop and not self.Env["loop-tests"]:
return 0
elif self.is_unsafe and not self.Env["unsafe-tests"]:
return 0
elif self.is_valgrind and not self.Env["valgrind-tests"]:
return 0
elif self.is_experimental and not self.Env["experimental-tests"]:
return 0
elif self.is_docker_unsafe and self.Env["docker"]:
return 0
elif self.is_container and not self.Env["container-tests"]:
return 0
elif self.Env["benchmark"] and self.benchmark == 0:
return 0
return 1
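    # Each is_* flag above gates the test on the matching Environment option;
    # e.g. a test with is_loop set runs only when the loop-tests option is enabled.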
def find_ocfs2_resources(self, node):
self.r_o2cb = None
self.r_ocfs2 = []
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rtype == "o2cb" and r.parent != "NA":
self.debug("Found o2cb: %s" % self.r_o2cb)
self.r_o2cb = r.parent
if re.search("^Constraint", line):
c = AuditConstraint(self.CM, line)
if c.type == "rsc_colocation" and c.target == self.r_o2cb:
self.r_ocfs2.append(c.rsc)
self.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2))
return len(self.r_ocfs2)
def canrunnow(self, node):
'''Return TRUE if we can meaningfully run right now'''
return 1
def errorstoignore(self):
'''Return list of errors which are 'normal' and should be ignored'''
return []
class StopTest(CTSTest):
'''Stop (deactivate) the cluster manager on a node'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "Stop"
def __call__(self, node):
'''Perform the 'stop' test. '''
self.incr("calls")
if self.CM.ShouldBeStatus[node] != "up":
return self.skipped()
patterns = []
# Technically we should always be able to notice ourselves stopping
patterns.append(self.templates["Pat:We_stopped"] % node)
# Any active node needs to notice this one left
# (note that this won't work if we have multiple partitions)
for other in self.Env["nodes"]:
if self.CM.ShouldBeStatus[other] == "up" and other != node:
patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node)))
#self.debug("Checking %s will notice %s left"%(other, node))
watch = self.create_watch(patterns, self.Env["DeadTime"])
watch.setwatch()
if node == self.CM.OurNode:
self.incr("us")
else:
if self.CM.upcount() <= 1:
self.incr("all")
else:
self.incr("them")
self.CM.StopaCM(node)
watch_result = watch.lookforall()
failreason = None
UnmatchedList = "||"
if watch.unmatched:
(rc, output) = self.rsh(node, "/bin/ps axf", None)
for line in output:
self.debug(line)
(rc, output) = self.rsh(node, "/usr/sbin/dlm_tool dump", None)
for line in output:
self.debug(line)
for regex in watch.unmatched:
self.logger.log ("ERROR: Shutdown pattern not found: %s" % (regex))
                UnmatchedList += regex + "||"
failreason = "Missing shutdown pattern"
self.CM.cluster_stable(self.Env["DeadTime"])
if not watch.unmatched or self.CM.upcount() == 0:
return self.success()
if len(watch.unmatched) >= self.CM.upcount():
return self.failure("no match against (%s)" % UnmatchedList)
        if failreason is None:
return self.success()
else:
return self.failure(failreason)
#
# We don't register StopTest because it's better when called by
# another test...
#
class StartTest(CTSTest):
'''Start (activate) the cluster manager on a node'''
    def __init__(self, cm, debug=None):
        CTSTest.__init__(self,cm)
        self.name = "start"
        # Avoid clobbering CTSTest.debug() with None when no override is given
        if debug is not None:
            self.debug = debug
def __call__(self, node):
'''Perform the 'start' test. '''
self.incr("calls")
if self.CM.upcount() == 0:
self.incr("us")
else:
self.incr("them")
if self.CM.ShouldBeStatus[node] != "down":
return self.skipped()
elif self.CM.StartaCM(node):
return self.success()
else:
return self.failure("Startup %s on node %s failed"
% (self.Env["Name"], node))
#
# We don't register StartTest because it's better when called by
# another test...
#
class FlipTest(CTSTest):
    '''If it's running, stop it. If it's stopped, start it.
Overthrow the status quo...
'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Flip"
self.start = StartTest(cm)
self.stop = StopTest(cm)
def __call__(self, node):
'''Perform the 'Flip' test. '''
self.incr("calls")
if self.CM.ShouldBeStatus[node] == "up":
self.incr("stopped")
ret = self.stop(node)
type = "up->down"
# Give the cluster time to recognize it's gone...
time.sleep(self.Env["StableTime"])
elif self.CM.ShouldBeStatus[node] == "down":
self.incr("started")
ret = self.start(node)
type = "down->up"
else:
return self.skipped()
self.incr(type)
if ret:
return self.success()
else:
return self.failure("%s failure" % type)
# Register FlipTest as a good test to run
AllTestClasses.append(FlipTest)
class RestartTest(CTSTest):
'''Stop and restart a node'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Restart"
self.start = StartTest(cm)
self.stop = StopTest(cm)
self.benchmark = 1
def __call__(self, node):
'''Perform the 'restart' test. '''
self.incr("calls")
self.incr("node:" + node)
ret1 = 1
if self.CM.StataCM(node):
self.incr("WasStopped")
if not self.start(node):
return self.failure("start (setup) failure: "+node)
self.set_timer()
if not self.stop(node):
return self.failure("stop failure: "+node)
if not self.start(node):
return self.failure("start failure: "+node)
return self.success()
# Register RestartTest as a good test to run
AllTestClasses.append(RestartTest)
class StonithdTest(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "Stonithd"
self.startall = SimulStartLite(cm)
self.benchmark = 1
def __call__(self, node):
self.incr("calls")
if len(self.Env["nodes"]) < 2:
return self.skipped()
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
is_dc = self.CM.is_node_dc(node)
watchpats = []
watchpats.append(self.templates["Pat:FenceOpOK"] % node)
watchpats.append(self.templates["Pat:NodeFenced"] % node)
if self.Env["at-boot"] == 0:
self.debug("Expecting %s to stay down" % node)
self.CM.ShouldBeStatus[node] = "down"
else:
self.debug("Expecting %s to come up again %d" % (node, self.Env["at-boot"]))
watchpats.append("%s.* S_STARTING -> S_PENDING" % node)
watchpats.append("%s.* S_PENDING -> S_NOT_DC" % node)
watch = self.create_watch(watchpats, 30 + self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
watch.setwatch()
origin = self.Env.RandomGen.choice(self.Env["nodes"])
rc = self.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node)
if rc == 194:
# 194 - 256 = -62 = Timer expired
#
# Look for the patterns, usually this means the required
# device was running on the node to be fenced - or that
# the required devices were in the process of being loaded
# and/or moved
#
# Effectively the node committed suicide so there will be
# no confirmation, but pacemaker should be watching and
# fence the node again
self.logger.log("Fencing command on %s to fence %s timed out" % (origin, node))
elif origin != node and rc != 0:
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
self.debug("Waiting for fenced node to come back up")
self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
self.logger.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc))
elif origin == node and rc != 255:
            # 255 == broken pipe, i.e., the node was fenced as expected
self.logger.log("Locally originated fencing returned %d" % rc)
self.set_timer("fence")
matched = watch.lookforall()
self.log_timer("fence")
self.set_timer("reform")
if watch.unmatched:
self.logger.log("Patterns not found: " + repr(watch.unmatched))
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
self.debug("Waiting for fenced node to come back up")
self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
self.debug("Waiting for the cluster to re-stabilize with all nodes")
is_stable = self.CM.cluster_stable(self.Env["StartTime"])
if not matched:
return self.failure("Didn't find all expected patterns")
elif not is_stable:
return self.failure("Cluster did not become stable")
self.log_timer("reform")
return self.success()
def errorstoignore(self):
return [
self.templates["Pat:Fencing_start"] % ".*",
self.templates["Pat:Fencing_ok"] % ".*",
r"error.*: Resource .*stonith::.* is active on 2 nodes attempting recovery",
r"error.*: Operation reboot of .*by .* for stonith_admin.*: Timer expired",
]
def is_applicable(self):
if not self.is_applicable_common():
return 0
if "DoFencing" in list(self.Env.keys()):
return self.Env["DoFencing"]
return 1
AllTestClasses.append(StonithdTest)
class StartOnebyOne(CTSTest):
'''Start all the nodes ~ one by one'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "StartOnebyOne"
self.stopall = SimulStopLite(cm)
self.start = StartTest(cm)
self.ns = CTS.NodeStatus(cm.Env)
def __call__(self, dummy):
'''Perform the 'StartOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Test setup failed")
failed = []
self.set_timer()
for node in self.Env["nodes"]:
if not self.start(node):
failed.append(node)
if len(failed) > 0:
return self.failure("Some node failed to start: " + repr(failed))
return self.success()
# Register StartOnebyOne as a good test to run
AllTestClasses.append(StartOnebyOne)
class SimulStart(CTSTest):
'''Start all the nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SimulStart"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
def __call__(self, dummy):
'''Perform the 'SimulStart' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Setup failed")
if not self.startall(None):
return self.failure("Startall failed")
return self.success()
# Register SimulStart as a good test to run
AllTestClasses.append(SimulStart)
class SimulStop(CTSTest):
'''Stop all the nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SimulStop"
self.startall = SimulStartLite(cm)
self.stopall = SimulStopLite(cm)
def __call__(self, dummy):
'''Perform the 'SimulStop' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
if not self.stopall(None):
return self.failure("Stopall failed")
return self.success()
# Register SimulStop as a good test to run
AllTestClasses.append(SimulStop)
class StopOnebyOne(CTSTest):
'''Stop all the nodes in order'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "StopOnebyOne"
self.startall = SimulStartLite(cm)
self.stop = StopTest(cm)
def __call__(self, dummy):
'''Perform the 'StopOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
failed = []
self.set_timer()
for node in self.Env["nodes"]:
if not self.stop(node):
failed.append(node)
if len(failed) > 0:
return self.failure("Some node failed to stop: " + repr(failed))
return self.success()
# Register StopOnebyOne as a good test to run
AllTestClasses.append(StopOnebyOne)
class RestartOnebyOne(CTSTest):
'''Restart all the nodes in order'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "RestartOnebyOne"
self.startall = SimulStartLite(cm)
def __call__(self, dummy):
'''Perform the 'RestartOnebyOne' test. '''
self.incr("calls")
# We ignore the "node" parameter...
# Start up all the nodes...
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
did_fail = []
self.set_timer()
self.restart = RestartTest(self.CM)
for node in self.Env["nodes"]:
if not self.restart(node):
did_fail.append(node)
if did_fail:
return self.failure("Could not restart %d nodes: %s"
% (len(did_fail), repr(did_fail)))
return self.success()
# Register RestartOnebyOne as a good test to run
AllTestClasses.append(RestartOnebyOne)
class PartialStart(CTSTest):
'''Start a node - but tell it to stop before it finishes starting up'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "PartialStart"
self.startall = SimulStartLite(cm)
self.stopall = SimulStopLite(cm)
self.stop = StopTest(cm)
#self.is_unsafe = 1
def __call__(self, node):
'''Perform the 'PartialStart' test. '''
self.incr("calls")
ret = self.stopall(None)
if not ret:
return self.failure("Setup failed")
# FIXME! This should use the CM class to get the pattern
# then it would be applicable in general
watchpats = []
watchpats.append("pacemaker-controld.*Connecting to cluster infrastructure")
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
self.CM.StartaCMnoBlock(node)
ret = watch.lookforall()
if not ret:
self.logger.log("Patterns not found: " + repr(watch.unmatched))
return self.failure("Setup of %s failed" % node)
ret = self.stop(node)
if not ret:
return self.failure("%s did not stop in time" % node)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
# We might do some fencing in the 2-node case if we make it up far enough
return [
r"Executing reboot fencing operation",
r"Requesting fencing \([^)]+\) of node ",
]
# Register PartialStart as a good test to run
AllTestClasses.append(PartialStart)
class StandbyTest(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Standby"
self.benchmark = 1
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
    # make sure the node is active
    # set the node to standby mode
    # check resources; no resources should be running on the node
    # set the node to active mode
    # check resources; resources should have been migrated back (SHOULD THEY?)
def __call__(self, node):
self.incr("calls")
ret = self.startall(None)
if not ret:
return self.failure("Start all nodes failed")
self.debug("Make sure node %s is active" % node)
if self.CM.StandbyStatus(node) != "off":
if not self.CM.SetStandbyMode(node, "off"):
return self.failure("can't set node %s to active mode" % node)
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "off":
return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
self.debug("Getting resources running on node %s" % node)
rsc_on_node = self.CM.active_resources(node)
watchpats = []
watchpats.append(r"State transition .* -> S_POLICY_ENGINE")
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
self.debug("Setting node %s to standby mode" % node)
if not self.CM.SetStandbyMode(node, "on"):
return self.failure("can't set node %s to standby mode" % node)
self.set_timer("on")
ret = watch.lookforall()
if not ret:
self.logger.log("Patterns not found: " + repr(watch.unmatched))
self.CM.SetStandbyMode(node, "off")
return self.failure("cluster didn't react to standby change on %s" % node)
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "on":
return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status))
self.log_timer("on")
self.debug("Checking resources")
bad_run = self.CM.active_resources(node)
if len(bad_run) > 0:
rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run)))
self.debug("Setting node %s to active mode" % node)
self.CM.SetStandbyMode(node, "off")
return rc
self.debug("Setting node %s to active mode" % node)
if not self.CM.SetStandbyMode(node, "off"):
return self.failure("can't set node %s to active mode" % node)
self.set_timer("off")
self.CM.cluster_stable()
status = self.CM.StandbyStatus(node)
if status != "off":
return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status))
self.log_timer("off")
return self.success()
AllTestClasses.append(StandbyTest)
class ValgrindTest(CTSTest):
'''Check for memory leaks'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Valgrind"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
self.is_valgrind = 1
self.is_loop = 1
def setup(self, node):
self.incr("calls")
ret = self.stopall(None)
if not ret:
return self.failure("Stop all nodes failed")
# @TODO Edit /etc/sysconfig/pacemaker on all nodes to enable valgrind,
# and clear any valgrind logs from previous runs. For now, we rely on
# the user to do this manually.
ret = self.startall(None)
if not ret:
return self.failure("Start all nodes failed")
return self.success()
def teardown(self, node):
# Return all nodes to normal
# @TODO Edit /etc/sysconfig/pacemaker on all nodes to disable valgrind
ret = self.stopall(None)
if not ret:
return self.failure("Stop all nodes failed")
return self.success()
def find_leaks(self):
# Check for leaks
# (no longer used but kept in case feature is restored)
leaked = []
self.stop = StopTest(self.CM)
for node in self.Env["nodes"]:
rc = self.stop(node)
if not rc:
self.failure("Couldn't shut down %s" % node)
rc = self.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logger.logPat, 0)
if rc != 1:
leaked.append(node)
self.failure("Valgrind errors detected on %s" % node)
(rc, output) = self.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logger.logPat, None)
for line in output:
self.logger.log(line)
(rc, output) = self.rsh(node, "cat %s" % self.logger.logPat, None)
for line in output:
self.debug(line)
self.rsh(node, "rm -f %s" % self.logger.logPat, None)
return leaked
def __call__(self, node):
#leaked = self.find_leaks()
#if len(leaked) > 0:
# return self.failure("Nodes %s leaked" % repr(leaked))
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"pacemaker-based.*: \*\*\*\*\*\*\*\*\*\*\*\*\*",
r"pacemaker-based.*: .* avoid confusing Valgrind",
r"HA_VALGRIND_ENABLED",
]
class StandbyLoopTest(ValgrindTest):
'''Check for memory leaks by putting a node in and out of standby for an hour'''
# @TODO This is not a useful test for memory leaks
def __init__(self, cm):
ValgrindTest.__init__(self,cm)
self.name = "StandbyLoop"
def __call__(self, node):
lpc = 0
delay = 2
failed = 0
done = time.time() + self.Env["loop-minutes"] * 60
while time.time() <= done and not failed:
lpc = lpc + 1
time.sleep(delay)
if not self.CM.SetStandbyMode(node, "on"):
self.failure("can't set node %s to standby mode" % node)
failed = lpc
time.sleep(delay)
if not self.CM.SetStandbyMode(node, "off"):
self.failure("can't set node %s to active mode" % node)
failed = lpc
leaked = self.find_leaks()
if failed:
return self.failure("Iteration %d failed" % failed)
elif len(leaked) > 0:
return self.failure("Nodes %s leaked" % repr(leaked))
return self.success()
#AllTestClasses.append(StandbyLoopTest)
class BandwidthTest(CTSTest):
# Tests should not be cluster-manager-specific
# If you need to find out cluster manager configuration to do this, then
# it should be added to the generic cluster manager API.
'''Test the bandwidth which the cluster uses'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "Bandwidth"
self.start = StartTest(cm)
self.__setitem__("min",0)
self.__setitem__("max",0)
self.__setitem__("totalbandwidth",0)
(handle, self.tempfile) = tempfile.mkstemp(".cts")
os.close(handle)
self.startall = SimulStartLite(cm)
def __call__(self, node):
'''Perform the Bandwidth test'''
self.incr("calls")
if self.CM.upcount() < 1:
return self.skipped()
Path = self.CM.InternalCommConfig()
if "ip" not in Path["mediatype"]:
return self.skipped()
port = Path["port"][0]
port = int(port)
ret = self.startall(None)
if not ret:
return self.failure("Test setup failed")
time.sleep(5) # We get extra messages right after startup.
fstmpfile = "/var/run/band_estimate"
dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \
% (port, fstmpfile)
rc = self.rsh(node, dumpcmd)
if rc == 0:
farfile = "root@%s:%s" % (node, fstmpfile)
self.rsh.cp(farfile, self.tempfile)
Bandwidth = self.countbandwidth(self.tempfile)
if not Bandwidth:
self.logger.log("Could not compute bandwidth.")
return self.success()
intband = int(Bandwidth + 0.5)
self.logger.log("...bandwidth: %d bits/sec" % intband)
self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth
if self.Stats["min"] == 0:
self.Stats["min"] = Bandwidth
if Bandwidth > self.Stats["max"]:
self.Stats["max"] = Bandwidth
if Bandwidth < self.Stats["min"]:
self.Stats["min"] = Bandwidth
self.rsh(node, "rm -f %s" % fstmpfile)
os.unlink(self.tempfile)
return self.success()
else:
return self.failure("no response from tcpdump command [%d]!" % rc)
def countbandwidth(self, file):
fp = open(file, "r")
fp.seek(0)
count = 0
sum = 0
while 1:
line = fp.readline()
if not line:
return None
if re.search("udp",line) or re.search("UDP,", line):
count = count + 1
linesplit = line.split(" ")
for j in range(len(linesplit)-1):
if linesplit[j] == "udp": break
if linesplit[j] == "length:": break
try:
sum = sum + int(linesplit[j+1])
except ValueError:
self.logger.log("Invalid tcpdump line: %s" % line)
return None
T1 = linesplit[0]
timesplit = T1.split(":")
time2split = timesplit[2].split(".")
time1 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001
break
while count < 100:
line = fp.readline()
if not line:
return None
if re.search("udp",line) or re.search("UDP,", line):
count = count+1
linessplit = line.split(" ")
for j in range(len(linessplit)-1):
if linessplit[j] == "udp": break
if linessplit[j] == "length:": break
try:
sum = int(linessplit[j+1]) + sum
except ValueError:
self.logger.log("Invalid tcpdump line: %s" % line)
return None
T2 = linessplit[0]
timesplit = T2.split(":")
time2split = timesplit[2].split(".")
time2 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001
        elapsed = time2 - time1
        if elapsed <= 0:
            return 0
        # sum is payload bytes; multiply by 8 to report bits/sec
        return int((sum * 8) / elapsed)
def is_applicable(self):
'''BandwidthTest never applicable'''
return 0
AllTestClasses.append(BandwidthTest)
###################################################################
class MaintenanceMode(CTSTest):
###################################################################
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "MaintenanceMode"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.max = 30
#self.is_unsafe = 1
self.benchmark = 1
self.action = "asyncmon"
self.interval = 0
self.rid = "maintenanceDummy"
def toggleMaintenanceMode(self, node, action):
pats = []
pats.append(self.templates["Pat:DC_IDLE"])
        # Fail the resource right after turning maintenance mode on;
        # verify it is not recovered until maintenance mode is turned off
if action == "On":
pats.append(r"schedulerd.*:\s+warning:.*Processing failed %s of %s on" % (self.action, self.rid))
else:
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid))
pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid))
watch = self.create_watch(pats, 60)
watch.setwatch()
self.debug("Turning maintenance mode %s" % action)
self.rsh(node, self.templates["MaintenanceMode%s" % (action)])
if (action == "On"):
self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
self.set_timer("recover%s" % (action))
watch.lookforall()
self.log_timer("recover%s" % (action))
if watch.unmatched:
self.debug("Failed to find patterns when turning maintenance mode %s" % action)
return repr(watch.unmatched)
return ""
def insertMaintenanceDummy(self, node):
pats = []
pats.append(("%s.*" % node) + (self.templates["Pat:RscOpOK"] % ("start", self.rid)))
watch = self.create_watch(pats, 60)
watch.setwatch()
self.CM.AddDummyRsc(node, self.rid)
self.set_timer("addDummy")
watch.lookforall()
self.log_timer("addDummy")
if watch.unmatched:
self.debug("Failed to find patterns when adding maintenance dummy resource")
return repr(watch.unmatched)
return ""
def removeMaintenanceDummy(self, node):
pats = []
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid))
watch = self.create_watch(pats, 60)
watch.setwatch()
self.CM.RemoveDummyRsc(node, self.rid)
self.set_timer("removeDummy")
watch.lookforall()
self.log_timer("removeDummy")
if watch.unmatched:
self.debug("Failed to find patterns when removing maintenance dummy resource")
return repr(watch.unmatched)
return ""
def managedRscList(self, node):
rscList = []
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
tmp = AuditResource(self.CM, line)
if tmp.managed():
rscList.append(tmp.id)
return rscList
def verifyResources(self, node, rscList, managed):
managedList = list(rscList)
managed_str = "managed"
if not managed:
managed_str = "unmanaged"
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
tmp = AuditResource(self.CM, line)
if managed and not tmp.managed():
continue
elif not managed and tmp.managed():
continue
elif managedList.count(tmp.id):
managedList.remove(tmp.id)
if len(managedList) == 0:
self.debug("Found all %s resources on %s" % (managed_str, node))
return True
self.logger.log("Could not find all %s resources on %s. %s" % (managed_str, node, managedList))
return False
def __call__(self, node):
'''Perform the 'MaintenanceMode' test. '''
self.incr("calls")
verify_managed = False
verify_unmanaged = False
failPat = ""
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
        # Get a list of all the managed resources. We use this list
        # after enabling maintenance mode to verify all managed resources
        # become unmanaged. After maintenance mode is turned off, we use
        # this list to verify all the resources become managed again.
managedResources = self.managedRscList(node)
if len(managedResources) == 0:
self.logger.log("No managed resources on %s" % node)
return self.skipped()
# insert a fake resource we can fail during maintenance mode
# so we can verify recovery does not take place until after maintenance
# mode is disabled.
failPat = failPat + self.insertMaintenanceDummy(node)
# toggle maintenance mode ON, then fail dummy resource.
failPat = failPat + self.toggleMaintenanceMode(node, "On")
# verify all the resources are now unmanaged
if self.verifyResources(node, managedResources, False):
verify_unmanaged = True
# Toggle maintenance mode OFF, verify dummy is recovered.
failPat = failPat + self.toggleMaintenanceMode(node, "Off")
# verify all the resources are now managed again
if self.verifyResources(node, managedResources, True):
verify_managed = True
# Remove our maintenance dummy resource.
failPat = failPat + self.removeMaintenanceDummy(node)
self.CM.cluster_stable()
if failPat != "":
return self.failure("Unmatched patterns: %s" % (failPat))
elif verify_unmanaged is False:
return self.failure("Failed to verify resources became unmanaged during maintenance mode")
elif verify_managed is False:
return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode")
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"Updating failcount for %s" % self.rid,
r"schedulerd.*: Recover %s\s*\(.*\)" % self.rid,
r"Unknown operation: fail",
self.templates["Pat:RscOpOK"] % (self.action, self.rid),
r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
]
AllTestClasses.append(MaintenanceMode)
class ResourceRecover(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "ResourceRecover"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.max = 30
self.rid = None
self.rid_alt = None
#self.is_unsafe = 1
self.benchmark = 1
# these are the values used for the new LRM API call
self.action = "asyncmon"
self.interval = 0
def __call__(self, node):
'''Perform the 'ResourceRecover' test. '''
self.incr("calls")
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
resourcelist = self.CM.active_resources(node)
        # if no resources are active, skip this test
if len(resourcelist) == 0:
self.logger.log("No active resources on %s" % node)
return self.skipped()
self.rid = self.Env.RandomGen.choice(resourcelist)
self.rid_alt = self.rid
rsc = None
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
tmp = AuditResource(self.CM, line)
if tmp.id == self.rid:
rsc = tmp
# Handle anonymous clones that get renamed
self.rid = rsc.clone_id
break
if not rsc:
return self.failure("Could not find %s in the resource list" % self.rid)
self.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id))
pats = []
pats.append(r"schedulerd.*:\s+warning:.*Processing failed %s of (%s|%s) on" % (self.action,
rsc.id, rsc.clone_id))
if rsc.managed():
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid))
if rsc.unique():
pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid))
else:
# Anonymous clones may get restarted with a different clone number
pats.append(self.templates["Pat:RscOpOK"] % ("start", ".*"))
watch = self.create_watch(pats, 60)
watch.setwatch()
self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))
self.set_timer("recover")
watch.lookforall()
self.log_timer("recover")
self.CM.cluster_stable()
recovered = self.CM.ResourceLocation(self.rid)
if watch.unmatched:
return self.failure("Patterns not found: %s" % repr(watch.unmatched))
elif rsc.unique() and len(recovered) > 1:
return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered)))
elif len(recovered) > 0:
self.debug("%s is running on: %s" % (self.rid, repr(recovered)))
elif rsc.managed():
return self.failure("%s was not recovered and is inactive" % self.rid)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"Updating failcount for %s" % self.rid,
r"schedulerd.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt),
r"Unknown operation: fail",
self.templates["Pat:RscOpOK"] % (self.action, self.rid),
r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
]
AllTestClasses.append(ResourceRecover)
class ComponentFail(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "ComponentFail"
# TODO make this work correctly in docker.
self.is_docker_unsafe = 1
self.startall = SimulStartLite(cm)
self.complist = cm.Components()
self.patterns = []
self.okerrpatterns = []
self.is_unsafe = 1
def __call__(self, node):
'''Perform the 'ComponentFail' test. '''
self.incr("calls")
self.patterns = []
self.okerrpatterns = []
# start all nodes
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
if not self.CM.cluster_stable(self.Env["StableTime"]):
return self.failure("Setup failed - unstable")
node_is_dc = self.CM.is_node_dc(node, None)
# select a component to kill
chosen = self.Env.RandomGen.choice(self.complist)
while chosen.dc_only == 1 and node_is_dc == 0:
chosen = self.Env.RandomGen.choice(self.complist)
self.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot))
self.incr(chosen.name)
if chosen.name != "corosync":
self.patterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name))
self.patterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name))
self.patterns.extend(chosen.pats)
if node_is_dc:
self.patterns.extend(chosen.dc_pats)
- if chosen.name == "pacemaker-fenced":
- # Ignore actions for STONITH resources
+ # @TODO this should be a flag in the Component
+ if chosen.name in [ "corosync", "pacemaker-based", "pacemaker-fenced" ]:
+ # Ignore actions for fence devices if fencer will respawn
+ # (their registration will be lost, and probes will fail)
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rclass == "stonith":
self.okerrpatterns.append(self.templates["Pat:Fencing_recover"] % r.id)
+ self.okerrpatterns.append(self.templates["Pat:Fencing_active"] % r.id)
+ self.okerrpatterns.append(self.templates["Pat:Fencing_probe"] % r.id)
# supply a copy so self.patterns doesn't end up empty
tmpPats = []
tmpPats.extend(self.patterns)
self.patterns.extend(chosen.badnews_ignore)
# Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status
stonithPats = []
stonithPats.append(self.templates["Pat:Fencing_ok"] % node)
stonith = self.create_watch(stonithPats, 0)
stonith.setwatch()
# set the watch for stable
watch = self.create_watch(
tmpPats, self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"])
watch.setwatch()
# kill the component
chosen.kill(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
self.debug("Waiting for any fenced node to come back up")
self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600)
self.debug("Waiting for the cluster to re-stabilize with all nodes")
self.CM.cluster_stable(self.Env["StartTime"])
self.debug("Checking if %s was shot" % node)
shot = stonith.look(60)
if shot:
self.debug("Found: " + repr(shot))
self.okerrpatterns.append(self.templates["Pat:Fencing_start"] % node)
if self.Env["at-boot"] == 0:
self.CM.ShouldBeStatus[node] = "down"
            # If fencing occurred, chances are many (if not all) of the expected
            # logs will not be sent - or will be lost when the node reboots
return self.success()
# check for logs indicating a graceful recovery
matched = watch.lookforall(allow_multiple_matches=1)
if watch.unmatched:
self.logger.log("Patterns not found: " + repr(watch.unmatched))
self.debug("Waiting for the cluster to re-stabilize with all nodes")
is_stable = self.CM.cluster_stable(self.Env["StartTime"])
if not matched:
return self.failure("Didn't find all expected %s patterns" % chosen.name)
elif not is_stable:
return self.failure("Cluster did not become stable after killing %s" % chosen.name)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
# Note that okerrpatterns refers to the last time we ran this test
# The good news is that this works fine for us...
self.okerrpatterns.extend(self.patterns)
return self.okerrpatterns
AllTestClasses.append(ComponentFail)
class SplitBrainTest(CTSTest):
    '''Test split-brain: when the path between the two nodes breaks,
    check that both nodes take over the resource'''
def __init__(self,cm):
CTSTest.__init__(self,cm)
self.name = "SplitBrain"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.is_experimental = 1
def isolate_partition(self, partition):
other_nodes = []
other_nodes.extend(self.Env["nodes"])
for node in partition:
try:
other_nodes.remove(node)
except ValueError:
self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]) + " from " +repr(partition))
if len(other_nodes) == 0:
return 1
self.debug("Creating partition: " + repr(partition))
self.debug("Everyone else: " + repr(other_nodes))
for node in partition:
if not self.CM.isolate_node(node, other_nodes):
self.logger.log("Could not isolate %s" % node)
return 0
return 1
def heal_partition(self, partition):
other_nodes = []
other_nodes.extend(self.Env["nodes"])
for node in partition:
try:
other_nodes.remove(node)
except ValueError:
self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]))
if len(other_nodes) == 0:
return 1
self.debug("Healing partition: " + repr(partition))
self.debug("Everyone else: " + repr(other_nodes))
for node in partition:
self.CM.unisolate_node(node, other_nodes)
def __call__(self, node):
'''Perform split-brain test'''
self.incr("calls")
self.passed = 1
partitions = {}
ret = self.startall(None)
if not ret:
return self.failure("Setup failed")
while 1:
# Retry until we get multiple partitions
partitions = {}
p_max = len(self.Env["nodes"])
for node in self.Env["nodes"]:
p = self.Env.RandomGen.randint(1, p_max)
if not p in partitions:
partitions[p] = []
partitions[p].append(node)
p_max = len(list(partitions.keys()))
if p_max > 1:
break
# else, try again
self.debug("Created %d partitions" % p_max)
for key in list(partitions.keys()):
self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key]))
# Disabling STONITH to reduce test complexity for now
self.rsh(node, "crm_attribute -V -n stonith-enabled -v false")
for key in list(partitions.keys()):
self.isolate_partition(partitions[key])
        count = 30
        while count > 0:
            if len(self.CM.find_partitions()) != p_max:
                time.sleep(10)
                count -= 1
            else:
                break
else:
self.failure("Expected partitions were not created")
# Target number of partitions formed - wait for stability
if not self.CM.cluster_stable():
self.failure("Partitioned cluster not stable")
# Now audit the cluster state
self.CM.partitions_expected = p_max
if not self.audit():
self.failure("Audits failed")
self.CM.partitions_expected = 1
# And heal them again
for key in list(partitions.keys()):
self.heal_partition(partitions[key])
# Wait for a single partition to form
count = 30
while count > 0:
if len(self.CM.find_partitions()) != 1:
time.sleep(10)
count -= 1
else:
break
else:
self.failure("Cluster did not reform")
# Wait for it to have the right number of members
count = 30
while count > 0:
members = []
partitions = self.CM.find_partitions()
if len(partitions) > 0:
members = partitions[0].split()
if len(members) != len(self.Env["nodes"]):
time.sleep(10)
count -= 1
else:
break
else:
self.failure("Cluster did not completely reform")
        # Wait up to 20 minutes - the delay is preferable to
        # trying to continue in a messed-up state
if not self.CM.cluster_stable(1200):
self.failure("Reformed cluster not stable")
if self.Env["continue"] == 1:
answer = "Y"
else:
try:
answer = input_wrapper('Continue? [nY]')
except EOFError as e:
answer = "n"
if answer and answer == "n":
raise ValueError("Reformed cluster not stable")
# Turn fencing back on
if self.Env["DoFencing"]:
self.rsh(node, "crm_attribute -V -D -n stonith-enabled")
self.CM.cluster_stable()
if self.passed:
return self.success()
return self.failure("See previous errors")
def errorstoignore(self):
'''Return list of errors which are 'normal' and should be ignored'''
return [
r"Another DC detected:",
r"(ERROR|error).*: .*Application of an update diff failed",
r"pacemaker-controld.*:.*not in our membership list",
r"CRIT:.*node.*returning after partition",
]
def is_applicable(self):
if not self.is_applicable_common():
return 0
return len(self.Env["nodes"]) > 2
AllTestClasses.append(SplitBrainTest)
class Reattach(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "Reattach"
self.startall = SimulStartLite(cm)
self.restart1 = RestartTest(cm)
self.stopall = SimulStopLite(cm)
self.is_unsafe = 0 # Handled by canrunnow()
def _is_managed(self, node):
is_managed = self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -q -G -d true", 1)
is_managed = is_managed[:-1] # Strip off the newline
return is_managed == "true"
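    # Here crm_attribute -G queries the rsc_defaults attribute, -q prints
    # only its value, and -d supplies "true" when is-managed is unset.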
def _set_unmanaged(self, node):
self.debug("Disable resource management")
self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -v false")
def _set_managed(self, node):
self.debug("Re-enable resource management")
self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -D")
def setup(self, node):
attempt = 0
if not self.startall(None):
return None
# Make sure we are really _really_ stable and that all
# resources, including those that depend on transient node
# attributes, are started
while not self.CM.cluster_stable(double_check=True):
if attempt < 5:
attempt += 1
self.debug("Not stable yet, re-testing")
else:
self.logger.log("Cluster is not stable")
return None
return 1
def teardown(self, node):
# Make sure 'node' is up
start = StartTest(self.CM)
start(node)
if not self._is_managed(node):
self.logger.log("Attempting to re-enable resource management on %s" % node)
self._set_managed(node)
self.CM.cluster_stable()
if not self._is_managed(node):
self.logger.log("Could not re-enable resource management")
return 0
return 1
def canrunnow(self, node):
'''Return TRUE if we can meaningfully run right now'''
if self.find_ocfs2_resources(node):
self.logger.log("Detach/Reattach scenarios are not possible with OCFS2 services present")
return 0
return 1
def __call__(self, node):
self.incr("calls")
pats = []
# Conveniently, the scheduler will display this message when disabling
# management, even if fencing is not enabled, so we can rely on it.
managed = self.create_watch(["Delaying fencing operations"], 60)
managed.setwatch()
self._set_unmanaged(node)
if not managed.lookforall():
self.logger.log("Patterns not found: " + repr(managed.unmatched))
return self.failure("Resource management not disabled")
pats = []
pats.append(self.templates["Pat:RscOpOK"] % ("start", ".*"))
pats.append(self.templates["Pat:RscOpOK"] % ("stop", ".*"))
pats.append(self.templates["Pat:RscOpOK"] % ("promote", ".*"))
pats.append(self.templates["Pat:RscOpOK"] % ("demote", ".*"))
pats.append(self.templates["Pat:RscOpOK"] % ("migrate", ".*"))
watch = self.create_watch(pats, 60, "ShutdownActivity")
watch.setwatch()
self.debug("Shutting down the cluster")
ret = self.stopall(None)
if not ret:
self._set_managed(node)
return self.failure("Couldn't shut down the cluster")
self.debug("Bringing the cluster back up")
ret = self.startall(None)
time.sleep(5) # allow ping to update the CIB
if not ret:
self._set_managed(node)
return self.failure("Couldn't restart the cluster")
if self.local_badnews("ResourceActivity:", watch):
self._set_managed(node)
return self.failure("Resources stopped or started during cluster restart")
watch = self.create_watch(pats, 60, "StartupActivity")
watch.setwatch()
# Re-enable resource management (and verify it happened).
self._set_managed(node)
self.CM.cluster_stable()
if not self._is_managed(node):
return self.failure("Could not re-enable resource management")
# Ignore actions for STONITH resources
ignore = []
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rclass == "stonith":
self.debug("Ignoring start actions for %s" % r.id)
ignore.append(self.templates["Pat:RscOpOK"] % ("start", r.id))
if self.local_badnews("ResourceActivity:", watch, ignore):
return self.failure("Resources stopped or started after resource management was re-enabled")
return ret
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"resource( was|s were) active at shutdown",
]
def is_applicable(self):
return 1
AllTestClasses.append(Reattach)
class SpecialTest1(CTSTest):
'''Set up a custom test to cause quorum failure issues for Andrew'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SpecialTest1"
self.startall = SimulStartLite(cm)
self.restart1 = RestartTest(cm)
self.stopall = SimulStopLite(cm)
def __call__(self, node):
'''Perform the 'SpecialTest1' test for Andrew. '''
self.incr("calls")
# Shut down all the nodes...
ret = self.stopall(None)
if not ret:
return self.failure("Could not stop all nodes")
# Test config recovery when the other nodes come up
self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*")
# Start the selected node
ret = self.restart1(node)
if not ret:
return self.failure("Could not start "+node)
# Start all remaining nodes
ret = self.startall(None)
if not ret:
return self.failure("Could not start the remaining nodes")
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
# Errors that occur as a result of the CIB being wiped
return [
r"error.*: v1 patchset error, patch failed to apply: Application of an update diff failed",
r"error.*: Resource start-up disabled since no STONITH resources have been defined",
r"error.*: Either configure some or disable STONITH with the stonith-enabled option",
r"error.*: NOTE: Clusters with shared data need STONITH to ensure data integrity",
]
AllTestClasses.append(SpecialTest1)
class HAETest(CTSTest):
    '''Base class for tests of the HA Extension (HAE) services'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "HAETest"
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
self.is_loop = 1
def setup(self, node):
# Start all remaining nodes
ret = self.startall(None)
if not ret:
return self.failure("Couldn't start all nodes")
return self.success()
def teardown(self, node):
# Stop everything
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
return self.success()
def wait_on_state(self, node, resource, expected_clones, attempts=240):
while attempts > 0:
active = 0
(rc, lines) = self.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None)
# Hack until crm_resource does the right thing
if rc == 0 and lines:
active = len(lines)
if len(lines) == expected_clones:
return 1
elif rc == 1:
self.debug("Resource %s is still inactive" % resource)
elif rc == 234:
self.logger.log("Unknown resource %s" % resource)
return 0
elif rc == 246:
self.logger.log("Cluster is inactive")
return 0
elif rc != 0:
self.logger.log("Call to crm_resource failed, rc=%d" % rc)
return 0
else:
self.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones))
attempts -= 1
time.sleep(1)
return 0
def find_dlm(self, node):
self.r_dlm = None
(rc, lines) = self.rsh(node, "crm_resource -c", None)
for line in lines:
if re.search("^Resource", line):
r = AuditResource(self.CM, line)
if r.rtype == "controld" and r.parent != "NA":
self.debug("Found dlm: %s" % self.r_dlm)
self.r_dlm = r.parent
return 1
return 0
def find_hae_resources(self, node):
self.r_dlm = None
self.r_o2cb = None
self.r_ocfs2 = []
if self.find_dlm(node):
self.find_ocfs2_resources(node)
def is_applicable(self):
if not self.is_applicable_common():
return 0
if self.Env["Schema"] == "hae":
return 1
return None
class HAERoleTest(HAETest):
def __init__(self, cm):
'''Lars' mount/unmount test for the HA extension. '''
HAETest.__init__(self,cm)
self.name = "HAERoleTest"
def change_state(self, node, resource, target):
rc = self.rsh(node, "crm_resource -V -r %s -p target-role -v %s --meta" % (resource, target))
return rc
def __call__(self, node):
self.incr("calls")
lpc = 0
failed = 0
delay = 2
done = time.time() + self.Env["loop-minutes"]*60
self.find_hae_resources(node)
clone_max = len(self.Env["nodes"])
while time.time() <= done and not failed:
lpc = lpc + 1
self.change_state(node, self.r_dlm, "Stopped")
if not self.wait_on_state(node, self.r_dlm, 0):
self.failure("%s did not go down correctly" % self.r_dlm)
failed = lpc
self.change_state(node, self.r_dlm, "Started")
if not self.wait_on_state(node, self.r_dlm, clone_max):
self.failure("%s did not come up correctly" % self.r_dlm)
failed = lpc
if not self.wait_on_state(node, self.r_o2cb, clone_max):
self.failure("%s did not come up correctly" % self.r_o2cb)
failed = lpc
for fs in self.r_ocfs2:
if not self.wait_on_state(node, fs, clone_max):
self.failure("%s did not come up correctly" % fs)
failed = lpc
if failed:
return self.failure("iteration %d failed" % failed)
return self.success()
AllTestClasses.append(HAERoleTest)
class HAEStandbyTest(HAETest):
    '''Set up a standby test for the HA extension'''
def __init__(self, cm):
HAETest.__init__(self,cm)
self.name = "HAEStandbyTest"
def change_state(self, node, resource, target):
rc = self.rsh(node, "crm_standby -V -l reboot -v %s" % (target))
return rc
def __call__(self, node):
self.incr("calls")
lpc = 0
failed = 0
done = time.time() + self.Env["loop-minutes"]*60
self.find_hae_resources(node)
clone_max = len(self.Env["nodes"])
while time.time() <= done and not failed:
lpc = lpc + 1
self.change_state(node, self.r_dlm, "true")
if not self.wait_on_state(node, self.r_dlm, clone_max-1):
self.failure("%s did not go down correctly" % self.r_dlm)
failed = lpc
self.change_state(node, self.r_dlm, "false")
if not self.wait_on_state(node, self.r_dlm, clone_max):
self.failure("%s did not come up correctly" % self.r_dlm)
failed = lpc
if not self.wait_on_state(node, self.r_o2cb, clone_max):
self.failure("%s did not come up correctly" % self.r_o2cb)
failed = lpc
for fs in self.r_ocfs2:
if not self.wait_on_state(node, fs, clone_max):
self.failure("%s did not come up correctly" % fs)
failed = lpc
if failed:
return self.failure("iteration %d failed" % failed)
return self.success()
AllTestClasses.append(HAEStandbyTest)
class NearQuorumPointTest(CTSTest):
'''
This test brings larger clusters near the quorum point (50%).
In addition, it will test doing starts and stops at the same time.
Here is how I think it should work:
    - loop over the nodes and decide randomly which will be up and which
      will be down. Use a 50% probability for each of up/down.
- figure out what to do to get into that state from the current state
    - in parallel, bring up those going up and bring down those going down.
'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "NearQuorumPoint"
def __call__(self, dummy):
'''Perform the 'NearQuorumPoint' test. '''
self.incr("calls")
startset = []
stopset = []
stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint")
#decide what to do with each node
for node in self.Env["nodes"]:
action = self.Env.RandomGen.choice(["start","stop"])
#action = self.Env.RandomGen.choice(["start","stop","no change"])
if action == "start" :
startset.append(node)
elif action == "stop" :
stopset.append(node)
self.debug("start nodes:" + repr(startset))
self.debug("stop nodes:" + repr(stopset))
#add search patterns
watchpats = [ ]
for node in stopset:
if self.CM.ShouldBeStatus[node] == "up":
watchpats.append(self.templates["Pat:We_stopped"] % node)
for node in startset:
if self.CM.ShouldBeStatus[node] == "down":
#watchpats.append(self.templates["Pat:NonDC_started"] % node)
watchpats.append(self.templates["Pat:Local_started"] % node)
else:
for stopping in stopset:
if self.CM.ShouldBeStatus[stopping] == "up":
watchpats.append(self.templates["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping)))
if len(watchpats) == 0:
return self.skipped()
if len(startset) != 0:
watchpats.append(self.templates["Pat:DC_IDLE"])
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
#begin actions
for node in stopset:
if self.CM.ShouldBeStatus[node] == "up":
self.CM.StopaCMnoBlock(node)
for node in startset:
if self.CM.ShouldBeStatus[node] == "down":
self.CM.StartaCMnoBlock(node)
#get the result
if watch.lookforall():
self.CM.cluster_stable()
self.CM.fencing_cleanup("NearQuorumPoint", stonith)
return self.success()
self.logger.log("Warn: Patterns not found: " + repr(watch.unmatched))
#get the "bad" nodes
upnodes = []
for node in stopset:
if self.CM.StataCM(node) == 1:
upnodes.append(node)
downnodes = []
for node in startset:
if self.CM.StataCM(node) == 0:
downnodes.append(node)
self.CM.fencing_cleanup("NearQuorumPoint", stonith)
if upnodes == [] and downnodes == []:
self.CM.cluster_stable()
            # Make sure they're completely down with no residue
for node in stopset:
self.rsh(node, self.templates["StopCmd"])
return self.success()
if len(upnodes) > 0:
self.logger.log("Warn: Unstoppable nodes: " + repr(upnodes))
if len(downnodes) > 0:
self.logger.log("Warn: Unstartable nodes: " + repr(downnodes))
return self.failure()
def is_applicable(self):
return 1
AllTestClasses.append(NearQuorumPointTest)
class RollingUpgradeTest(CTSTest):
'''Perform a rolling upgrade of the cluster'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "RollingUpgrade"
self.start = StartTest(cm)
self.stop = StopTest(cm)
self.stopall = SimulStopLite(cm)
self.startall = SimulStartLite(cm)
def setup(self, node):
# Start all remaining nodes
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
for node in self.Env["nodes"]:
if not self.downgrade(node, None):
return self.failure("Couldn't downgrade %s" % node)
ret = self.startall(None)
if not ret:
return self.failure("Couldn't start all nodes")
return self.success()
def teardown(self, node):
# Stop everything
ret = self.stopall(None)
if not ret:
return self.failure("Couldn't stop all nodes")
for node in self.Env["nodes"]:
if not self.upgrade(node, None):
return self.failure("Couldn't upgrade %s" % node)
return self.success()
def install(self, node, version, start=1, flags="--force"):
target_dir = "/tmp/rpm-%s" % version
src_dir = "%s/%s" % (self.Env["rpm-dir"], version)
self.logger.log("Installing %s on %s with %s" % (version, node, flags))
if not self.stop(node):
return self.failure("stop failure: "+node)
rc = self.rsh(node, "mkdir -p %s" % target_dir)
rc = self.rsh(node, "rm -f %s/*.rpm" % target_dir)
(rc, lines) = self.rsh(node, "ls -1 %s/*.rpm" % src_dir, None)
for line in lines:
line = line[:-1]
rc = self.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir))
rc = self.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir))
if start and not self.start(node):
return self.failure("start failure: "+node)
return self.success()
def upgrade(self, node, start=1):
return self.install(node, self.Env["current-version"], start)
def downgrade(self, node, start=1):
return self.install(node, self.Env["previous-version"], start, "--force --nodeps")
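    # Example (a sketch with hypothetical values): with rpm-dir=/srv/rpms and
    # previous-version=1.1.18, downgrade() copies /srv/rpms/1.1.18/*.rpm to
    # the node and runs "rpm -Uvh --force --nodeps /tmp/rpm-1.1.18/*.rpm"
    # there before restarting the cluster on it.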
def __call__(self, node):
'''Perform the 'Rolling Upgrade' test. '''
self.incr("calls")
for node in self.Env["nodes"]:
            # upgrade() returns a true value on success and None on failure
            if not self.upgrade(node):
return self.failure("Couldn't upgrade %s" % node)
self.CM.cluster_stable()
return self.success()
def is_applicable(self):
if not self.is_applicable_common():
return None
        if "rpm-dir" not in list(self.Env.keys()):
            return None
        if "current-version" not in list(self.Env.keys()):
            return None
        if "previous-version" not in list(self.Env.keys()):
            return None
return 1
# Register RollingUpgradeTest as a good test to run
AllTestClasses.append(RollingUpgradeTest)
class BSC_AddResource(CTSTest):
'''Add a resource to the cluster'''
def __init__(self, cm):
CTSTest.__init__(self, cm)
self.name = "AddResource"
self.resource_offset = 0
self.cib_cmd = """cibadmin -C -o %s -X '%s' """
def __call__(self, node):
self.incr("calls")
self.resource_offset = self.resource_offset + 1
r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset)
start_pat = "pacemaker-controld.*%s_start_0.*confirmed.*ok"
patterns = []
patterns.append(start_pat % r_id)
watch = self.create_watch(patterns, self.Env["DeadTime"])
watch.setwatch()
ip = self.NextIP()
if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip):
return self.failure("Make resource %s failed" % r_id)
failed = 0
watch_result = watch.lookforall()
if watch.unmatched:
for regex in watch.unmatched:
self.logger.log ("Warn: Pattern not found: %s" % (regex))
failed = 1
if failed:
return self.failure("Resource pattern(s) not found")
if not self.CM.cluster_stable(self.Env["DeadTime"]):
return self.failure("Unstable cluster")
return self.success()
    def NextIP(self):
        ip = self.Env["IPBase"]
        if ":" in ip:
            # IPv6: increment the last hextet
            # (rpartition returns a tuple, so convert to a list to assign)
            fields = list(ip.rpartition(":"))
            fields[2] = "%x" % (int(fields[2], 16) + 1)
        else:
            # IPv4: increment the last octet
            fields = list(ip.rpartition('.'))
            fields[2] = str(int(fields[2]) + 1)
        ip = fields[0] + fields[1] + fields[2]
        self.Env["IPBase"] = ip
        return ip.strip()
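    # A minimal illustration of the arithmetic above (a sketch, not part of
    # the test run), assuming an IPv4 IPBase such as "10.1.1.10":
    #
    #   >>> fields = list("10.1.1.10".rpartition('.'))
    #   >>> fields[2] = str(int(fields[2]) + 1)
    #   >>> "".join(fields)
    #   '10.1.1.11'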
    def make_ip_resource(self, node, id, rclass, type, ip):
        self.logger.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node))
        rsc_xml="""
<primitive id="%s" class="%s" type="%s" provider="heartbeat">
    <instance_attributes id="%s"><attributes>
        <nvpair id="%s" name="ip" value="%s"/>
    </attributes></instance_attributes>
</primitive>""" % (id, rclass, type, id, id, ip)
        node_constraint = """
      <rsc_location id="run_%s" rsc="%s">
        <rule id="pref_run_%s" score="100">
          <expression id="%s_loc_expr" attribute="#uname" operation="eq" value="%s"/>
        </rule>
      </rsc_location>""" % (id, id, id, id, node)
rc = 0
(rc, lines) = self.rsh(node, self.cib_cmd % ("constraints", node_constraint), None)
if rc != 0:
self.logger.log("Constraint creation failed: %d" % rc)
return None
(rc, lines) = self.rsh(node, self.cib_cmd % ("resources", rsc_xml), None)
if rc != 0:
self.logger.log("Resource creation failed: %d" % rc)
return None
return 1
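    # For illustration (hypothetical id): with r_id "bsc-rsc-c001n01-1", the
    # resource insertion above amounts to running something like
    #   cibadmin -C -o resources -X '<primitive id="bsc-rsc-c001n01-1" .../>'
    # on the target node.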
def is_applicable(self):
if self.Env["DoBSC"]:
return 1
return None
AllTestClasses.append(BSC_AddResource)
class SimulStopLite(CTSTest):
'''Stop any active nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SimulStopLite"
def __call__(self, dummy):
'''Perform the 'SimulStopLite' setup work. '''
self.incr("calls")
self.debug("Setup: " + self.name)
# We ignore the "node" parameter...
watchpats = [ ]
for node in self.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "up":
self.incr("WasStarted")
watchpats.append(self.templates["Pat:We_stopped"] % node)
if len(watchpats) == 0:
return self.success()
# Stop all the nodes - at about the same time...
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
self.set_timer()
for node in self.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "up":
self.CM.StopaCMnoBlock(node)
if watch.lookforall():
            # Make sure they're completely down with no residue
for node in self.Env["nodes"]:
self.rsh(node, self.templates["StopCmd"])
return self.success()
did_fail = 0
up_nodes = []
for node in self.Env["nodes"]:
if self.CM.StataCM(node) == 1:
did_fail = 1
up_nodes.append(node)
if did_fail:
return self.failure("Active nodes exist: " + repr(up_nodes))
self.logger.log("Warn: All nodes stopped but CTS didnt detect: "
+ repr(watch.unmatched))
return self.failure("Missing log message: "+repr(watch.unmatched))
def is_applicable(self):
'''SimulStopLite is a setup test and never applicable'''
return 0
class SimulStartLite(CTSTest):
'''Start any stopped nodes ~ simultaneously'''
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "SimulStartLite"
def __call__(self, dummy):
        '''Perform the 'SimulStartLite' setup work. '''
self.incr("calls")
self.debug("Setup: " + self.name)
# We ignore the "node" parameter...
node_list = []
for node in self.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "down":
self.incr("WasStopped")
node_list.append(node)
self.set_timer()
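        # Nodes can be fenced while they are coming up (for example, if a
        # fencing operation was already in flight), so we loop:
        # fencing_cleanup() below returns any nodes that were fenced, and we
        # keep restarting until none remain.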
while len(node_list) > 0:
# Repeat until all nodes come up
watchpats = [ ]
uppat = self.templates["Pat:NonDC_started"]
if self.CM.upcount() == 0:
uppat = self.templates["Pat:Local_started"]
watchpats.append(self.templates["Pat:DC_IDLE"])
for node in node_list:
watchpats.append(uppat % node)
watchpats.append(self.templates["Pat:InfraUp"] % node)
watchpats.append(self.templates["Pat:PacemakerUp"] % node)
# Start all the nodes - at about the same time...
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)
watch.setwatch()
stonith = self.CM.prepare_fencing_watcher(self.name)
for node in node_list:
self.CM.StartaCMnoBlock(node)
watch.lookforall()
node_list = self.CM.fencing_cleanup(self.name, stonith)
            if node_list is None:
return self.failure("Cluster did not stabilize")
# Remove node_list messages from watch.unmatched
for node in node_list:
self.logger.debug("Dealing with stonith operations for %s" % repr(node_list))
                if watch.unmatched:
                    # list.remove() raises ValueError if the pattern was already matched
                    try:
                        watch.unmatched.remove(uppat % node)
                    except ValueError:
                        self.debug("Already matched: %s" % (uppat % node))
                    try:
                        watch.unmatched.remove(self.templates["Pat:InfraUp"] % node)
                    except ValueError:
                        self.debug("Already matched: %s" % (self.templates["Pat:InfraUp"] % node))
                    try:
                        watch.unmatched.remove(self.templates["Pat:PacemakerUp"] % node)
                    except ValueError:
                        self.debug("Already matched: %s" % (self.templates["Pat:PacemakerUp"] % node))
if watch.unmatched:
for regex in watch.unmatched:
self.logger.log ("Warn: Startup pattern not found: %s" %(regex))
if not self.CM.cluster_stable():
return self.failure("Cluster did not stabilize")
did_fail = 0
unstable = []
for node in self.Env["nodes"]:
if self.CM.StataCM(node) == 0:
did_fail = 1
unstable.append(node)
if did_fail:
return self.failure("Unstarted nodes exist: " + repr(unstable))
unstable = []
for node in self.Env["nodes"]:
if not self.CM.node_stable(node):
did_fail = 1
unstable.append(node)
if did_fail:
return self.failure("Unstable cluster nodes exist: " + repr(unstable))
return self.success()
def is_applicable(self):
'''SimulStartLite is a setup test and never applicable'''
return 0
def TestList(cm, audits):
result = []
for testclass in AllTestClasses:
bound_test = testclass(cm)
if bound_test.is_applicable():
bound_test.Audits = audits
result.append(bound_test)
return result
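# Example usage (a sketch; "cm" and "audits" normally come from the CTS lab
# startup code):
#
#   tests = TestList(cm, audits)
#   names = [ test.name for test in tests ]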
class RemoteLXC(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = "RemoteLXC"
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.num_containers = 2
self.is_container = 1
self.is_docker_unsafe = 1
self.failed = 0
self.fail_string = ""
def start_lxc_simple(self, node):
        # Restore any artifacts lying around from a previous test.
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null")
# generate the containers, put them in the config, add some resources to them
pats = [ ]
watch = self.create_watch(pats, 120)
watch.setwatch()
pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc1"))
pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc2"))
pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc-ms"))
pats.append(self.templates["Pat:RscOpOK"] % ("promote", "lxc-ms"))
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers)
self.set_timer("remoteSimpleInit")
watch.lookforall()
self.log_timer("remoteSimpleInit")
if watch.unmatched:
self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
self.failed = 1
def cleanup_lxc_simple(self, node):
pats = [ ]
        # If the test failed, attempt to clean up the cib and libvirt
        # environment as best we can
if self.failed == 1:
# restore libvirt and cib
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null")
return
watch = self.create_watch(pats, 120)
watch.setwatch()
pats.append(self.templates["Pat:RscOpOK"] % ("stop", "container1"))
pats.append(self.templates["Pat:RscOpOK"] % ("stop", "container2"))
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null")
self.set_timer("remoteSimpleCleanup")
watch.lookforall()
self.log_timer("remoteSimpleCleanup")
if watch.unmatched:
self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched))
self.failed = 1
# cleanup libvirt
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null")
def __call__(self, node):
'''Perform the 'RemoteLXC' test. '''
self.incr("calls")
ret = self.startall(None)
if not ret:
return self.failure("Setup failed, start all nodes failed.")
rc = self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null")
if rc == 1:
self.log("Environment test for lxc support failed.")
return self.skipped()
self.start_lxc_simple(node)
self.cleanup_lxc_simple(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed == 1:
return self.failure(self.fail_string)
return self.success()
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [
r"Updating failcount for ping",
r"schedulerd.*: Recover (ping|lxc-ms|container)\s*\(.*\)",
# The orphaned lxc-ms resource causes an expected transition error
# that is a result of the scheduler not having knowledge that the
            # promotable resource used to be a clone. As a result, it looks like
            # the resource is active in multiple locations when it shouldn't be.
            # In this instance, we know why the error occurs and that it is
            # expected.
r"Calculated [Tt]ransition .*pe-error",
r"Resource lxc-ms .* is active on 2 nodes attempting recovery",
r"Unknown operation: fail",
r"VirtualDomain.*ERROR: Unable to determine emulator",
]
AllTestClasses.append(RemoteLXC)
class RemoteDriver(CTSTest):
def __init__(self, cm):
CTSTest.__init__(self,cm)
self.name = self.__class__.__name__
self.is_docker_unsafe = 1
self.start = StartTest(cm)
self.startall = SimulStartLite(cm)
self.stop = StopTest(cm)
self.remote_rsc = "remote-rsc"
self.cib_cmd = """cibadmin -C -o %s -X '%s' """
self.reset()
def reset(self):
self.pcmk_started = 0
self.failed = False
self.fail_string = ""
self.remote_node_added = 0
self.remote_rsc_added = 0
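        # Randomly exercise both configurations: with and without a
        # reconnect_interval on the remote connection resource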
self.remote_use_reconnect_interval = self.Env.RandomGen.choice([True,False])
def fail(self, msg):
""" Mark test as failed. """
self.failed = True
# Always log the failure.
self.logger.log(msg)
# Use first failure as test status, as it's likely to be most useful.
if not self.fail_string:
self.fail_string = msg
def get_othernode(self, node):
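        # Return a cluster node other than the given one, i.e. a node whose
        # CIB we can still use once "node" becomes the remote node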
for othernode in self.Env["nodes"]:
if othernode == node:
                # We don't want to try to use the CIB on the node we just
                # shut down; find a cluster node that is not our soon-to-be
                # remote node.
continue
else:
return othernode
def del_rsc(self, node, rsc):
othernode = self.get_othernode(node)
rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc))
if rc != 0:
self.fail("Removal of resource '%s' failed" % rsc)
def add_rsc(self, node, rsc_xml):
othernode = self.get_othernode(node)
rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml))
if rc != 0:
self.fail("resource creation failed")
def add_primitive_rsc(self, node):
rsc_xml = """
""" % { "node": self.remote_rsc }
self.add_rsc(node, rsc_xml)
if not self.failed:
self.remote_rsc_added = 1
def add_connection_rsc(self, node):
rsc_xml = """
""" % { "node": self.remote_node, "server": node }
if self.remote_use_reconnect_interval:
# Set cluster-recheck-interval lower
self.rsh(self.get_othernode(node), self.templates["SetCheckInterval"] % ("45s"))
# Set reconnect interval on resource
rsc_xml = rsc_xml + """
""" % (self.remote_node)
rsc_xml = rsc_xml + """
""" % { "node": self.remote_node }
self.add_rsc(node, rsc_xml)
if not self.failed:
self.remote_node_added = 1
def disable_services(self, node):
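        # Remember whether each service was enabled at boot so that
        # restore_services() can put the node back exactly as we found it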
self.corosync_enabled = self.Env.service_is_enabled(node, "corosync")
if self.corosync_enabled:
self.Env.disable_service(node, "corosync")
self.pacemaker_enabled = self.Env.service_is_enabled(node, "pacemaker")
if self.pacemaker_enabled:
self.Env.disable_service(node, "pacemaker")
def restore_services(self, node):
if self.corosync_enabled:
self.Env.enable_service(node, "corosync")
if self.pacemaker_enabled:
self.Env.enable_service(node, "pacemaker")
def stop_pcmk_remote(self, node):
# disable pcmk remote
for i in range(10):
rc = self.rsh(node, "service pacemaker_remote stop")
if rc != 0:
time.sleep(6)
else:
break
def start_pcmk_remote(self, node):
for i in range(10):
rc = self.rsh(node, "service pacemaker_remote start")
if rc != 0:
time.sleep(6)
else:
self.pcmk_started = 1
break
def freeze_pcmk_remote(self, node):
""" Simulate a Pacemaker Remote daemon failure. """
# We freeze the process.
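        # SIGSTOP leaves the daemon's TCP connection open but unresponsive,
        # so the cluster must detect the failure through its own monitoring
        # and recover by fencing the remote node.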
self.rsh(node, "killall -STOP pacemaker-remoted")
def resume_pcmk_remote(self, node):
# We resume the process.
self.rsh(node, "killall -CONT pacemaker-remoted")
def start_metal(self, node):
# Cluster nodes are reused as remote nodes in remote tests. If cluster
# services were enabled at boot, in case the remote node got fenced, the
# cluster node would join instead of the expected remote one. Meanwhile
# pacemaker_remote would not be able to start. Depending on the chances,
# the situations might not be able to be orchestrated gracefully any more.
#
# Temporarily disable any enabled cluster serivces.
self.disable_services(node)
        # Make sure the resources don't already exist for some reason
self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_rsc))
self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_node))
if not self.stop(node):
self.fail("Failed to shutdown cluster node %s" % node)
return
self.start_pcmk_remote(node)
if self.pcmk_started == 0:
self.fail("Failed to start pacemaker_remote on node %s" % node)
return
        # Convert node to baremetal now that it has shut down the cluster stack
pats = [ ]
watch = self.create_watch(pats, 120)
watch.setwatch()
pats.append(self.templates["Pat:RscOpOK"] % ("start", self.remote_node))
pats.append(self.templates["Pat:DC_IDLE"])
self.add_connection_rsc(node)
self.set_timer("remoteMetalInit")
watch.lookforall()
self.log_timer("remoteMetalInit")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
def migrate_connection(self, node):
if self.failed:
return
pats = [ ]
pats.append(self.templates["Pat:RscOpOK"] % ("migrate_to", self.remote_node))
pats.append(self.templates["Pat:RscOpOK"] % ("migrate_from", self.remote_node))
pats.append(self.templates["Pat:DC_IDLE"])
watch = self.create_watch(pats, 120)
watch.setwatch()
(rc, lines) = self.rsh(node, "crm_resource -M -r %s" % (self.remote_node), None)
if rc != 0:
self.fail("failed to move remote node connection resource")
return
self.set_timer("remoteMetalMigrate")
watch.lookforall()
self.log_timer("remoteMetalMigrate")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
return
def fail_rsc(self, node):
if self.failed:
return
watchpats = [ ]
watchpats.append(self.templates["Pat:RscRemoteOpOK"] % ("stop", self.remote_rsc, self.remote_node))
watchpats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node))
watchpats.append(self.templates["Pat:DC_IDLE"])
watch = self.create_watch(watchpats, 120)
watch.setwatch()
self.debug("causing dummy rsc to fail.")
rc = self.rsh(node, "rm -f /var/run/resource-agents/Dummy*")
self.set_timer("remoteRscFail")
watch.lookforall()
self.log_timer("remoteRscFail")
if watch.unmatched:
self.fail("Unmatched patterns during rsc fail: %s" % watch.unmatched)
def fail_connection(self, node):
if self.failed:
return
watchpats = [ ]
watchpats.append(self.templates["Pat:FenceOpOK"] % self.remote_node)
watchpats.append(self.templates["Pat:NodeFenced"] % self.remote_node)
watch = self.create_watch(watchpats, 120)
watch.setwatch()
        # Freeze the pcmk remote daemon; the resulting monitor failure
        # will cause the remote node to be fenced
        self.debug("Freezing pacemaker-remoted on the active remote node")
        self.freeze_pcmk_remote(node)
        self.debug("Waiting for remote node to be fenced.")
self.set_timer("remoteMetalFence")
watch.lookforall()
self.log_timer("remoteMetalFence")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
return
self.debug("Waiting for the remote node to come back up")
        self.CM.ns.WaitForNodeToComeUp(node, 120)
pats = [ ]
watch = self.create_watch(pats, 240)
watch.setwatch()
pats.append(self.templates["Pat:RscOpOK"] % ("start", self.remote_node))
if self.remote_rsc_added == 1:
pats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node))
        # Start pacemaker_remote again and watch the node reintegrate into
        # the cluster.
self.start_pcmk_remote(node)
if self.pcmk_started == 0:
self.fail("Failed to start pacemaker_remote on node %s" % node)
return
self.debug("Waiting for remote node to rejoin cluster after being fenced.")
self.set_timer("remoteMetalRestart")
watch.lookforall()
self.log_timer("remoteMetalRestart")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
return
def add_dummy_rsc(self, node):
if self.failed:
return
# verify we can put a resource on the remote node
pats = [ ]
watch = self.create_watch(pats, 120)
watch.setwatch()
pats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node))
pats.append(self.templates["Pat:DC_IDLE"])
# Add a resource that must live on remote-node
self.add_primitive_rsc(node)
# force that rsc to prefer the remote node.
(rc, line) = self.CM.rsh(node, "crm_resource -M -r %s -N %s -f" % (self.remote_rsc, self.remote_node), None)
if rc != 0:
self.fail("Failed to place remote resource on remote node.")
return
self.set_timer("remoteMetalRsc")
watch.lookforall()
self.log_timer("remoteMetalRsc")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
def test_attributes(self, node):
if self.failed:
return
        # This verifies that permanent attributes can be set on a remote node.
        # It also verifies that the remote node can edit its own CIB node
        # section remotely.
(rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -v testval -N %s" % (self.remote_node), None)
if rc != 0:
self.fail("Failed to set remote-node attribute. rc:%s output:%s" % (rc, line))
return
(rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -q -N %s" % (self.remote_node), None)
if rc != 0:
self.fail("Failed to get remote-node attribute")
return
(rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -D -N %s" % (self.remote_node), None)
if rc != 0:
self.fail("Failed to delete remote-node attribute")
return
def cleanup_metal(self, node):
self.restore_services(node)
if self.pcmk_started == 0:
return
pats = [ ]
watch = self.create_watch(pats, 120)
watch.setwatch()
if self.remote_rsc_added == 1:
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.remote_rsc))
if self.remote_node_added == 1:
pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.remote_node))
self.set_timer("remoteMetalCleanup")
self.resume_pcmk_remote(node)
if self.remote_use_reconnect_interval:
self.debug("Cleaning up re-check interval")
self.rsh(self.get_othernode(node), self.templates["ClearCheckInterval"])
if self.remote_rsc_added == 1:
# Remove dummy resource added for remote node tests
self.debug("Cleaning up dummy rsc put on remote node")
self.rsh(self.get_othernode(node), "crm_resource -U -r %s" % self.remote_rsc)
self.del_rsc(node, self.remote_rsc)
if self.remote_node_added == 1:
# Remove remote node's connection resource
self.debug("Cleaning up remote node connection resource")
self.rsh(self.get_othernode(node), "crm_resource -U -r %s" % (self.remote_node))
self.del_rsc(node, self.remote_node)
watch.lookforall()
self.log_timer("remoteMetalCleanup")
if watch.unmatched:
self.fail("Unmatched patterns: %s" % watch.unmatched)
self.stop_pcmk_remote(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.remote_node_added == 1:
# Remove remote node itself
self.debug("Cleaning up node entry for remote node")
self.rsh(self.get_othernode(node), "crm_node --force --remove %s" % self.remote_node)
def setup_env(self, node):
self.remote_node = "remote-%s" % (node)
        # We assume that if all nodes have a key, it is the right key. If any
        # node doesn't have a remote key, we regenerate it everywhere.
if self.rsh.exists_on_all("/etc/pacemaker/authkey", self.Env["nodes"]):
return
# create key locally
(handle, keyfile) = tempfile.mkstemp(".cts")
os.close(handle)
devnull = open(os.devnull, 'wb')
subprocess.check_call(["dd", "if=/dev/urandom", "of=%s" % keyfile, "bs=4096", "count=1"],
stdout=devnull, stderr=devnull)
devnull.close()
# sync key throughout the cluster
for node in self.Env["nodes"]:
self.rsh(node, "mkdir -p --mode=0750 /etc/pacemaker")
self.rsh.cp(keyfile, "root@%s:/etc/pacemaker/authkey" % node)
self.rsh(node, "chgrp haclient /etc/pacemaker /etc/pacemaker/authkey")
self.rsh(node, "chmod 0640 /etc/pacemaker/authkey")
os.unlink(keyfile)
def is_applicable(self):
if not self.is_applicable_common():
return False
for node in self.Env["nodes"]:
rc = self.rsh(node, "which pacemaker-remoted >/dev/null 2>&1")
if rc != 0:
return False
return True
def start_new_test(self, node):
self.incr("calls")
self.reset()
ret = self.startall(None)
if not ret:
return self.failure("setup failed: could not start all nodes")
self.setup_env(node)
self.start_metal(node)
self.add_dummy_rsc(node)
return True
def __call__(self, node):
return self.failure("This base class is not meant to be called directly.")
def errorstoignore(self):
'''Return list of errors which should be ignored'''
return [ r"""is running on remote.*which isn't allowed""",
r"""Connection terminated""",
r"""Could not send remote""",
]
# RemoteDriver is just a base class for other tests, so it is not added to AllTestClasses
class RemoteBasic(RemoteDriver):
def __call__(self, node):
'''Perform the 'RemoteBaremetal' test. '''
if not self.start_new_test(node):
return self.failure(self.fail_string)
self.test_attributes(node)
self.cleanup_metal(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed:
return self.failure(self.fail_string)
return self.success()
AllTestClasses.append(RemoteBasic)
class RemoteStonithd(RemoteDriver):
def __call__(self, node):
'''Perform the 'RemoteStonithd' test. '''
if not self.start_new_test(node):
return self.failure(self.fail_string)
self.fail_connection(node)
self.cleanup_metal(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed:
return self.failure(self.fail_string)
return self.success()
def is_applicable(self):
if not RemoteDriver.is_applicable(self):
return False
if "DoFencing" in list(self.Env.keys()):
return self.Env["DoFencing"]
return True
def errorstoignore(self):
ignore_pats = [
r"Lost connection to Pacemaker Remote node",
r"Software caused connection abort",
r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
- r"Calculated [Tt]ransition .*pe-error",
- r"error.*: Resource .*ocf::.* is active on 2 nodes attempting recovery",
r"error: Result of monitor operation for .* on remote-.*: No executor connection",
]
ignore_pats.extend(RemoteDriver.errorstoignore(self))
return ignore_pats
AllTestClasses.append(RemoteStonithd)
class RemoteMigrate(RemoteDriver):
def __call__(self, node):
'''Perform the 'RemoteMigrate' test. '''
if not self.start_new_test(node):
return self.failure(self.fail_string)
self.migrate_connection(node)
self.cleanup_metal(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed:
return self.failure(self.fail_string)
return self.success()
AllTestClasses.append(RemoteMigrate)
class RemoteRscFailure(RemoteDriver):
def __call__(self, node):
'''Perform the 'RemoteRscFailure' test. '''
if not self.start_new_test(node):
return self.failure(self.fail_string)
# This is an important step. We are migrating the connection
# before failing the resource. This verifies that the migration
# has properly maintained control over the remote-node.
self.migrate_connection(node)
self.fail_rsc(node)
self.cleanup_metal(node)
self.debug("Waiting for the cluster to recover")
self.CM.cluster_stable()
if self.failed:
return self.failure(self.fail_string)
return self.success()
def errorstoignore(self):
ignore_pats = [
r"schedulerd.*: Recover remote-rsc\s*\(.*\)",
r"Dummy.*: No process state file found",
]
ignore_pats.extend(RemoteDriver.errorstoignore(self))
return ignore_pats
AllTestClasses.append(RemoteRscFailure)
# vim:ts=4:sw=4:et:
diff --git a/cts/patterns.py b/cts/patterns.py
index 8de67b1f94..877ea69758 100644
--- a/cts/patterns.py
+++ b/cts/patterns.py
@@ -1,391 +1,413 @@
""" Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS)
"""
# Pacemaker targets compatibility with Python 2.7 and 3.2+
from __future__ import print_function, unicode_literals, absolute_import, division
__copyright__ = "Copyright 2008-2019 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import sys, os
from cts.CTSvars import *
patternvariants = {}
class BasePatterns(object):
def __init__(self, name):
self.name = name
patternvariants[name] = self
self.ignore = [
"avoid confusing Valgrind",
# Logging bug in some versions of libvirtd
r"libvirtd.*: internal error: Failed to parse PCI config address",
]
self.BadNews = []
self.components = {}
self.commands = {
"StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null",
"CibQuery" : "cibadmin -Ql",
"CibAddXml" : "cibadmin --modify -c --xml-text %s",
"CibDelXpath" : "cibadmin --delete --xpath %s",
# 300,000 == 5 minutes
"RscRunning" : CTSvars.CRM_DAEMON_DIR + "/cts-exec-helper -R -r %s",
"CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
"TmpDir" : "/tmp",
"BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
"FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
# tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
# tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
# tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
# tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
"ReduceCommCmd" : "",
"RestoreCommCmd" : "tc qdisc del dev lo root",
"SetCheckInterval" : "cibadmin --modify -c --xml-text ''",
"ClearCheckInterval" : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"",
"MaintenanceModeOn" : "cibadmin --modify -c --xml-text ''",
"MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"",
"StandbyCmd" : "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null",
"StandbyQueryCmd" : "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null",
}
self.search = {
"Pat:DC_IDLE" : "pacemaker-controld.*State transition.*-> S_IDLE",
# This won't work if we have multiple partitions
"Pat:Local_started" : "%s\W.*controller successfully started",
"Pat:NonDC_started" : r"%s\W.*State transition.*-> S_NOT_DC",
"Pat:DC_started" : r"%s\W.*State transition.*-> S_IDLE",
"Pat:We_stopped" : "%s\W.*OVERRIDE THIS PATTERN",
"Pat:They_stopped" : "%s\W.*LOST:.* %s ",
"Pat:They_dead" : "node %s.*: is dead",
"Pat:TransitionComplete" : "Transition status: Complete: complete",
- "Pat:Fencing_start" : "(Initiating remote operation|Requesting peer fencing ).* (for|of) %s",
- "Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* of %s by .* for .*@.*: OK",
- "Pat:Fencing_recover" : r"schedulerd.*: Recover %s",
+ "Pat:Fencing_start" : r"(Initiating remote operation|Requesting peer fencing ).* (for|of) %s",
+ "Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* of %s by .* for .*@.*: OK",
+ "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover %s",
+ "Pat:Fencing_active" : r"pacemaker-schedulerd.*: Resource %s is active on .* nodes",
+ "Pat:Fencing_probe" : r"pacemaker-controld.* Result of probe operation for %s on .*: Error",
"Pat:RscOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok",
"Pat:RscRemoteOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok",
"Pat:NodeFenced" : r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK",
"Pat:FenceOpOK" : "Operation .* for host '%s' with device .* returned: 0",
}
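        # The %s placeholders are filled in by the tests at use time; for
        # example (a sketch), search["Pat:They_stopped"] % ("node1", "node2")
        # yields a regex matching node1 reporting the loss of node2.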
def get_component(self, key):
if key in self.components:
return self.components[key]
print("Unknown component '%s' for %s" % (key, self.name))
return []
def get_patterns(self, key):
if key == "BadNews":
return self.BadNews
elif key == "BadNewsIgnore":
return self.ignore
elif key == "Commands":
return self.commands
elif key == "Search":
return self.search
elif key == "Components":
return self.components
def __getitem__(self, key):
if key == "Name":
return self.name
elif key in self.commands:
return self.commands[key]
elif key in self.search:
return self.search[key]
else:
print("Unknown template '%s' for %s" % (key, self.name))
return None
class crm_corosync(BasePatterns):
'''
Patterns for Corosync version 2 cluster manager class
'''
def __init__(self, name):
BasePatterns.__init__(self, name)
self.commands.update({
"StartCmd" : "service corosync start && service pacemaker start",
"StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop",
"EpochCmd" : "crm_node -e",
"QuorumCmd" : "crm_node -q",
"PartitionCmd" : "crm_node -p",
})
self.search.update({
# Close enough ... "Corosync Cluster Engine exiting normally" isn't
# printed reliably.
"Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines",
"Pat:They_stopped" : "%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost",
"Pat:They_dead" : "pacemaker-controld.*Node %s(\[|\s).*state is now lost",
"Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(",
"Pat:ChildKilled" : r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated with signal 9",
"Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s",
"Pat:InfraUp" : "%s\W.*corosync.*Initializing transport",
"Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker",
})
self.ignore = self.ignore + [
r"crm_mon:",
r"crmadmin:",
r"update_trace_data",
r"async_notify:.*strange, client not found",
r"Parse error: Ignoring unknown option .*nodename",
r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:",
r"getinfo response error: 1$",
r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE",
r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)",
]
self.BadNews = [
r"error:",
r"crit:",
r"ERROR:",
r"CRIT:",
r"Shutting down...NOW",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting",
r"schedulerd.*Attempting recovery of resource",
r"is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"Attempting to schedule .* after a stop",
r"Resource .* was active at shutdown",
r"duplicate entries for call_id",
r"Search terminated:",
r":global_timer_callback",
r"Faking parameter digest creation",
r"Parameters to .* action changed:",
r"Parameters to .* changed",
r"\[[0-9]+\] terminated with signal [0-9]+ \(",
r"schedulerd:.*Recover .*\(.* -\> .*\)",
r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
r"Peer is not part of our cluster",
r"We appear to be in an election loop",
r"Unknown node -> we will not deliver message",
r"(Blackbox dump requested|Problem detected)",
r"pacemakerd.*Could not connect to Cluster Configuration Database API",
r"Receiving messages from a node we think is dead",
r"share the same cluster nodeid",
r"share the same name",
#r"crm_ipc_send:.*Request .* failed",
#r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received",
# Not inherently bad, but worth tracking
#r"No need to invoke the TE",
#r"ping.*: DEBUG: Updated connected = 0",
#r"Digest mis-match:",
r"pacemaker-controld:.*Transition failed: terminated",
r"Local CIB .* differs from .*:",
r"warn.*:\s*Continuing but .* will NOT be used",
r"warn.*:\s*Cluster configuration file .* is corrupt",
#r"Executing .* fencing operation",
r"Election storm",
r"stalled the FSA with pending inputs",
]
self.components["common-ignore"] = [
r"Pending action:",
r"resource( was|s were) active at shutdown",
r"pending LRM operations at shutdown",
r"Lost connection to the CIB manager",
r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported",
r"pacemaker-controld.*:\s*Performing A_EXIT_1 - forcefully exiting ",
r".*:\s*Executing .* fencing operation \(.*\) on ",
r".*:\s*Requesting fencing \([^)]+\) of node ",
r"(Blackbox dump requested|Problem detected)",
# "Resource .*stonith::.* is active on 2 nodes attempting recovery",
# "Transition .* ERRORs found during PE processing",
]
self.components["corosync-ignore"] = [
r"error:.*Connection to the CPG API failed: Library error",
r"\[[0-9]+\] exited with status [0-9]+ \(",
r"pacemaker-based.*error:.*Corosync connection lost",
r"pacemaker-fenced.*error:.*Corosync connection terminated",
r"pacemaker-controld.*State transition .* S_RECOVERY",
r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state",
r"pacemaker-controld.*error:.*Could not recover from internal error",
r"error:.*Connection to cib_(shm|rw).* (failed|closed)",
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"crit: Fencing daemon connection failed",
+ # This is overbroad, but we don't have a way to say that only
+ # certain transition errors are acceptable (if the fencer respawns,
+ # fence devices may appear multiply active). We have to rely on
+ # other causes of a transition error logging their own error
+ # message, which is the usual practice.
+ r"pacemaker-schedulerd.* Calculated transition .*/pe-error",
]
self.components["corosync"] = [
# We expect each daemon to lose its cluster connection.
# However, if the CIB manager loses its connection first,
# it's possible for another daemon to lose that connection and
# exit before losing the cluster connection.
r"pacemakerd.*:\s*(crit|error):.*Lost connection to cluster layer",
r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer",
r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)",
r"schedulerd.*Scheduling Node .* for STONITH",
r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK",
]
self.components["pacemaker-based"] = [
r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102",
r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1",
r"pacemakerd.* Respawning failed child process: pacemaker-attrd",
r"pacemakerd.* Respawning failed child process: pacemaker-based",
r"pacemakerd.* Respawning failed child process: pacemaker-controld",
r"pacemakerd.* Respawning failed child process: pacemaker-fenced",
r"pacemaker-.* Connection to cib_.* (failed|closed)",
r"pacemaker-attrd.*:.*Lost connection to the CIB manager",
r"pacemaker-controld.*:.*Lost connection to the CIB manager",
r"pacemaker-controld.*I_ERROR.*crmd_cib_connection_destroy",
r"pacemaker-controld.* State transition .* S_RECOVERY",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
]
self.components["pacemaker-based-ignore"] = [
r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
+ # This is overbroad, but we don't have a way to say that only
+ # certain transition errors are acceptable (if the fencer respawns,
+ # fence devices may appear multiply active). We have to rely on
+ # other causes of a transition error logging their own error
+ # message, which is the usual practice.
+ r"pacemaker-schedulerd.* Calculated transition .*/pe-error",
]
self.components["pacemaker-execd"] = [
r"pacemaker-controld.*Connection to (pacemaker-execd|lrmd|executor) (failed|closed)",
r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy",
r"pacemaker-controld.*State transition .* S_RECOVERY",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
r"pacemaker-controld.*Could not recover from internal error",
r"pacemakerd.*pacemaker-execd.* terminated with signal 9",
r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1",
r"pacemakerd.*Respawning failed child process: pacemaker-execd",
r"pacemakerd.*Respawning failed child process: pacemaker-controld",
]
- self.components["pacemaker-execd-ignore"] = []
+ self.components["pacemaker-execd-ignore"] = [
+ r"pacemaker-attrd.*Connection to lrmd (failed|closed)",
+ ]
self.components["pacemaker-controld"] = [
# "WARN: determine_online_status: Node .* is unclean",
# "Scheduling Node .* for STONITH",
# "Executing .* fencing operation",
# Only if the node wasn't the DC: "State transition S_IDLE",
"State transition .* -> S_IDLE",
]
self.components["pacemaker-controld-ignore"] = []
self.components["pacemaker-attrd"] = []
self.components["pacemaker-attrd-ignore"] = []
self.components["pacemaker-schedulerd"] = [
"State transition .* S_RECOVERY",
r"Respawning failed child process: pacemaker-controld",
r"pacemaker-controld\[[0-9]+\] exited with status 1 \(",
"Connection to pengine failed",
"Connection to pengine.* closed",
r"Connection to the scheduler failed",
"pacemaker-controld.*I_ERROR.*save_cib_contents",
r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
"pacemaker-controld.*Could not recover from internal error",
]
self.components["pacemaker-schedulerd-ignore"] = []
self.components["pacemaker-fenced"] = [
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"Fencing daemon connection failed",
r"pacemaker-controld.*Fencer successfully connected",
]
self.components["pacemaker-fenced-ignore"] = [
r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
r"crit:.*Fencing daemon connection failed",
r"error:.*Fencer connection failed \(will retry\)",
r"Connection to (fencer|stonith-ng) failed, finalizing .* pending operations",
r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error",
+ # This is overbroad, but we don't have a way to say that only
+ # certain transition errors are acceptable (if the fencer respawns,
+ # fence devices may appear multiply active). We have to rely on
+ # other causes of a transition error logging their own error
+ # message, which is the usual practice.
+ r"pacemaker-schedulerd.* Calculated transition .*/pe-error",
]
self.components["pacemaker-fenced-ignore"].extend(self.components["common-ignore"])
class crm_corosync_docker(crm_corosync):
    '''
    Patterns for Corosync version 2 cluster manager class when running
    in Docker containers
    '''
def __init__(self, name):
crm_corosync.__init__(self, name)
self.commands.update({
"StartCmd" : "pcmk_start",
"StopCmd" : "pcmk_stop",
})
class PatternSelector(object):
def __init__(self, name=None):
self.name = name
self.base = BasePatterns("crm-base")
if not name:
crm_corosync("crm-corosync")
elif name == "crm-corosync":
crm_corosync(name)
elif name == "crm-corosync-docker":
crm_corosync_docker(name)
def get_variant(self, variant):
if variant in patternvariants:
return patternvariants[variant]
print("defaulting to crm-base for %s" % variant)
return self.base
def get_patterns(self, variant, kind):
return self.get_variant(variant).get_patterns(kind)
def get_template(self, variant, key):
v = self.get_variant(variant)
return v[key]
def get_component(self, variant, kind):
return self.get_variant(variant).get_component(kind)
def __getitem__(self, key):
return self.get_template(self.name, key)
# python cts/CTSpatt.py -k crm-corosync -t StartCmd
if __name__ == '__main__':
pdir=os.path.dirname(sys.path[0])
sys.path.insert(0, pdir) # So that things work from the source directory
kind=None
template=None
skipthis=None
args=sys.argv[1:]
for i in range(0, len(args)):
if skipthis:
skipthis=None
continue
elif args[i] == "-k" or args[i] == "--kind":
skipthis=1
kind = args[i+1]
elif args[i] == "-t" or args[i] == "--template":
skipthis=1
template = args[i+1]
else:
print("Illegal argument " + args[i])
print(PatternSelector(kind)[template])
diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
index c874a214a2..8ae6992c22 100644
--- a/daemons/controld/controld_attrd.c
+++ b/daemons/controld/controld_attrd.c
@@ -1,186 +1,184 @@
/*
* Copyright 2006-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/attrd.h>
#include <crm/msg_xml.h>
-#include <controld_fsa.h>
-#include <controld_utils.h>
-#include <controld_messages.h>
+#include <pacemaker-controld.h>
static crm_ipc_t *attrd_ipc = NULL;
void
controld_close_attrd_ipc()
{
if (attrd_ipc) {
crm_trace("Closing connection to pacemaker-attrd");
crm_ipc_close(attrd_ipc);
crm_ipc_destroy(attrd_ipc);
attrd_ipc = NULL;
}
}
static void
log_attrd_error(const char *host, const char *name, const char *value,
gboolean is_remote, char command, int rc)
{
const char *node_type = (is_remote? "Pacemaker Remote" : "cluster");
gboolean shutting_down = is_set(fsa_input_register, R_SHUTDOWN);
const char *when = (shutting_down? " at shutdown" : "");
switch (command) {
case 0:
crm_err("Could not clear failure attributes for %s on %s node %s%s: %s "
CRM_XS " rc=%d", (name? name : "all resources"), node_type,
host, when, pcmk_strerror(rc), rc);
break;
case 'C':
crm_err("Could not purge %s node %s in attribute manager%s: %s "
CRM_XS " rc=%d",
node_type, host, when, pcmk_strerror(rc), rc);
break;
case 'U':
/* We weren't able to update an attribute after several retries,
* so something is horribly wrong with the attribute manager or the
* underlying system.
*/
do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
"Could not update attribute %s=%s for %s node %s%s: %s "
CRM_XS " rc=%d", name, value, node_type, host, when,
pcmk_strerror(rc), rc);
if (AM_I_DC) {
/* We are unable to provide accurate information to the
* scheduler, so allow another node to take over DC.
* @TODO Should we do this unconditionally on any failure?
*/
crmd_exit(CRM_EX_FATAL);
} else if (shutting_down) {
// Fast-track shutdown since unable to request via attribute
register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL);
}
break;
}
}
static void
update_attrd_helper(const char *host, const char *name, const char *value,
const char *interval_spec, const char *user_name,
gboolean is_remote_node, char command)
{
int rc;
int attrd_opts = attrd_opt_none;
if (is_remote_node) {
attrd_opts |= attrd_opt_remote;
}
if (attrd_ipc == NULL) {
attrd_ipc = crm_ipc_new(T_ATTRD, 0);
}
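    /* Try up to four times, sleeping 1s, 2s, then 3s between attempts, so
     * that a briefly unavailable attribute manager does not become a hard
     * error.
     */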
for (int attempt = 1; attempt <= 4; ++attempt) {
rc = pcmk_ok;
// If we're not already connected, try to connect
if (crm_ipc_connected(attrd_ipc) == FALSE) {
if (attempt == 1) {
// Start with a clean slate
crm_ipc_close(attrd_ipc);
}
if (crm_ipc_connect(attrd_ipc) == FALSE) {
rc = errno;
}
crm_debug("Attribute manager connection attempt %d of 4: %s (%d)",
attempt, pcmk_strerror(rc), rc);
}
if (rc == pcmk_ok) {
rc = command?
attrd_update_delegate(attrd_ipc, command, host, name, value,
XML_CIB_TAG_STATUS, NULL, NULL,
user_name, attrd_opts)
/* No command means clear fail count (name/value is really
* resource/operation)
*/
: attrd_clear_delegate(attrd_ipc, host, name, value,
interval_spec, user_name, attrd_opts);
crm_debug("Attribute manager request attempt %d of 4: %s (%d)",
attempt, pcmk_strerror(rc), rc);
}
if (rc == pcmk_ok) {
// Success, we're done
break;
} else if ((rc != EAGAIN) && (rc != EALREADY)) {
/* EAGAIN or EALREADY indicates a temporary block, so just try
* again. Otherwise, close the connection for a clean slate.
*/
crm_ipc_close(attrd_ipc);
}
/* @TODO If the attribute manager remains unavailable the entire time,
* this function takes more than 6 seconds. Maybe set a timer for
* retries, to let the main loop do other work.
*/
if (attempt < 4) {
sleep(attempt);
}
}
if (rc != pcmk_ok) {
log_attrd_error(host, name, value, is_remote_node, command, rc);
}
}
void
update_attrd(const char *host, const char *name, const char *value,
const char *user_name, gboolean is_remote_node)
{
update_attrd_helper(host, name, value, NULL, user_name, is_remote_node,
'U');
}
void
update_attrd_remote_node_removed(const char *host, const char *user_name)
{
crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
host);
update_attrd_helper(host, NULL, NULL, NULL, user_name, TRUE, 'C');
}
void
update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
const char *interval_spec, gboolean is_remote_node)
{
const char *op_desc = NULL;
const char *interval_desc = NULL;
const char *node_type = is_remote_node? "Pacemaker Remote" : "cluster";
if (op) {
interval_desc = interval_spec? interval_spec : "nonrecurring";
op_desc = op;
} else {
interval_desc = "all";
op_desc = "operations";
}
crm_info("Asking pacemaker-attrd to clear failure of %s %s for %s on %s node %s",
interval_desc, op_desc, rsc, node_type, host);
update_attrd_helper(host, rsc, op, interval_spec, NULL, is_remote_node, 0);
}
diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c
index ed86d46d25..4ea62fd344 100644
--- a/daemons/controld/controld_based.c
+++ b/daemons/controld/controld_based.c
@@ -1,171 +1,170 @@
/*
- * Copyright 2004-2018 Andrew Beekhof
+ * Copyright 2004-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <unistd.h>  /* sleep */
#include
#include
#include
#include
#include
-#include // for crmd_cib_connection_destroy()
-#include
-#include
int cib_retries = 0;
static void
do_cib_updated(const char *event, xmlNode * msg)
{
if (crm_patchset_contains_alert(msg, TRUE)) {
mainloop_set_trigger(config_read);
}
}
static void
do_cib_replaced(const char *event, xmlNode * msg)
{
crm_debug("Updating the CIB after a replace: DC=%s", AM_I_DC ? "true" : "false");
if (AM_I_DC == FALSE) {
return;
} else if (fsa_state == S_FINALIZE_JOIN && is_set(fsa_input_register, R_CIB_ASKED)) {
/* no need to restart the join - we asked for this replace op */
return;
}
/* start the join process again so we get everyone's LRM status */
populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
}
/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */
void
do_cib_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
CRM_ASSERT(fsa_cib_conn != NULL);
if (action & A_CIB_STOP) {
if (fsa_cib_conn->state != cib_disconnected && last_resource_update != 0) {
crm_info("Waiting for resource update %d to complete", last_resource_update);
crmd_fsa_stall(FALSE);
return;
}
crm_info("Disconnecting from the CIB manager");
clear_bit(fsa_input_register, R_CIB_CONNECTED);
fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated);
if (fsa_cib_conn->state != cib_disconnected) {
fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
}
crm_notice("Disconnected from the CIB manager");
}
if (action & A_CIB_START) {
int rc = pcmk_ok;
if (cur_state == S_STOPPING) {
crm_err("Ignoring request to connect to the CIB manager after shutdown");
return;
}
rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking);
if (rc != pcmk_ok) {
/* a short wait that usually avoids stalling the FSA */
sleep(1);
rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking);
}
if (rc != pcmk_ok) {
crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc));
} else if (pcmk_ok !=
fsa_cib_conn->cmds->set_connection_dnotify(fsa_cib_conn,
crmd_cib_connection_destroy)) {
crm_err("Could not set dnotify callback");
} else if (pcmk_ok !=
fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_REPLACE_NOTIFY,
do_cib_replaced)) {
crm_err("Could not set CIB notification callback (replace)");
} else if (pcmk_ok !=
fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY,
do_cib_updated)) {
crm_err("Could not set CIB notification callback (update)");
} else {
set_bit(fsa_input_register, R_CIB_CONNECTED);
cib_retries = 0;
}
if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
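            /* Registration failed; pause via wait_timer and stall the FSA to
             * retry, giving up for good only after 30 attempts.
             */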
cib_retries++;
crm_warn("Couldn't complete CIB registration %d"
" times... pause and retry", cib_retries);
if (cib_retries < 30) {
crm_timer_start(wait_timer);
crmd_fsa_stall(FALSE);
} else {
crm_err("Could not complete CIB"
" registration %d times..." " hard error", cib_retries);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
}
}
/*!
* \internal
* \brief Get CIB call options to use local scope if master unavailable
*
* \return CIB call options
*/
int crmd_cib_smart_opt()
{
int call_opt = cib_quorum_override;
if (fsa_state == S_ELECTION || fsa_state == S_PENDING) {
crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state));
call_opt |= cib_scope_local;
}
return call_opt;
}
/*!
* \internal
* \brief Check whether an action type should be recorded in the CIB
*
* \param[in] action Action type
*
* \return TRUE if action should be recorded, FALSE otherwise
*/
bool
controld_action_is_recordable(const char *action)
{
if (safe_str_eq(action, CRMD_ACTION_CANCEL)
|| safe_str_eq(action, CRMD_ACTION_DELETE)
|| safe_str_eq(action, CRMD_ACTION_NOTIFY)
|| safe_str_eq(action, CRMD_ACTION_METADATA)) {
return FALSE;
}
return TRUE;
}
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 48225ac58a..5cbd392a18 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -1,340 +1,334 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
+ * The version control history for this file may have further details.
+ *
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include
-#include
#include
-#include
+#include
#include
#include
-
#include
#include
#include
-#include
-#include
-#include
-#include
-#include
-#include
/* From join_dc... */
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
void
crmd_ha_msg_filter(xmlNode * msg)
{
if (AM_I_DC) {
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
if (safe_str_eq(sys_from, CRM_SYSTEM_DC)) {
const char *from = crm_element_value(msg, F_ORIG);
if (safe_str_neq(from, fsa_our_uname)) {
int level = LOG_INFO;
const char *op = crm_element_value(msg, F_CRM_TASK);
/* make sure the election happens NOW */
if (fsa_state != S_ELECTION) {
ha_msg_input_t new_input;
level = LOG_WARNING;
new_input.msg = msg;
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
__FUNCTION__);
}
do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
goto done;
}
}
} else {
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
if (safe_str_eq(sys_to, CRM_SYSTEM_DC)) {
return;
}
}
/* crm_log_xml_trace("HA[inbound]", msg); */
route_message(C_HA_MESSAGE, msg);
done:
trigger_fsa(fsa_source);
}
/*!
* \internal
* \brief Check whether a node is online
*
* \param[in] node Node to check
*
* \retval -1 if completely dead
* \retval 0 if partially alive
* \retval 1 if completely alive
*/
static int
node_alive(const crm_node_t *node)
{
if (is_set(node->flags, crm_remote_node)) {
// Pacemaker Remote nodes can't be partially alive
return safe_str_eq(node->state, CRM_NODE_MEMBER)? 1: -1;
} else if (crm_is_peer_active(node)) {
// Completely up cluster node: both cluster member and peer
return 1;
} else if (is_not_set(node->processes, crm_get_cluster_proc())
&& safe_str_neq(node->state, CRM_NODE_MEMBER)) {
// Completely down cluster node: neither cluster member nor peer
return -1;
}
// Partially up cluster node: only cluster member or only peer
return 0;
}
#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
uint32_t old = 0;
bool appeared = FALSE;
bool is_remote = is_set(node->flags, crm_remote_node);
/* The controller waits to receive some information from the membership
* layer before declaring itself operational. If this is being called for a
* cluster node, indicate that we have it.
*/
if (!is_remote) {
set_bit(fsa_input_register, R_PEER_DATA);
}
if (node->uname == NULL) {
return;
}
switch (type) {
case crm_status_uname:
/* If we've never seen the node, then it also won't be in the status section */
crm_info("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
node->uname, state_text(node->state));
return;
case crm_status_nstate:
/* This callback should not be called unless the state actually
* changed, but here's a failsafe just in case.
*/
CRM_CHECK(safe_str_neq(data, node->state), return);
crm_info("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
node->uname, state_text(node->state), state_text(data));
if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
appeared = TRUE;
if (!is_remote) {
remove_stonith_cleanup(node->uname);
}
} else {
controld_remove_voter(node->uname);
}
crmd_alert_node_event(node);
break;
case crm_status_processes:
CRM_CHECK(data != NULL, return);
old = *(const uint32_t *)data;
appeared = is_set(node->processes, crm_get_cluster_proc());
crm_info("Node %s is %s a peer " CRM_XS " DC=%s old=0x%07x new=0x%07x",
node->uname, (appeared? "now" : "no longer"),
(AM_I_DC? "true" : (fsa_our_dc? fsa_our_dc : "")),
old, node->processes);
if (is_not_set((node->processes ^ old), crm_get_cluster_proc())) {
/* Peer status did not change. This should not be possible,
* since we don't track process flags other than peer status.
*/
crm_trace("Process flag 0x%7x did not change from 0x%7x to 0x%7x",
crm_get_cluster_proc(), old, node->processes);
return;
}
if (!appeared) {
controld_remove_voter(node->uname);
}
if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
crm_trace("Ignoring peer status change because not connected to CIB");
return;
} else if (fsa_state == S_STOPPING) {
crm_trace("Ignoring peer status change because stopping");
return;
}
if (safe_str_eq(node->uname, fsa_our_uname) && !appeared) {
/* Did we get evicted? */
crm_notice("Our peer connection failed");
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
} else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
/* Did the DC leave us? */
crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
/* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
* want to fence it. Newer DCs will send their shutdown request
* to all peers, who will update the DC's expected state to
* down, thus avoiding fencing. We can safely erase the DC's
* transient attributes when it leaves in that case. However,
* the only way to avoid fencing older DCs is to leave the
* transient attributes intact until it rejoins.
*/
if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
}
} else if(AM_I_DC) {
if (appeared) {
te_trigger_stonith_history_sync(FALSE);
} else {
erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
}
}
break;
}
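/* Only the DC mirrors peer transitions into the CIB status section, and
* reconciles each transition against any down event (fencing or shutdown)
* that the transition engine expected for the node.
*/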
if (AM_I_DC) {
xmlNode *update = NULL;
int flags = node_update_peer;
int alive = node_alive(node);
crm_action_t *down = match_down_event(node->uuid);
crm_trace("Alive=%d, appeared=%d, down=%d",
alive, appeared, (down? down->id : -1));
if (appeared && (alive > 0) && !is_remote) {
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
if (down) {
const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
if (safe_str_eq(task, CRM_OP_FENCE)) {
/* tengine_stonith_callback() confirms fence actions */
crm_trace("Updating CIB %s fencer reported fencing of %s complete",
(down->confirmed? "after" : "before"), node->uname);
} else if (!appeared && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
// Shutdown actions are immediately confirmed (i.e. no_wait)
if (!is_remote) {
flags |= node_update_join | node_update_expected;
crmd_peer_down(node, FALSE);
check_join_state(fsa_state, __FUNCTION__);
}
if (alive >= 0) {
crm_info("%s of peer %s is in progress " CRM_XS " action=%d",
task, node->uname, down->id);
} else {
crm_notice("%s of peer %s is complete " CRM_XS " action=%d",
task, node->uname, down->id);
update_graph(transition_graph, down);
trigger_graph();
}
} else {
crm_trace("Node %s is %s, was expected to %s (op %d)",
node->uname,
((alive > 0)? "alive" :
((alive < 0)? "dead" : "partially alive")),
task, down->id);
}
} else if (appeared == FALSE) {
crm_warn("Stonith/shutdown of node %s was not expected",
node->uname);
if (!is_remote) {
crm_update_peer_join(__FUNCTION__, node, crm_join_none);
check_join_state(fsa_state, __FUNCTION__);
}
abort_transition(INFINITY, tg_restart, "Node failure", NULL);
fail_incompletable_actions(transition_graph, node->uuid);
} else {
crm_trace("Node %s came up, was not expected to be down",
node->uname);
}
if (is_remote) {
/* A pacemaker_remote node won't have its cluster status updated
* in the CIB by membership-layer callbacks, so do it here.
*/
flags |= node_update_cluster;
/* Trigger resource placement on newly integrated nodes */
if (appeared) {
abort_transition(INFINITY, tg_restart,
"pacemaker_remote node integrated", NULL);
}
}
/* Update the CIB node state */
update = create_node_state_update(node, flags, NULL, __FUNCTION__);
if (update == NULL) {
crm_debug("Node state update not yet possible for %s", node->uname);
} else {
fsa_cib_anon_update(XML_CIB_TAG_STATUS, update);
}
free_xml(update);
}
trigger_fsa(fsa_source);
}
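/* Callback for loss of the CIB manager connection: an expected disconnect
* (R_CIB_CONNECTED already cleared) is only logged, while an unexpected one
* escalates to controller shutdown.
*/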
void
crmd_cib_connection_destroy(gpointer user_data)
{
CRM_CHECK(user_data == fsa_cib_conn,;);
crm_trace("Invoked");
trigger_fsa(fsa_source);
fsa_cib_conn->state = cib_disconnected;
if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Connection to the CIB manager terminated");
return;
}
// @TODO This should trigger a reconnect, not a shutdown
crm_crit("Lost connection to the CIB manager, shutting down");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit(fsa_input_register, R_CIB_CONNECTED);
return;
}
gboolean
crm_fsa_trigger(gpointer user_data)
{
crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue));
s_crmd_fsa(C_FSA_INTERNAL);
crm_trace("Exited (queue len: %d)", g_list_length(fsa_message_queue));
return TRUE;
}
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index 687aa8fd99..232bcc48dd 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,881 +1,869 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
+#include
+#include
#include
-
#include
-
#include
#include
#include
#include
#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
qb_ipcs_service_t *ipcs = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);
gboolean fsa_has_quorum = FALSE;
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;
bool no_quorum_suicide_escalation = FALSE;
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = calloc(1, sizeof(crm_cluster_t));
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
set_bit(fsa_input_register, R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
registered = crm_connect_corosync(cluster);
#endif
}
if (registered == TRUE) {
controld_election_init(cluster->uname);
fsa_our_uname = cluster->uname;
fsa_our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (registered == FALSE) {
set_bit(fsa_input_register, R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __FUNCTION__);
clear_bit(fsa_input_register, R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
set_bit(fsa_input_register, R_SHUTDOWN);
controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
set_bit(fsa_input_register, R_SHUTDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
(fsa_our_dc? fsa_our_dc : "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
/* set_bit(fsa_input_register, R_STAYDOWN); */
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
void
crmd_fast_exit(crm_exit_t exit_code)
{
if (is_set(fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& is_set(fsa_input_register, R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GListPtr gIter = NULL;
GMainLoop *mloop = crmd_mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
set_bit(fsa_input_register, R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
pe_subsystem_free();
controld_disconnect_fencer(TRUE);
if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors to stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
fsa_data_t *fsa_data = gIter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
clear_bit(fsa_input_register, R_MEMBERSHIP);
g_list_free(fsa_message_queue); fsa_message_queue = NULL;
metadata_cache_fini();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
cib_free_callbacks(fsa_cib_conn);
fsa_cib_conn->cmds->signoff(fsa_cib_conn);
verify_stopped(fsa_state, LOG_WARNING);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
lrm_state_destroy_all();
/* This basically will not work, since mainloop has a reference to it */
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
mainloop_destroy_trigger(config_read); config_read = NULL;
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
crm_client_cleanup();
crm_peer_destroy();
crm_timer_stop(transition_timer);
crm_timer_stop(integration_timer);
crm_timer_stop(finalization_timer);
crm_timer_stop(election_trigger);
crm_timer_stop(shutdown_escalation_timer);
crm_timer_stop(wait_timer);
crm_timer_stop(recheck_timer);
te_cleanup_stonith_history_sync(NULL, TRUE);
controld_free_sched_timer();
free(transition_timer); transition_timer = NULL;
free(integration_timer); integration_timer = NULL;
free(finalization_timer); finalization_timer = NULL;
free(election_trigger); election_trigger = NULL;
free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
free(wait_timer); wait_timer = NULL;
free(recheck_timer); recheck_timer = NULL;
free(fsa_our_dc_version); fsa_our_dc_version = NULL;
free(fsa_our_uname); fsa_our_uname = NULL;
free(fsa_our_uuid); fsa_our_uuid = NULL;
free(fsa_our_dc); fsa_our_dc = NULL;
free(fsa_cluster_name); fsa_cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free(max_generation_from); max_generation_from = NULL;
free_xml(max_generation_xml); max_generation_xml = NULL;
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
/* Don't re-enter this block */
crmd_mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
int log_level = LOG_INFO;
const char *exit_type = "gracefully";
if (action & A_EXIT_1) {
log_level = LOG_ERR;
exit_type = "forcefully";
exit_code = CRM_EX_ERROR;
}
verify_stopped(cur_state, LOG_ERR);
do_crm_log(log_level, "Performing %s - %s exiting the controller",
fsa_action2string(action), exit_type);
crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int was_error = 0;
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
crm_debug("Creating CIB manager and executor objects");
fsa_cib_conn = cib_new();
lrm_state_init_local();
/* set up the timers */
transition_timer = calloc(1, sizeof(fsa_timer_t));
integration_timer = calloc(1, sizeof(fsa_timer_t));
finalization_timer = calloc(1, sizeof(fsa_timer_t));
election_trigger = calloc(1, sizeof(fsa_timer_t));
shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
wait_timer = calloc(1, sizeof(fsa_timer_t));
recheck_timer = calloc(1, sizeof(fsa_timer_t));
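/* Each timer starts with a period_ms of -1 (unconfigured); apart from
* wait_timer's fixed 2s, the real intervals are filled in later from the
* CIB by config_query_callback().
*/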
if (election_trigger != NULL) {
election_trigger->source_id = 0;
election_trigger->period_ms = -1;
election_trigger->fsa_input = I_DC_TIMEOUT;
election_trigger->callback = crm_timer_popped;
- election_trigger->repeat = FALSE;
+ election_trigger->log_error = FALSE;
} else {
was_error = TRUE;
}
if (transition_timer != NULL) {
transition_timer->source_id = 0;
transition_timer->period_ms = -1;
transition_timer->fsa_input = I_PE_CALC;
transition_timer->callback = crm_timer_popped;
- transition_timer->repeat = FALSE;
+ transition_timer->log_error = FALSE;
} else {
was_error = TRUE;
}
if (integration_timer != NULL) {
integration_timer->source_id = 0;
integration_timer->period_ms = -1;
integration_timer->fsa_input = I_INTEGRATED;
integration_timer->callback = crm_timer_popped;
- integration_timer->repeat = FALSE;
+ integration_timer->log_error = TRUE;
} else {
was_error = TRUE;
}
if (finalization_timer != NULL) {
finalization_timer->source_id = 0;
finalization_timer->period_ms = -1;
finalization_timer->fsa_input = I_FINALIZED;
finalization_timer->callback = crm_timer_popped;
- finalization_timer->repeat = FALSE;
+ finalization_timer->log_error = FALSE;
/* Workaround: a bug in the join protocol could leave a slave in S_PENDING
* while we think it is in S_NOT_DC. Raising I_FINALIZED then put us into a
* transition loop that never resolved, continually sending probes that the
* node NACK'd because it was still in S_PENDING.
*
* Nodes where the cluster layer is active but the CRM is not are handled
* during the integration phase instead, so raise I_ELECTION here rather
* than I_FINALIZED.
*/
finalization_timer->fsa_input = I_ELECTION;
} else {
was_error = TRUE;
}
if (shutdown_escalation_timer != NULL) {
shutdown_escalation_timer->source_id = 0;
shutdown_escalation_timer->period_ms = -1;
shutdown_escalation_timer->fsa_input = I_STOP;
shutdown_escalation_timer->callback = crm_timer_popped;
- shutdown_escalation_timer->repeat = FALSE;
+ shutdown_escalation_timer->log_error = TRUE;
} else {
was_error = TRUE;
}
if (wait_timer != NULL) {
wait_timer->source_id = 0;
wait_timer->period_ms = 2000;
wait_timer->fsa_input = I_NULL;
wait_timer->callback = crm_timer_popped;
- wait_timer->repeat = FALSE;
+ wait_timer->log_error = FALSE;
} else {
was_error = TRUE;
}
if (recheck_timer != NULL) {
recheck_timer->source_id = 0;
recheck_timer->period_ms = -1;
recheck_timer->fsa_input = I_PE_CALC;
recheck_timer->callback = crm_timer_popped;
- recheck_timer->repeat = FALSE;
+ recheck_timer->log_error = FALSE;
} else {
was_error = TRUE;
}
if (was_error) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
static int32_t
crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
crm_trace("Connection %p", c);
if (crm_client_new(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
static void
crmd_ipc_created(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
}
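/* Handle an IPC request from a local client: ack receipt immediately,
* stamp the message with the sender's client ID, route it through the
* controller if the client is authorized, then poke the FSA.
*/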
static int32_t
crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
crm_client_t *client = crm_client_get(c);
xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags);
crm_trace("Invoked: %s", crm_client_name(client));
crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
}
#if ENABLE_ACL
CRM_ASSERT(client->user != NULL);
crm_acl_get_set_user(msg, F_CRM_USER, client->user);
#endif
crm_trace("Processing msg from %s", crm_client_name(client));
crm_log_xml_trace(msg, "controller[inbound]");
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (crmd_authorize_message(msg, client, NULL)) {
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa(fsa_source);
free_xml(msg);
return 0;
}
static int32_t
crmd_ipc_closed(qb_ipcs_connection_t * c)
{
crm_client_t *client = crm_client_get(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), crm_client_name(client),
c, client);
free(client->userdata);
crm_client_destroy(client);
trigger_fsa(fsa_source);
}
return 0;
}
static void
crmd_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
crmd_ipc_closed(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = crmd_ipc_accept,
.connection_created = crmd_ipc_created,
.msg_process = crmd_ipc_dispatch,
.connection_closed = crmd_ipc_closed,
.connection_destroyed = crmd_ipc_destroy
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = crmd_ipc_server_init(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
controld_trigger_fencer_connect();
clear_bit(fsa_input_register, R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
set_bit(fsa_input_register, R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* *INDENT-OFF* */
static pe_cluster_option crmd_opts[] = {
/* name, old-name, validate, values, default, short description, long description */
{ "dc-version", NULL, "string", NULL, "none", NULL,
"Version of Pacemaker on the cluster's DC.",
"Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes."
},
{ "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
"The messaging stack on which Pacemaker is currently running.",
"Used for informational and diagnostic purposes." },
{ XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "20s", &check_time,
"How long to wait for a response from other nodes during startup.",
"The \"correct\" value will depend on the speed/load of your network and the type of switches used."
},
{ XML_CONFIG_ATTR_RECHECK, NULL, "time",
"Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
"15min", &check_timer,
"Polling interval for time based changes to options, resource parameters and constraints.",
"The Cluster is primarily event driven, however the configuration can have elements that change based on time."
" To ensure these changes take effect, we can optionally poll the cluster's status for changes."
},
{ "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
"The maximum amount of system resources that should be used by nodes in the cluster",
"The cluster will slow down its recovery process when the amount of system resources used"
" (currently CPU) approaches this limit",
},
{ "node-action-limit", NULL, "integer", NULL, "0", &check_number,
"The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
{ XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{ XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer,
"*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
},
{
"join-integration-timeout", "crmd-integration-timeout",
"time", NULL, "3min", &check_timer,
"*** Advanced Use Only ***",
"If need to adjust this value, it probably indicates the presence of a bug"
},
{
"join-finalization-timeout", "crmd-finalization-timeout",
"time", NULL, "30min", &check_timer,
"*** Advanced Use Only ***",
"If you need to adjust this value, it probably indicates the presence of a bug"
},
{
"transition-delay", "crmd-transition-delay",
"time", NULL, "0s", &check_timer,
"*** Advanced Use Only *** Enabling this option will slow down cluster recovery under all conditions",
"Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
"Useful if your configuration is sensitive to the order in which ping updates arrive."
},
{ "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout,
"How long to wait before we can assume nodes are safely down", NULL
},
{ "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number,
"How many times stonith can fail before it will no longer be attempted on a target"
},
{ "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
};
/* *INDENT-ON* */
void
crmd_metadata(void)
{
config_metadata("pacemaker-controld", "1.0",
"controller properties",
"Cluster properties used by Pacemaker's controller,"
" formerly known as crmd",
crmd_opts, DIMOF(crmd_opts));
}
static void
verify_crmd_options(GHashTable * options)
{
verify_all_options(options, crmd_opts, DIMOF(crmd_opts));
}
static const char *
crmd_pref(GHashTable * options, const char *name)
{
return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
set_bit(fsa_input_register, R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig) &&
(crm_element_name(crmconfig)) &&
(strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = crm_str_table_new();
unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, now);
verify_crmd_options(config_hash);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_trigger->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
throttle_update_job_max(value);
value = crmd_pref(config_hash, "load-threshold");
if(value) {
throttle_set_load_target(strtof(value, NULL) / 100.0);
}
value = crmd_pref(config_hash, "no-quorum-policy");
if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) {
no_quorum_suicide_escalation = TRUE;
}
value = crmd_pref(config_hash,"stonith-max-attempts");
update_stonith_max_attempts(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_get_msec(value);
crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms);
/* How long to declare an election over - even if not everyone voted */
value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
controld_set_election_period(value);
value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_timer->period_ms = crm_get_msec(value);
crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms);
value = crmd_pref(config_hash, "transition-delay");
transition_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "join-integration-timeout");
integration_timer->period_ms = crm_get_msec(value);
value = crmd_pref(config_hash, "join-finalization-timeout");
finalization_timer->period_ms = crm_get_msec(value);
free(fsa_cluster_name);
fsa_cluster_name = NULL;
value = g_hash_table_lookup(config_hash, "cluster-name");
if (value) {
fsa_cluster_name = strdup(value);
}
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
set_bit(fsa_input_register, R_READ_CONFIG);
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
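/* Query the crm_config and alerts sections from the local CIB in a single
* XPath call; config_query_callback() above parses both results.
*/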
gboolean
crm_read_options(gpointer user_data)
{
int call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn,
"//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath | cib_scope_local);
fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
mainloop_set_trigger(config_read);
}
void
crm_shutdown(int nsig)
{
if (crmd_mainloop != NULL && g_main_loop_is_running(crmd_mainloop)) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating the shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
} else {
set_bit(fsa_input_register, R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
if (shutdown_escalation_timer->period_ms < 1) {
const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
int msec = crm_get_msec(value);
crm_debug("Using default shutdown escalation: %dms", msec);
shutdown_escalation_timer->period_ms = msec;
}
/* can't rely on this... */
crm_notice("Shutting down cluster resource manager " CRM_XS
" limit=%dms", shutdown_escalation_timer->period_ms);
crm_timer_start(shutdown_escalation_timer);
}
} else {
crm_info("exit from shutdown");
crmd_exit(CRM_EX_OK);
}
}
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
index 8e739cd11b..f483b234c6 100644
--- a/daemons/controld/controld_corosync.c
+++ b/daemons/controld/controld_corosync.c
@@ -1,113 +1,109 @@
/*
- * Copyright 2004-2018 Andrew Beekhof
+ * Copyright 2004-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
+#include
+#include
#include
#include
#include
#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
#if SUPPORT_COROSYNC
extern void post_cache_update(int seq);
/* A_HA_CONNECT */
static void
crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
crm_node_t *peer = NULL;
xmlNode *xml = string2xml(data);
if (xml == NULL) {
crm_err("Could not parse message content (%d): %.100s", kind, data);
free(data);
return;
}
crm_xml_add(xml, F_ORIG, from);
/* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? */
peer = crm_get_peer(0, from);
if (is_not_set(peer->processes, crm_proc_cpg)) {
/* If we can still talk to our peer process on that node,
* then it must be part of the corosync membership
*/
crm_warn("Receiving messages from a node we think is dead: %s[%d]",
peer->uname, peer->id);
crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg,
ONLINESTATUS);
}
crmd_ha_msg_filter(xml);
free_xml(xml);
} else {
crm_err("Invalid message class (%d): %.100s", kind, data);
}
free(data);
}
static gboolean
crmd_quorum_callback(unsigned long long seq, gboolean quorate)
{
crm_update_quorum(quorate, FALSE);
post_cache_update(seq);
return TRUE;
}
static void
crmd_cs_destroy(gpointer user_data)
{
if (is_not_set(fsa_input_register, R_HA_DISCONNECTED)) {
crm_crit("Lost connection to cluster layer, shutting down");
crmd_exit(CRM_EX_DISCONNECT);
} else {
crm_info("Corosync connection closed");
}
}
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
gboolean
crm_connect_corosync(crm_cluster_t * cluster)
{
if (is_corosync_cluster()) {
crm_set_status_callback(&peer_update_callback);
cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
cluster->destroy = crmd_cs_destroy;
if (crm_cluster_connect(cluster)) {
cluster_connect_quorum(crmd_quorum_callback, crmd_cs_destroy);
return TRUE;
}
}
return FALSE;
}
#endif
diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c
index f0c6418581..9d02843c91 100644
--- a/daemons/controld/controld_election.c
+++ b/daemons/controld/controld_election.c
@@ -1,283 +1,274 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
+ * The version control history for this file may have further details.
+ *
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
-
#include
#include
#include
+
#include
-#include
-#include
-#include
-#include
-#include
static election_t *fsa_election = NULL;
static gboolean
election_win_cb(gpointer data)
{
register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
return FALSE;
}
void
controld_election_init(const char *uname)
{
fsa_election = election_init("DC", uname, 60000 /*60s*/, election_win_cb);
}
void
controld_remove_voter(const char *uname)
{
election_remove(fsa_election, uname);
if (safe_str_eq(uname, fsa_our_dc)) {
/* Clear any election dampening in effect. Otherwise, if the lost DC had
* just won, an immediate new election could fizzle out with no new DC.
*/
election_clear_dampening(fsa_election);
}
}
-void
-controld_stop_election_timeout()
-{
- election_timeout_stop(fsa_election);
-}
-
void
controld_election_fini()
{
election_fini(fsa_election);
fsa_election = NULL;
}
void
controld_set_election_period(const char *value)
{
election_timeout_set_period(fsa_election, crm_get_msec(value));
}
void
controld_stop_election_timer()
{
election_timeout_stop(fsa_election);
}
/* A_ELECTION_VOTE */
void
do_election_vote(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean not_voting = FALSE;
/* don't vote if we're in one of these states or wanting to shut down */
switch (cur_state) {
case S_STARTING:
case S_RECOVERY:
case S_STOPPING:
case S_TERMINATE:
crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state));
not_voting = TRUE;
break;
case S_ELECTION:
case S_INTEGRATION:
case S_RELEASE_DC:
break;
default:
crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
break;
}
if (not_voting == FALSE) {
if (is_set(fsa_input_register, R_STARTING)) {
not_voting = TRUE;
}
}
if (not_voting) {
if (AM_I_DC) {
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
} else {
register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
}
return;
}
election_vote(fsa_election);
return;
}
void
do_election_check(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (fsa_state == S_ELECTION) {
election_check(fsa_election);
} else {
crm_debug("Ignoring election check because we are not in an election");
}
}
/* A_ELECTION_COUNT */
void
do_election_count_vote(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
enum election_result rc = 0;
ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);
if(crm_peer_cache == NULL) {
if(is_not_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Internal error, no peer cache");
}
return;
}
rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING);
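/* election_start obliges us to (re)enter an election; election_lost means
* we must yield: a DC releases the role, while a non-DC that has finished
* starting goes back to pending. Any other result only affects election
* bookkeeping, so it is merely logged.
*/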
switch(rc) {
case election_start:
election_reset(fsa_election);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
break;
case election_lost:
update_dc(NULL);
if (fsa_input_register & R_THE_DC) {
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
} else if (cur_state != S_STARTING) {
register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
}
break;
default:
crm_trace("Election message resulted in state %d", rc);
}
}
static void
feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_notice("Feature update failed: %s "CRM_XS" rc=%d",
pcmk_strerror(rc), rc);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
/* A_DC_TAKEOVER */
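/* On winning an election, assert DC status: become the master CIB
* instance, push the current feature set, watchdog, and infrastructure
* details into crm_config, and trigger a fresh read of the configuration.
*/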
void
do_dc_takeover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int rc = pcmk_ok;
xmlNode *cib = NULL;
const char *cluster_type = name_for_cluster_type(get_cluster_type());
pid_t watchdog = pcmk_locate_sbd();
crm_info("Taking over DC status for this partition");
set_bit(fsa_input_register, R_THE_DC);
execute_stonith_cleanup();
election_reset(fsa_election);
set_bit(fsa_input_register, R_JOIN_OK);
set_bit(fsa_input_register, R_INVOKE_PE);
fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_scope_local);
cib = create_xml_node(NULL, XML_TAG_CIB);
crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
fsa_cib_update(XML_TAG_CIB, cib, cib_quorum_override, rc, NULL);
fsa_register_cib_callback(rc, FALSE, NULL, feature_update_callback);
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false", FALSE, NULL, NULL);
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
"dc-version", PACEMAKER_VERSION "-" BUILD_VERSION, FALSE, NULL, NULL);
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
"cluster-infrastructure", cluster_type, FALSE, NULL, NULL);
#if SUPPORT_COROSYNC
if (fsa_cluster_name == NULL && is_corosync_cluster()) {
char *cluster_name = corosync_cluster_name();
if (cluster_name) {
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
"cluster-name", cluster_name, FALSE, NULL, NULL);
}
free(cluster_name);
}
#endif
mainloop_set_trigger(config_read);
free_xml(cib);
}
/* A_DC_RELEASE */
void
do_dc_release(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (action & A_DC_RELEASE) {
crm_debug("Releasing the role of DC");
clear_bit(fsa_input_register, R_THE_DC);
controld_expect_sched_reply(NULL);
} else if (action & A_DC_RELEASED) {
crm_info("DC role released");
#if 0
if (are there errors) {
/* we can't stay up if not healthy */
/* or perhaps I_ERROR and go to S_RECOVER? */
result = I_SHUTDOWN;
}
#endif
if (is_set(fsa_input_register, R_SHUTDOWN)) {
xmlNode *update = NULL;
crm_node_t *node = crm_get_peer(0, fsa_our_uname);
crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
update = create_node_state_update(node, node_update_expected, NULL,
__FUNCTION__);
fsa_cib_anon_update(XML_CIB_TAG_STATUS, update);
free_xml(update);
}
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL);
} else {
crm_err("Unknown DC action %s", fsa_action2string(action));
}
crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO);
}
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index a20f96aacc..bd339d284a 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1,2802 +1,2800 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
+#include
#include
#include
#include
#include
+#include // lrmd_event_data_t, lrmd_rsc_info_t, etc.
#include
-
#include
#include
+#include
+#include
#include
-#include
-#include
-#include
-#include
-#include
-#include
#define START_DELAY_THRESHOLD 5 * 60 * 1000
#define MAX_LRM_REG_FAILS 30
#define s_if_plural(i) (((i) == 1)? "" : "s")
struct delete_event_s {
int rc;
const char *rsc;
lrm_state_t *lrm_state;
};
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
const char *user_name);
static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
const char *rsc_id, const char *operation);
static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
xmlNode * msg, xmlNode * request);
void send_direct_ack(const char *to_host, const char *to_sys,
lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
static void
lrm_connection_destroy(void)
{
if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("Connection to executor failed");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
clear_bit(fsa_input_register, R_LRM_CONNECTED);
} else {
crm_info("Disconnected from executor");
}
}
static char *
make_stop_id(const char *rsc, int call_id)
{
return crm_strdup_printf("%s:%d", rsc, call_id);
}
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") == NULL) {
g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
}
}
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") != NULL) {
g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
}
}
/*!
* \internal
* \brief Remove a recurring operation from a resource's history
*
* \param[in,out] history Resource history to modify
* \param[in] op Operation to remove
*
* \return TRUE if the operation was found and removed, FALSE otherwise
*/
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_event_data_t *existing = iter->data;
if ((op->interval_ms == existing->interval_ms)
&& crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
&& safe_str_eq(op->op_type, existing->op_type)) {
history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
lrmd_free_event(existing);
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Free all recurring operations in resource history
*
* \param[in,out] history Resource history to modify
*/
static void
history_free_recurring_ops(rsc_history_t *history)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_free_event(iter->data);
}
g_list_free(history->recurring_op_list);
history->recurring_op_list = NULL;
}
/*!
* \internal
* \brief Free resource history
*
* \param[in,out] history Resource history to free
*/
void
history_free(gpointer data)
{
rsc_history_t *history = (rsc_history_t*)data;
if (history->stop_params) {
g_hash_table_destroy(history->stop_params);
}
/* Don't need to free history->rsc.id because it's set to history->id */
free(history->rsc.type);
free(history->rsc.standard);
free(history->rsc.provider);
lrmd_free_event(history->failed);
lrmd_free_event(history->last);
free(history->id);
history_free_recurring_ops(history);
free(history);
}
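/*!
* \internal
* \brief Fold an operation result into the local resource history cache
*
* Failed operations are kept in the entry's failed slot, the most recent
* non-recurring result in its last slot, and recurring operations in a
* deduplicated list that is dropped again on any non-monitor operation.
*/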
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
return;
}
if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
return;
}
crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
if (entry == NULL && rsc) {
entry = calloc(1, sizeof(rsc_history_t));
entry->id = strdup(op->rsc_id);
g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = strdup(rsc->type);
entry->rsc.standard = strdup(rsc->standard);
if (rsc->provider) {
entry->rsc.provider = strdup(rsc->provider);
} else {
entry->rsc.provider = NULL;
}
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
entry->last_callid = op->call_id;
target_rc = rsc_op_expected_rc(op);
if (op->op_status == PCMK_LRM_OP_CANCELLED) {
if (op->interval_ms > 0) {
crm_trace("Removing cancelled recurring op: " CRM_OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
history_remove_recurring_op(entry, op);
return;
} else {
crm_trace("Skipping " CRM_OP_FMT " rc=%d, status=%d",
op->rsc_id, op->op_type, op->interval_ms, op->rc,
op->op_status);
}
} else if (did_rsc_op_fail(op, target_rc)) {
/* Store failed monitors here, otherwise the block below will cause them
* to be forgotten when a stop happens.
*/
if (entry->failed) {
lrmd_free_event(entry->failed);
}
entry->failed = lrmd_copy_event(op);
} else if (op->interval_ms == 0) {
if (entry->last) {
lrmd_free_event(entry->last);
}
entry->last = lrmd_copy_event(op);
if (op->params &&
(safe_str_eq(CRMD_ACTION_START, op->op_type) ||
safe_str_eq("reload", op->op_type) ||
safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {
if (entry->stop_params) {
g_hash_table_destroy(entry->stop_params);
}
entry->stop_params = crm_str_table_new();
g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
}
}
if (op->interval_ms > 0) {
/* Ensure there are no duplicates */
history_remove_recurring_op(entry, op);
crm_trace("Adding recurring op: " CRM_OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
} else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
crm_trace("Dropping %d recurring ops because of: " CRM_OP_FMT,
g_list_length(entry->recurring_op_list), op->rsc_id,
op->op_type, op->interval_ms);
history_free_recurring_ops(entry);
}
}
/*!
* \internal
* \brief Send a direct OK ack for a resource task
*
* \param[in] lrm_state LRM connection
* \param[in] input Input message being ack'ed
* \param[in] rsc_id ID of affected resource
* \param[in] rsc Affected resource (if available)
* \param[in] task Operation task being ack'ed
* \param[in] ack_host Name of host to send ack to
* \param[in] ack_sys IPC system name to ack
*/
static void
send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input,
const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task,
const char *ack_host, const char *ack_sys)
{
lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
op->rc = PCMK_OCF_OK;
op->op_status = PCMK_LRM_OP_DONE;
send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id);
lrmd_free_event(op);
}
static inline const char *
op_node_name(lrmd_event_data_t *op)
{
return op->remote_nodename? op->remote_nodename : fsa_our_uname;
}
void
lrm_op_callback(lrmd_event_data_t * op)
{
CRM_CHECK(op != NULL, return);
switch (op->type) {
case lrmd_event_disconnect:
if (op->remote_nodename == NULL) {
/* If this is the local executor IPC connection, set the right
* bits in the controller when the connection goes down.
*/
lrm_connection_destroy();
}
break;
case lrmd_event_exec_complete:
{
lrm_state_t *lrm_state = lrm_state_find(op_node_name(op));
CRM_ASSERT(lrm_state != NULL);
process_lrm_event(lrm_state, op, NULL, NULL);
}
break;
default:
break;
}
}
/* A_LRM_CONNECT */
void
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* This only pertains to local executor connections. Remote connections are
* handled as resources within the scheduler. Connecting and disconnecting
* from remote executor instances is handled differently.
*/
lrm_state_t *lrm_state = NULL;
if(fsa_our_uname == NULL) {
return; /* Nothing to do */
}
lrm_state = lrm_state_find_or_create(fsa_our_uname);
if (lrm_state == NULL) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (action & A_LRM_DISCONNECT) {
if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
if (action == A_LRM_DISCONNECT) {
crmd_fsa_stall(FALSE);
return;
}
}
clear_bit(fsa_input_register, R_LRM_CONNECTED);
crm_info("Disconnecting from the executor");
lrm_state_disconnect(lrm_state);
lrm_state_reset_tables(lrm_state, FALSE);
crm_notice("Disconnected from the executor");
}
if (action & A_LRM_CONNECT) {
int ret = pcmk_ok;
crm_debug("Connecting to the executor");
ret = lrm_state_ipc_connect(lrm_state);
if (ret != pcmk_ok) {
if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
crm_warn("Failed to connect to the executor %d time%s (%d max)",
lrm_state->num_lrm_register_fails,
s_if_plural(lrm_state->num_lrm_register_fails),
MAX_LRM_REG_FAILS);
crm_timer_start(wait_timer);
crmd_fsa_stall(FALSE);
return;
}
}
if (ret != pcmk_ok) {
crm_err("Failed to connect to the executor the max allowed %d time%s",
lrm_state->num_lrm_register_fails,
s_if_plural(lrm_state->num_lrm_register_fails));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
set_bit(fsa_input_register, R_LRM_CONNECTED);
crm_info("Connection to the executor established");
}
if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
}
}
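/* Check whether all resources and operations on this executor connection
* have stopped. Returns TRUE when it is safe to disconnect, or FALSE when
* non-recurring operations are still pending after stops have been sent,
* in which case the caller should stall and retry.
*/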
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
const char *when = "lrm disconnect";
GHashTableIter gIter;
const char *key = NULL;
rsc_history_t *entry = NULL;
struct recurring_op_s *pending = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
when = "shutdown";
} else if (is_set(fsa_input_register, R_SHUTDOWN)) {
when = "shutdown... waiting";
}
if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
guint removed = g_hash_table_foreach_remove(
lrm_state->pending_ops, stop_recurring_actions, lrm_state);
guint nremaining = g_hash_table_size(lrm_state->pending_ops);
if (removed || nremaining) {
crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
removed, s_if_plural(removed), when, nremaining);
}
}
if (lrm_state->pending_ops) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
/* Ignore recurring actions in the shutdown calculations */
if (pending->interval_ms == 0) {
counter++;
}
}
}
if (counter > 0) {
do_crm_log(log_level, "%d pending executor operation%s at %s",
counter, s_if_plural(counter), when);
if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
}
} else {
rc = FALSE;
}
return rc;
}
if (lrm_state->resource_history == NULL) {
return rc;
}
if (is_set(fsa_input_register, R_SHUTDOWN)) {
/* At this point we're not waiting, we're just shutting down */
when = "shutdown";
}
counter = 0;
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
if (is_rsc_active(lrm_state, entry->id) == FALSE) {
continue;
}
counter++;
if (log_level == LOG_ERR) {
crm_info("Found %s active at %s", entry->id, when);
} else {
crm_trace("Found %s active at %s", entry->id, when);
}
if (lrm_state->pending_ops) {
GHashTableIter hIter;
g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
crm_notice("%sction %s (%s) incomplete at %s",
pending->interval_ms == 0 ? "A" : "Recurring a",
key, pending->op_key, when);
}
}
}
}
if (counter) {
crm_err("%d resource%s active at %s",
counter, (counter == 1)? " was" : "s were", when);
}
return rc;
}
static char *
build_parameter_list(const lrmd_event_data_t *op,
const struct ra_metadata_s *metadata,
xmlNode *result, enum ra_param_flags_e param_type,
bool invert_for_xml)
{
int len = 0;
int max = 0;
char *list = NULL;
GList *iter = NULL;
/* Newer resource agents support the "private" parameter attribute to
* indicate sensitive parameters. For backward compatibility with older
* agents, this list is used if the agent doesn't specify any as "private".
*/
const char *secure_terms[] = {
"password",
"passwd",
"user",
};
if (is_not_set(metadata->ra_flags, ra_uses_private)
&& (param_type == ra_param_private)) {
max = DIMOF(secure_terms);
}
for (iter = metadata->ra_params; iter != NULL; iter = iter->next) {
struct ra_param_s *param = (struct ra_param_s *) iter->data;
bool accept = FALSE;
if (is_set(param->rap_flags, param_type)) {
accept = TRUE;
} else if (max) {
for (int lpc = 0; lpc < max; lpc++) {
if (safe_str_eq(secure_terms[lpc], param->rap_name)) {
accept = TRUE;
break;
}
}
}
if (accept) {
int start = len;
crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
len += strlen(param->rap_name) + 2; // include spaces around
list = realloc_safe(list, len + 1); // include null terminator
// spaces before and after make parsing simpler
sprintf(list + start, " %s ", param->rap_name);
} else {
crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
}
if (result && (invert_for_xml? !accept : accept)) {
const char *v = g_hash_table_lookup(op->params, param->rap_name);
if (v != NULL) {
crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
crm_xml_add(result, param->rap_name, v);
}
}
}
return list;
}
static void
append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
char *list = NULL;
char *digest = NULL;
xmlNode *restart = NULL;
CRM_LOG_ASSERT(op->params != NULL);
if (op->interval_ms > 0) {
/* monitors are not reloadable */
return;
}
if (is_set(metadata->ra_flags, ra_supports_reload)) {
restart = create_xml_node(NULL, XML_TAG_PARAMS);
/* Add any parameters with unique="1" to the "op-force-restart" list.
*
* (Currently, we abuse "unique=0" to indicate reloadability. This is
* nonstandard and should eventually be replaced once the OCF standard
* is updated with something better.)
*/
list = build_parameter_list(op, metadata, restart, ra_param_unique,
FALSE);
} else {
/* Resource does not support reloads */
return;
}
digest = calculate_operation_digest(restart, version);
/* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload,
* no matter if it actually supports any parameters with unique="1"). */
crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: "");
crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
crm_trace("%s: %s, %s", op->rsc_id, digest, list);
crm_log_xml_trace(restart, "restart digest source");
free_xml(restart);
free(digest);
free(list);
}
static void
append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
char *list = NULL;
char *digest = NULL;
xmlNode *secure = NULL;
CRM_LOG_ASSERT(op->params != NULL);
/*
* To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the
* secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on
* the insecure ones
*/
secure = create_xml_node(NULL, XML_TAG_PARAMS);
list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE);
if (list != NULL) {
digest = calculate_operation_digest(secure, version);
crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list);
crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
crm_trace("%s: %s, %s", op->rsc_id, digest, list);
crm_log_xml_trace(secure, "secure digest source");
} else {
crm_trace("%s: no secure parameters", op->rsc_id);
}
free_xml(secure);
free(digest);
free(list);
}
static gboolean
build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
const char *node_name, const char *src)
{
int target_rc = 0;
xmlNode *xml_op = NULL;
struct ra_metadata_s *metadata = NULL;
const char *caller_version = NULL;
lrm_state_t *lrm_state = NULL;
if (op == NULL) {
return FALSE;
}
target_rc = rsc_op_expected_rc(op);
/* there is a small risk in formerly mixed clusters that it will
* be sub-optimal.
*
* however with our upgrade policy, the update we send should
* still be completely supported anyway
*/
caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
CRM_LOG_ASSERT(caller_version != NULL);
if(caller_version == NULL) {
caller_version = CRM_FEATURE_SET;
}
crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
- xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG);
+ xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
+ fsa_our_uname, src, LOG_DEBUG);
if (xml_op == NULL) {
return TRUE;
}
if ((rsc == NULL) || (op->params == NULL)
|| !crm_op_needs_metadata(rsc->standard, op->op_type)) {
crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
op->op_type, op->rsc_id, op->params, rsc);
return TRUE;
}
lrm_state = lrm_state_find(node_name);
if (lrm_state == NULL) {
crm_warn("Cannot calculate digests for operation " CRM_OP_FMT
" because we have no connection to executor for %s",
op->rsc_id, op->op_type, op->interval_ms, node_name);
return TRUE;
}
metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
if (metadata == NULL) {
/* For now, we always collect resource agent meta-data via a local,
* synchronous, direct execution of the agent. This has multiple issues:
* the executor should execute agents, not the controller; meta-data for
* Pacemaker Remote nodes should be collected on those nodes, not
* locally; and the meta-data call shouldn't eat into the timeout of the
* real action being performed.
*
* These issues are planned to be addressed by having the scheduler
* schedule a meta-data cache check at the beginning of each transition.
* Once that is working, this block will only be a fallback in case the
* initial collection fails.
*/
char *metadata_str = NULL;
int rc = lrm_state_get_metadata(lrm_state, rsc->standard,
rsc->provider, rsc->type,
&metadata_str, 0);
if (rc != pcmk_ok) {
crm_warn("Failed to get metadata for %s (%s:%s:%s)",
rsc->id, rsc->standard, rsc->provider, rsc->type);
return TRUE;
}
metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
metadata_str);
free(metadata_str);
if (metadata == NULL) {
crm_warn("Failed to update metadata for %s (%s:%s:%s)",
rsc->id, rsc->standard, rsc->provider, rsc->type);
return TRUE;
}
}
#if ENABLE_VERSIONED_ATTRS
crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version);
#endif
crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type);
append_restart_list(op, metadata, xml_op, caller_version);
append_secure_list(op, metadata, xml_op, caller_version);
return TRUE;
}
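/*!
 * \internal
 * \brief Check whether a resource is active according to its history
 *
 * A resource is considered inactive if its most recent operation was a
 * successful stop or migration away, a monitor returning "not running", or a
 * non-recurring operation returning "not configured".
 *
 * \param[in] lrm_state  Executor state containing resource history
 * \param[in] rsc_id     ID of resource to check
 *
 * \return TRUE if resource is presumed active, FALSE otherwise
 */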
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
rsc_history_t *entry = NULL;
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->last == NULL) {
return FALSE;
}
crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
entry->last->interval_ms, entry->last->rc);
if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) {
return FALSE;
} else if (entry->last->rc == PCMK_OCF_OK
&& safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
        /* A stricter check is too complex...
         * leave that to the scheduler
         */
return FALSE;
} else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
return FALSE;
} else if ((entry->last->interval_ms == 0)
&& (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
/* Badly configured resources can't be reliably stopped */
return FALSE;
}
return TRUE;
}
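/*!
 * \internal
 * \brief Add history entries for all known resources to an XML resource list
 *
 * \param[in]     lrm_state  Executor state containing resource history
 * \param[in,out] rsc_list   XML node to add lrm_resource entries to
 *
 * \return Always FALSE
 */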
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
GHashTableIter iter;
rsc_history_t *entry = NULL;
g_hash_table_iter_init(&iter, lrm_state->resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
GList *gIter = NULL;
xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
if (entry->last && entry->last->params) {
const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
if (container) {
crm_trace("Resource %s is a part of container resource %s", entry->id, container);
crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
}
}
build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__);
build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__);
for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__);
}
}
return FALSE;
}
static xmlNode *
do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
{
xmlNode *xml_state = NULL;
xmlNode *xml_data = NULL;
xmlNode *rsc_list = NULL;
crm_node_t *peer = NULL;
peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return NULL);
xml_state = create_node_state_update(peer, update_flags, NULL,
__FUNCTION__);
if (xml_state == NULL) {
return NULL;
}
xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
    /* Build a list of active (i.e. not necessarily running) resources */
build_active_RAs(lrm_state, rsc_list);
crm_log_xml_trace(xml_state, "Current executor state");
return xml_state;
}
xmlNode *
do_lrm_query(gboolean is_replace, const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (!lrm_state) {
crm_err("Could not find executor state for node %s", node_name);
return NULL;
}
return do_lrm_query_internal(lrm_state,
node_update_cluster|node_update_peer);
}
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
lrmd_event_data_t *op = NULL;
const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "localhost"), rsc_id,
((rc == pcmk_ok)? "" : " not"));
op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
if (rc == pcmk_ok) {
op->op_status = PCMK_LRM_OP_DONE;
op->rc = PCMK_OCF_OK;
} else {
op->op_status = PCMK_LRM_OP_ERROR;
op->rc = PCMK_OCF_UNKNOWN_ERROR;
}
send_direct_ack(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
/* this isn't expected - trigger a new transition */
time_t now = time(NULL);
char *now_s = crm_itoa(now);
crm_debug("Triggering a refresh after %s deleted %s from the executor",
from_sys, rsc_id);
update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
"last-lrm-refresh", now_s, FALSE, NULL, NULL);
free(now_s);
}
}
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
struct delete_event_s *event = user_data;
struct pending_deletion_op_s *op = value;
if (crm_str_eq(event->rsc, op->rsc, TRUE)) {
notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
return TRUE;
}
return FALSE;
}
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
const char *rsc = user_data;
struct recurring_op_s *pending = value;
if (crm_str_eq(rsc, pending->rsc_id, TRUE)) {
crm_info("Removing op %s:%d for deleted resource %s",
pending->op_key, pending->call_id, rsc);
return TRUE;
}
return FALSE;
}
/*
* Remove the rsc from the CIB
*
* Avoids refreshing the entire LRM section of this host
*/
#define RSC_TEMPLATE "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
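/* For illustration, with node name "node1" and resource ID "myrsc" (made-up
 * values), RSC_TEMPLATE would typically expand to:
 * //node_state[@uname='node1']//lrm_resource[@id='myrsc']
 */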
static int
delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
const char *user_name)
{
char *rsc_xpath = NULL;
int rc = pcmk_ok;
CRM_CHECK(rsc_id != NULL, return -ENXIO);
rsc_xpath = crm_strdup_printf(RSC_TEMPLATE, lrm_state->node_name, rsc_id);
rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath,
NULL, NULL, call_options | cib_xpath, user_name);
free(rsc_xpath);
return rc;
}
static void
delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
GHashTableIter * rsc_gIter, int rc, const char *user_name)
{
struct delete_event_s event;
CRM_CHECK(rsc_id != NULL, return);
if (rc == pcmk_ok) {
char *rsc_id_copy = strdup(rsc_id);
if (rsc_gIter)
g_hash_table_iter_remove(rsc_gIter);
else
g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
crm_debug("sync: Sending delete op for %s", rsc_id_copy);
delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);
g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
free(rsc_id_copy);
}
if (input) {
notify_deleted(lrm_state, input, rsc_id, rc);
}
event.rc = rc;
event.rsc = rsc_id;
event.lrm_state = lrm_state;
g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
}
/*!
* \internal
* \brief Erase an LRM history entry from the CIB, given the operation data
*
* \param[in] lrm_state LRM state of the desired node
* \param[in] op Operation whose history should be deleted
*/
static void
erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op)
{
xmlNode *xml_top = NULL;
CRM_CHECK(op != NULL, return);
xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
if (op->interval_ms > 0) {
char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);
/* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
crm_xml_add(xml_top, XML_ATTR_ID, op_id);
free(op_id);
}
crm_debug("Erasing resource operation history for " CRM_OP_FMT " (call=%d)",
op->rsc_id, op->op_type, op->interval_ms, op->call_id);
fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top,
cib_quorum_override);
crm_log_xml_trace(xml_top, "op:cancel");
free_xml(xml_top);
}
/* Define xpath to find LRM resource history entry by node and resource */
#define XPATH_HISTORY \
"/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
"/" XML_LRM_TAG_RSC_OP
/* ... and also by operation key */
#define XPATH_HISTORY_ID XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s']"
/* ... and also by operation key and operation call ID */
#define XPATH_HISTORY_CALL XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
/* ... and also by operation key and original operation key */
#define XPATH_HISTORY_ORIG XPATH_HISTORY \
"[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
/*!
* \internal
* \brief Erase an LRM history entry from the CIB, given operation identifiers
*
* \param[in] lrm_state LRM state of the node to clear history for
* \param[in] rsc_id Name of resource to clear history for
* \param[in] key Operation key of operation to clear history for
* \param[in] orig_op If specified, delete only if it has this original op
* \param[in] call_id If specified, delete entry only if it has this call ID
*/
static void
erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id,
const char *key, const char *orig_op, int call_id)
{
char *op_xpath = NULL;
CRM_CHECK((rsc_id != NULL) && (key != NULL), return);
if (call_id > 0) {
op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL,
lrm_state->node_name, rsc_id, key,
call_id);
} else if (orig_op) {
op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG,
lrm_state->node_name, rsc_id, key,
orig_op);
} else {
op_xpath = crm_strdup_printf(XPATH_HISTORY_ID,
lrm_state->node_name, rsc_id, key);
}
crm_debug("Erasing resource operation history for %s on %s (call=%d)",
key, rsc_id, call_id);
fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL,
cib_quorum_override | cib_xpath);
free(op_xpath);
}
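/*!
 * \internal
 * \brief Check whether a history entry's last failure matches an operation
 *
 * \param[in] entry        Resource history entry to check (may be NULL)
 * \param[in] op           Operation name to match (NULL matches any)
 * \param[in] interval_ms  Operation interval to match (if op is not NULL)
 *
 * \return TRUE if entry is non-NULL and its last failure matches op and
 *         interval_ms (or op is NULL), otherwise FALSE
 */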
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
if (entry == NULL) {
return FALSE;
}
if (op == NULL) {
return TRUE;
}
return (safe_str_eq(op, entry->failed->op_type)
&& (interval_ms == entry->failed->interval_ms));
}
/*!
* \internal
* \brief Clear a resource's last failure
*
* Erase a resource's last failure on a particular node from both the
* LRM resource history in the CIB, and the resource history remembered
* for the LRM state.
*
* \param[in] rsc_id Resource name
* \param[in] node_name Node name
* \param[in] operation If specified, only clear if matching this operation
* \param[in] interval_ms If operation is specified, it has this interval
*/
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
const char *operation, guint interval_ms)
{
char *op_key = NULL;
char *orig_op_key = NULL;
lrm_state_t *lrm_state = NULL;
lrm_state = lrm_state_find(node_name);
if (lrm_state == NULL) {
return;
}
/* Erase from CIB */
op_key = generate_op_key(rsc_id, "last_failure", 0);
if (operation) {
orig_op_key = generate_op_key(rsc_id, operation, interval_ms);
}
erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0);
free(op_key);
free(orig_op_key);
/* Remove from memory */
if (lrm_state->resource_history) {
rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
rsc_id);
if (last_failed_matches_op(entry, operation, interval_ms)) {
lrmd_free_event(entry->failed);
entry->failed = NULL;
}
}
}
/* Returns: gboolean - cancellation is in progress */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
int rc = pcmk_ok;
char *local_key = NULL;
struct recurring_op_s *pending = NULL;
CRM_CHECK(op != 0, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
if (key == NULL) {
local_key = make_stop_id(rsc_id, op);
key = local_key;
}
pending = g_hash_table_lookup(lrm_state->pending_ops, key);
if (pending) {
if (remove && pending->remove == FALSE) {
pending->remove = TRUE;
crm_debug("Scheduling %s for removal", key);
}
if (pending->cancelled) {
crm_debug("Operation %s already cancelled", key);
free(local_key);
return FALSE;
}
pending->cancelled = TRUE;
} else {
crm_info("No pending op found for %s", key);
free(local_key);
return FALSE;
}
crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
pending->interval_ms);
if (rc == pcmk_ok) {
crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
free(local_key);
return TRUE;
}
crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
/* The caller needs to make sure the entry is
* removed from the pending_ops list
*
* Usually by returning TRUE inside the worker function
* supplied to g_hash_table_foreach_remove()
*
* Not removing the entry from pending_ops will block
* the node from shutting down
*/
free(local_key);
return FALSE;
}
struct cancel_data {
gboolean done;
gboolean remove;
const char *key;
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct cancel_data *data = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if (crm_str_eq(op->op_key, data->key, TRUE)) {
data->done = TRUE;
remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
}
return remove;
}
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
guint removed = 0;
struct cancel_data data;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(key != NULL, return FALSE);
data.key = key;
data.rsc = rsc;
data.done = FALSE;
data.remove = remove;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data);
crm_trace("Removed %u op cache entries, new size: %u",
removed, g_hash_table_size(lrm_state->pending_ops));
return data.done;
}
/*!
* \internal
* \brief Retrieve resource information from LRM
*
* \param[in] lrm_state LRM connection to use
* \param[in] rsc_xml XML containing resource configuration
* \param[in] do_create If true, register resource with LRM if not already
* \param[out] rsc_info Where to store resource information obtained from LRM
*
* \retval pcmk_ok Success (and rsc_info holds newly allocated result)
* \retval -EINVAL Required information is missing from arguments
* \retval -ENOTCONN No active connection to LRM
* \retval -ENODEV Resource not found
* \retval -errno Error communicating with executor when registering resource
*
* \note Caller is responsible for freeing result on success.
*/
static int
get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create,
lrmd_rsc_info_t **rsc_info)
{
const char *id = ID(rsc_xml);
CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
CRM_CHECK(id, return -EINVAL);
if (lrm_state_is_connected(lrm_state) == FALSE) {
return -ENOTCONN;
}
crm_trace("Retrieving resource information for %s from the executor", id);
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
// If resource isn't known by ID, try clone name, if provided
if (!*rsc_info) {
const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);
if (long_id) {
*rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
}
}
if ((*rsc_info == NULL) && do_create) {
const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
int rc;
crm_trace("Registering resource %s with the executor", id);
rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
lrmd_opt_drop_recurring);
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Could not register resource %s with the executor on %s: %s "
CRM_XS " rc=%d",
id, lrm_state->node_name, pcmk_strerror(rc), rc);
/* Register this as an internal error if this involves the local
* executor. Otherwise, we're likely dealing with an unresponsive
* remote node, which is not an FSA failure.
*/
if (lrm_state_is_local(lrm_state) == TRUE) {
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
return rc;
}
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
}
return *rsc_info? pcmk_ok : -ENODEV;
}
static void
delete_resource(lrm_state_t * lrm_state,
const char *id,
lrmd_rsc_info_t * rsc,
GHashTableIter * gIter,
const char *sys,
const char *host,
const char *user,
ha_msg_input_t * request,
gboolean unregister)
{
int rc = pcmk_ok;
crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
if (rsc && unregister) {
rc = lrm_state_unregister_rsc(lrm_state, id, 0);
}
if (rc == pcmk_ok) {
crm_trace("Resource '%s' deleted", id);
} else if (rc == -EINPROGRESS) {
crm_info("Deletion of resource '%s' pending", id);
if (request) {
struct pending_deletion_op_s *op = NULL;
char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
op = calloc(1, sizeof(struct pending_deletion_op_s));
op->rsc = strdup(rsc->id);
op->input = copy_ha_msg_input(request);
g_hash_table_insert(lrm_state->deletion_ops, ref, op);
}
return;
} else {
crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
id, sys, user ? user : "internal", host, rc);
}
delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
}
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
int call_id = 999999999;
rsc_history_t *entry = NULL;
if(lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
    /* Make sure the call ID is greater than the last successful operation,
     * otherwise the failure will not result in a possible recovery of the
     * resource, as it could appear the failure occurred before the
     * successful start */
if (entry) {
call_id = entry->last_callid + 1;
}
if (call_id < 0) {
call_id = 1;
}
return call_id;
}
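/*!
 * \internal
 * \brief Fill in the result fields of a synthesized operation
 *
 * \param[in]     lrm_state    Executor state (for choosing a fake call ID)
 * \param[in,out] op           Operation to fill in
 * \param[in]     op_status    Operation status to set
 * \param[in]     op_exitcode  Operation return code to set
 */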
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
enum ocf_exitcode op_exitcode)
{
op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
op->op_status = op_status;
op->rc = op_exitcode;
}
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node)
{
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_info("Clearing resource history on node %s", lrm_state->node_name);
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
        /* Only unregister the resource during a reprobe if it is not a remote
         * connection resource, because unregistering the connection would
         * terminate remote node membership */
gboolean unregister = TRUE;
if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
if (remote_lrm_state) {
                /* When forcing a reprobe, make sure to clear the remote node
                 * before clearing its connection resource */
force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE);
}
unregister = FALSE;
}
delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
user_name, NULL, unregister);
}
/* Now delete the copy in the CIB */
erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
/* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE
* would result in the scheduler sending us back here again
*/
update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
}
/*!
* \internal
* \brief Fail a requested action without actually executing it
*
* For an action that can't be executed, process it similarly to an actual
* execution result, with specified error status (except for notify actions,
* which will always be treated as successful).
*
* \param[in] lrm_state Executor connection that action is for
* \param[in] action Action XML from request
 * \param[in]  op_status  Desired operation status to use
 * \param[in]  rc         Desired return code to use
*/
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action,
int op_status, enum ocf_exitcode rc)
{
lrmd_event_data_t *op = NULL;
const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
/* @TODO Should we do something else, like direct ack? */
crm_info("Can't fake %s failure (%d) on %s without resource configuration",
crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
target_node);
return;
} else if(operation == NULL) {
/* This probably came from crm_resource -C, nothing to do */
crm_info("Can't fake %s failure (%d) on %s without operation",
ID(xml_rsc), rc, target_node);
return;
}
op = construct_op(lrm_state, action, ID(xml_rsc), operation);
if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK);
} else {
fake_op_status(lrm_state, op, op_status, rc);
}
crm_info("Faking " CRM_OP_FMT " result (%d) on %s",
op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
// Process the result as if it came from the LRM
process_lrm_event(lrm_state, op, NULL, action);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Get target of an LRM operation
*
* \param[in] xml LRM operation data XML
*
* \return LRM operation target node name (local node or Pacemaker Remote node)
*/
static const char *
lrm_op_target(xmlNode *xml)
{
const char *target = NULL;
if (xml) {
target = crm_element_value(xml, XML_LRM_ATTR_TARGET);
}
if (target == NULL) {
target = fsa_our_uname;
}
return target;
}
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
lrmd_event_data_t *op = NULL;
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);
CRM_CHECK(xml_rsc != NULL, return);
/* The executor simply executes operations and reports the results, without
* any concept of success or failure, so to fail a resource, we must fake
* what a failure looks like.
*
* To do this, we create a fake executor operation event for the resource,
* and pass that event to the executor client callback so it will be
* processed as if it came from the executor.
*/
op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);
free((char*) op->user_data);
op->user_data = NULL;
op->interval_ms = 0;
#if ENABLE_ACL
if (user_name && is_privileged(user_name) == FALSE) {
crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
lrmd_free_event(op);
return;
}
#endif
if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
crm_info("Failing resource %s...", rsc->id);
op->exit_reason = strdup("Simulated failure");
process_lrm_event(lrm_state, op, NULL, xml);
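        /* The direct ack sent below should indicate success, since the
         * simulated failure itself was processed successfully */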
op->op_status = PCMK_LRM_OP_DONE;
op->rc = PCMK_OCF_OK;
lrmd_free_rsc_info(rsc);
} else {
crm_info("Cannot find/create resource in order to fail it...");
crm_log_xml_warn(xml, "bad input");
}
send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
lrmd_free_event(op);
}
static void
handle_refresh_op(lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
int rc = pcmk_ok;
xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);
fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
crm_info("Forced a local resource history refresh: call=%d", rc);
if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) {
xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host,
from_sys, CRM_SYSTEM_LRMD,
fsa_our_uuid);
crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(reply);
}
free_xml(fragment);
}
static void
handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
{
xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all);
xmlNode *reply = create_reply(msg, data);
if (relay_message(reply, TRUE) == FALSE) {
crm_err("Unable to route reply");
crm_log_xml_err(reply, "reply");
}
free_xml(reply);
free_xml(data);
}
static void
handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node)
{
crm_notice("Forcing the status of all resources to be redetected");
force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);
if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys)
&& safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) {
xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
from_sys, CRM_SYSTEM_LRMD,
fsa_our_uuid);
crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(reply);
}
}
static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
const char *interval_ms_s = NULL;
gboolean in_progress = FALSE;
xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
CRM_CHECK(params != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
interval_ms_s = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(interval_ms_s != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
op_task = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(op_task != NULL, return FALSE);
meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
call_id = crm_element_value(params, meta_key);
free(meta_key);
op_key = generate_op_key(rsc->id, op_task, crm_parse_ms(interval_ms_s));
crm_debug("Scheduler requested op %s (call=%s) be cancelled",
op_key, (call_id? call_id : "NA"));
call = crm_parse_int(call_id, "0");
if (call == 0) {
// Normal case when the scheduler cancels a recurring op
in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
} else {
// Normal case when the scheduler cancels an orphan op
in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
}
// Acknowledge cancellation operation if for a remote connection resource
if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
char *op_id = make_stop_id(rsc->id, call);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
crm_info("Nothing known about operation %d for %s", call, op_key);
}
erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
/* needed at least for cancellation of a remote operation */
g_hash_table_remove(lrm_state->pending_ops, op_id);
free(op_id);
} else {
/* No ack is needed since abcdaa8, but peers with older versions
* in a rolling upgrade need one. We didn't bump the feature set
* at that commit, so we can only compare against the previous
* CRM version (3.0.8). If any peers have feature set 3.0.9 but
* not abcdaa8, they will time out waiting for the ack (no
* released versions of Pacemaker are affected).
*/
const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
if (compare_version(peer_version, "3.0.8") <= 0) {
crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
op_key, from_host, peer_version);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
}
}
free(op_key);
return TRUE;
}
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
bool crm_rsc_delete, const char *user_name)
{
gboolean unregister = TRUE;
#if ENABLE_ACL
int cib_rc = delete_rsc_status(lrm_state, rsc->id,
cib_dryrun|cib_sync_call, user_name);
if (cib_rc != pcmk_ok) {
lrmd_event_data_t *op = NULL;
crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s"
CRM_XS " rc=%d",
rsc->id, from_sys, (user_name? user_name : "unknown"),
from_host, pcmk_strerror(cib_rc), cib_rc);
op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE);
op->op_status = PCMK_LRM_OP_ERROR;
if (cib_rc == -EACCES) {
op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
} else {
op->rc = PCMK_OCF_UNKNOWN_ERROR;
}
send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
lrmd_free_event(op);
return;
}
#endif
if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
unregister = FALSE;
}
delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host,
user_name, input, unregister);
}
/* A_LRM_INVOKE */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
lrm_state_t *lrm_state = NULL;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
const char *target_node = NULL;
gboolean is_remote_node = FALSE;
bool crm_rsc_delete = FALSE;
target_node = lrm_op_target(input->xml);
is_remote_node = safe_str_neq(target_node, fsa_our_uname);
lrm_state = lrm_state_find(target_node);
if ((lrm_state == NULL) && is_remote_node) {
crm_err("Failing action because local node has never had connection to remote node %s",
target_node);
synthesize_lrmd_failure(NULL, input->xml, PCMK_LRM_OP_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR);
return;
}
CRM_ASSERT(lrm_state != NULL);
#if ENABLE_ACL
user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
crm_trace("Executor command from user '%s'", user_name);
#endif
crm_op = crm_element_value(input->msg, F_CRM_TASK);
from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
}
crm_trace("Executor %s command from %s", crm_op, from_sys);
if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
crm_rsc_delete = TRUE; // Only crm_resource uses this op
operation = CRMD_ACTION_DELETE;
} else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
from_sys);
return;
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
}
if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
handle_refresh_op(lrm_state, user_name, from_host, from_sys);
} else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
handle_query_op(input->msg, lrm_state);
} else if (safe_str_eq(operation, CRM_OP_PROBED)) {
update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
user_name, is_remote_node);
} else if (safe_str_eq(operation, CRM_OP_REPROBE)
|| safe_str_eq(crm_op, CRM_OP_REPROBE)) {
handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
is_remote_node);
} else if (operation != NULL) {
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE);
int rc;
// We can't return anything meaningful without a resource ID
CRM_CHECK(xml_rsc && ID(xml_rsc), return);
rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
if (rc == -ENOTCONN) {
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_LRM_OP_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR);
return;
} else if ((rc < 0) && !create_rsc) {
/* Delete of malformed or nonexistent resource
* (deleting something that does not exist is a success)
*/
crm_notice("Not registering resource '%s' for a %s event "
CRM_XS " get-rc=%d (%s) transition-key=%s",
ID(xml_rsc), operation,
rc, pcmk_strerror(rc), ID(input->xml));
delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
user_name);
send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation,
from_host, from_sys);
return;
} else if (rc == -EINVAL) {
// Resource operation on malformed resource
crm_err("Invalid resource definition for %s", ID(xml_rsc));
crm_log_xml_warn(input->msg, "invalid resource");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR,
PCMK_OCF_NOT_CONFIGURED); // fatal error
return;
} else if (rc < 0) {
// Error communicating with the executor
crm_err("Could not register resource '%s' with executor: %s "
CRM_XS " rc=%d",
ID(xml_rsc), pcmk_strerror(rc), rc);
crm_log_xml_warn(input->msg, "failed registration");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_LRM_OP_ERROR,
PCMK_OCF_INVALID_PARAM); // hard error
return;
}
if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
crm_log_xml_warn(input->xml, "Bad command");
}
} else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
crm_rsc_delete, user_name);
} else {
do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
}
lrmd_free_rsc_info(rsc);
} else {
crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
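/*!
 * \internal
 * \brief Construct an executor operation event from request XML
 *
 * \param[in] lrm_state  Executor state (for cached history and meta-data)
 * \param[in] rsc_op     XML of request (NULL for the stop-all-resources case)
 * \param[in] rsc_id     ID of resource the operation is for
 * \param[in] operation  Name of operation to construct
 *
 * \return Newly allocated operation event (caller is responsible for freeing
 *         the result with lrmd_free_event())
 */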
static lrmd_event_data_t *
construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
{
lrmd_event_data_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
const char *interval_ms_s = NULL;
GHashTable *params = NULL;
const char *transition = NULL;
CRM_ASSERT(rsc_id && operation);
op = calloc(1, sizeof(lrmd_event_data_t));
CRM_ASSERT(op != NULL);
op->type = lrmd_event_exec_complete;
op->op_type = strdup(operation);
op->op_status = PCMK_LRM_OP_PENDING;
op->rc = -1;
op->rsc_id = strdup(rsc_id);
op->interval_ms = 0;
op->timeout = 0;
op->start_delay = 0;
if (rsc_op == NULL) {
CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
op->user_data = NULL;
        /* This is the stop_all_resources() case: by definition there is no DC
         * (or it would be shutting us down), so we should put our own version
         * here.
         */
op->params = crm_str_table_new();
g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
params = xml2list(rsc_op);
g_hash_table_remove(params, CRM_META "_op_target_rc");
op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
interval_ms_s = crm_meta_value(params, XML_LRM_ATTR_INTERVAL_MS);
op->interval_ms = crm_parse_ms(interval_ms_s);
op->timeout = crm_parse_int(op_timeout, "0");
op->start_delay = crm_parse_int(op_delay, "0");
#if ENABLE_VERSIONED_ATTRS
// Resolve any versioned parameters
if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA)
&& safe_str_neq(op->op_type, CRMD_ACTION_DELETE)
&& !is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
        // Resource info *should* already be cached, so this shouldn't trigger an executor call
lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0);
struct ra_metadata_s *metadata;
metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
if (metadata) {
xmlNode *versioned_attrs = NULL;
GHashTable *hash = NULL;
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_iter_steal(&iter);
g_hash_table_replace(params, key, value);
}
g_hash_table_destroy(hash);
versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_replace(params, crm_meta_name(key), strdup(value));
if (safe_str_eq(key, XML_ATTR_TIMEOUT)) {
op->timeout = crm_parse_int(value, "0");
} else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) {
op->start_delay = crm_parse_int(value, "0");
}
}
g_hash_table_destroy(hash);
versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS);
hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
g_hash_table_iter_init(&iter, hash);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
g_hash_table_iter_steal(&iter);
g_hash_table_replace(params, key, value);
}
g_hash_table_destroy(hash);
}
lrmd_free_rsc_info(rsc);
}
#endif
if (safe_str_neq(operation, RSC_STOP)) {
op->params = params;
} else {
rsc_history_t *entry = NULL;
if (lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* If we do not have stop parameters cached, use
* whatever we are given */
if (!entry || !entry->stop_params) {
op->params = params;
} else {
/* Copy the cached parameter list so that we stop the resource
* with the old attributes, not the new ones */
op->params = crm_str_table_new();
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
}
    /* Sanity checks: ensure a usable timeout and a non-negative start delay */
if (op->timeout <= 0) {
op->timeout = op->interval_ms;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = strdup(transition);
if (op->interval_ms != 0) {
if (safe_str_eq(operation, CRMD_ACTION_START)
|| safe_str_eq(operation, CRMD_ACTION_STOP)) {
crm_err("Start and Stop actions cannot have an interval: %u",
op->interval_ms);
op->interval_ms = 0;
}
}
crm_trace("Constructed %s op for %s: interval=%u",
operation, rsc_id, op->interval_ms);
return op;
}
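/*!
 * \brief Directly acknowledge an operation result to its originator
 *
 * Build a node-state update containing the operation result, and relay it
 * directly to the requesting subsystem rather than waiting for the result
 * to be recorded in the CIB.
 *
 * \param[in] to_host  Host to send ack to (NULL for local)
 * \param[in] to_sys   Subsystem to send ack to (NULL for transition engine)
 * \param[in] rsc      Information about the operation's resource (may be NULL)
 * \param[in] op       Operation result being acknowledged
 * \param[in] rsc_id   ID of the operation's resource (used if op lacks one)
 */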
void
send_direct_ack(const char *to_host, const char *to_sys,
lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
{
xmlNode *reply = NULL;
xmlNode *update, *iter;
crm_node_t *peer = NULL;
CRM_CHECK(op != NULL, return);
if (op->rsc_id == NULL) {
CRM_ASSERT(rsc_id != NULL);
op->rsc_id = strdup(rsc_id);
}
if (to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
peer = crm_get_peer(0, fsa_our_uname);
update = create_node_state_update(peer, node_update_none, NULL,
__FUNCTION__);
iter = create_xml_node(update, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__);
reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
crm_log_xml_trace(update, "ACK Update");
crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s",
op->rsc_id, op->op_type, op->interval_ms, op->user_data,
crm_element_value(reply, XML_ATTR_REFERENCE));
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(update);
free_xml(reply);
}
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
gboolean res = TRUE;
GList *lrm_state_list = lrm_state_get_list();
GList *state_entry;
for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
lrm_state_t *lrm_state = state_entry->data;
if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
/* keep iterating through all even when false is returned */
res = FALSE;
}
}
set_bit(fsa_input_register, R_SENT_RSC_STOP);
    g_list_free(lrm_state_list);
    lrm_state_list = NULL;
return res;
}
struct stop_recurring_action_s {
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct stop_recurring_action_s *event = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if ((op->interval_ms != 0)
&& crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
}
return remove;
}
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
lrm_state_t *lrm_state = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
if (op->interval_ms != 0) {
crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
(const char *) key);
remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
}
return remove;
}
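/*!
 * \internal
 * \brief Record an operation as pending in the CIB, if appropriate
 *
 * \param[in]     node_name  Node where the operation will execute
 * \param[in]     rsc        Information about the operation's resource
 * \param[in,out] op         Operation to record (result fields are set to
 *                           pending values here)
 *
 * \note Does nothing if the operation type is not recordable, or if the
 *       operation's record-pending meta-attribute is false.
 */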
static void
record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op)
{
const char *record_pending = NULL;
CRM_CHECK(node_name != NULL, return);
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(op != NULL, return);
// Never record certain operation types as pending
if ((op->op_type == NULL) || (op->params == NULL)
|| !controld_action_is_recordable(op->op_type)) {
return;
}
// defaults to true
record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
if (record_pending && !crm_is_true(record_pending)) {
return;
}
op->call_id = -1;
op->op_status = PCMK_LRM_OP_PENDING;
op->rc = PCMK_OCF_UNKNOWN;
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
/* write a "pending" entry to the CIB, inhibit notification */
crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB",
op->rsc_id, op->op_type, op->interval_ms, node_name);
do_update_resource(node_name, rsc, op);
}
static void
do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
xmlNode * request)
{
int call_id = 0;
char *op_id = NULL;
lrmd_event_data_t *op = NULL;
lrmd_key_value_t *params = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
gboolean stop_recurring = FALSE;
bool send_nack = FALSE;
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(operation != NULL, return);
if (msg != NULL) {
transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
if (transition == NULL) {
crm_log_xml_err(msg, "Missing transition number");
}
}
op = construct_op(lrm_state, msg, rsc->id, operation);
CRM_CHECK(op != NULL, return);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
&& (op->interval_ms == 0)
&& strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {
        /* Pacemaker Remote connections are a special use case. We never want
         * to stop monitoring a connection resource until the entire migration
         * has completed. If the connection is unexpectedly severed, even
         * during a migration, this is an event we must detect. */
stop_recurring = FALSE;
} else if ((op->interval_ms == 0)
&& strcmp(operation, CRMD_ACTION_STATUS) != 0
&& strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {
/* stop any previous monitor operations before changing the resource state */
stop_recurring = TRUE;
}
if (stop_recurring == TRUE) {
guint removed = 0;
struct stop_recurring_action_s data;
data.rsc = rsc;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(
lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);
if (removed) {
crm_debug("Stopped %u recurring operation%s in preparation for " CRM_OP_FMT,
removed, s_if_plural(removed),
rsc->id, operation, op->interval_ms);
}
}
/* now do the op */
crm_info("Performing key=%s op=" CRM_OP_FMT,
transition, rsc->id, operation, op->interval_ms);
if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) {
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
send_nack = TRUE;
} else if (fsa_state != S_NOT_DC
&& fsa_state != S_POLICY_ENGINE /* Recalculating */
&& fsa_state != S_TRANSITION_ENGINE
&& safe_str_neq(operation, CRMD_ACTION_STOP)) {
send_nack = TRUE;
}
if(send_nack) {
crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)",
operation, rsc->id, fsa_state2string(fsa_state),
is_set(fsa_input_register, R_SHUTDOWN)?"true":"false");
op->rc = PCMK_OCF_UNKNOWN_ERROR;
op->op_status = PCMK_LRM_OP_INVALID;
send_direct_ack(NULL, NULL, rsc, op, rsc->id);
lrmd_free_event(op);
free(op_id);
return;
}
record_pending_op(lrm_state->node_name, rsc, op);
op_id = generate_op_key(rsc->id, op->op_type, op->interval_ms);
if (op->interval_ms > 0) {
/* cancel it so we can then restart it without conflict */
cancel_op_key(lrm_state, rsc, op_id, FALSE);
}
if (op->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, op->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
params = lrmd_key_value_add(params, key, value);
}
}
call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data,
op->interval_ms, op->timeout, op->start_delay,
params);
if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else if (call_id <= 0) {
crm_err("Operation %s on resource %s failed to execute on remote node %s: %d",
operation, rsc->id, lrm_state->node_name, call_id);
fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR);
process_lrm_event(lrm_state, op, NULL, NULL);
} else {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
struct recurring_op_s *pending = NULL;
pending = calloc(1, sizeof(struct recurring_op_s));
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
pending->call_id = call_id;
pending->interval_ms = op->interval_ms;
pending->op_type = strdup(operation);
pending->op_key = strdup(op_id);
pending->rsc_id = strdup(rsc->id);
pending->start_time = time(NULL);
pending->user_data = op->user_data? strdup(op->user_data) : NULL;
g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
if ((op->interval_ms > 0)
&& (op->start_delay > START_DELAY_THRESHOLD)) {
int target_rc = 0;
crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
op->rc = target_rc;
op->op_status = PCMK_LRM_OP_DONE;
send_direct_ack(NULL, NULL, rsc, op, rsc->id);
}
pending->params = op->params;
op->params = NULL;
}
free(op_id);
lrmd_free_event(op);
return;
}
int last_resource_update = 0;
static void
cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
switch (rc) {
case pcmk_ok:
case -pcmk_err_diff_failed:
case -pcmk_err_diff_resync:
crm_trace("Resource update %d complete: rc=%d", call_id, rc);
break;
default:
crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
}
if (call_id == last_resource_update) {
last_resource_update = 0;
trigger_fsa(fsa_source);
}
}
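/*!
 * \internal
 * \brief Write an operation result to the CIB status section
 *
 * \param[in] node_name  Node where the operation executed
 * \param[in] rsc        Information about the operation's resource
 * \param[in] op         Operation result to record
 *
 * \return The CIB connection's call number for the update (or a negative
 *         error code on failure)
 */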
static int
do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int rc = pcmk_ok;
xmlNode *update, *iter = NULL;
int call_opt = crmd_cib_smart_opt();
const char *uuid = NULL;
CRM_CHECK(op != NULL, return 0);
iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
update = iter;
iter = create_xml_node(iter, XML_CIB_TAG_STATE);
if (safe_str_eq(node_name, fsa_our_uname)) {
uuid = fsa_our_uuid;
} else {
        /* A remote node's UUID and uname are equal */
uuid = node_name;
crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
}
CRM_LOG_ASSERT(uuid != NULL);
if(uuid == NULL) {
rc = -EINVAL;
goto done;
}
crm_xml_add(iter, XML_ATTR_UUID, uuid);
crm_xml_add(iter, XML_ATTR_UNAME, node_name);
crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
iter = create_xml_node(iter, XML_CIB_TAG_LRM);
crm_xml_add(iter, XML_ATTR_ID, uuid);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
build_operation_update(iter, rsc, op, node_name, __FUNCTION__);
if (rsc) {
const char *container = NULL;
crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard);
crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
if (op->params) {
container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
}
if (container) {
crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
}
} else {
crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
goto cleanup;
}
crm_log_xml_trace(update, __FUNCTION__);
/* make it an asynchronous call and be done with it
*
* Best case:
* the resource state will be discovered during
* the next signup or election.
*
* Bad case:
* we are shutting down and there is no DC at the time,
     * but then why were we shutting down anyway?
* (probably because of an internal error)
*
* Worst case:
* we get shot for having resources "running" that really weren't
*
* the alternative however means blocking here for too long, which
* isn't acceptable
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
if (rc > 0) {
last_resource_update = rc;
}
done:
/* the return code is a call number, not an error code */
crm_trace("Sent resource state update message: %d for %s=%u on %s",
rc, op->op_type, op->interval_ms, op->rsc_id);
fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
cleanup:
free_xml(update);
return rc;
}
void
do_lrm_event(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
{
CRM_CHECK(FALSE, return);
}
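/*!
 * \internal
 * \brief Replace literal "\n" escape sequences in a string with real newlines
 *
 * \param[in] string  String to unescape (may be NULL)
 *
 * \return Newly allocated copy of string with each two-character "\n"
 *         sequence replaced in place by a newline and a space (preserving
 *         length), or NULL if string was NULL (caller must free the result)
 */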
static char *
unescape_newlines(const char *string)
{
char *pch = NULL;
char *ret = NULL;
static const char *escaped_newline = "\\n";
if (!string) {
return NULL;
}
ret = strdup(string);
pch = strstr(ret, escaped_newline);
while (pch != NULL) {
/* 2 chars for 2 chars, null-termination irrelevant */
memcpy(pch, "\n ", 2 * sizeof(char));
pch = strstr(pch, escaped_newline);
}
return ret;
}
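/*!
 * \internal
 * \brief Check whether a resource's last recorded failure matches an operation
 *
 * \param[in] lrm_state    Executor state containing resource history
 * \param[in] rsc_id       ID of resource to check
 * \param[in] op_type      Operation name to match
 * \param[in] interval_ms  Operation interval to match
 *
 * \return TRUE if the resource's last recorded failure matches the given
 *         operation and interval, FALSE otherwise
 */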
static bool
did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
const char * op_type, guint interval_ms)
{
rsc_history_t *entry = NULL;
CRM_CHECK(lrm_state != NULL, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
CRM_CHECK(op_type != NULL, return FALSE);
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->failed == NULL) {
return FALSE;
}
if (crm_str_eq(entry->failed->rsc_id, rsc_id, TRUE)
&& safe_str_eq(entry->failed->op_type, op_type)
&& entry->failed->interval_ms == interval_ms) {
return TRUE;
}
return FALSE;
}
void
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
struct recurring_op_s *pending, xmlNode *action_xml)
{
char *op_id = NULL;
char *op_key = NULL;
int update_id = 0;
gboolean remove = FALSE;
gboolean removed = FALSE;
bool need_direct_ack = FALSE;
lrmd_rsc_info_t *rsc = NULL;
const char *node_name = NULL;
CRM_CHECK(op != NULL, return);
CRM_CHECK(op->rsc_id != NULL, return);
// Remap new status codes for older DCs
if (compare_version(fsa_our_dc_version, "3.2.0") < 0) {
switch (op->op_status) {
case PCMK_LRM_OP_NOT_CONNECTED:
op->op_status = PCMK_LRM_OP_ERROR;
op->rc = PCMK_OCF_CONNECTION_DIED;
break;
case PCMK_LRM_OP_INVALID:
op->op_status = PCMK_LRM_OP_ERROR;
op->rc = CRM_DIRECT_NACK_RC;
break;
default:
break;
}
}
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms);
// Get resource info if available (from executor state or action XML)
if (lrm_state) {
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
}
if ((rsc == NULL) && action_xml) {
xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE);
const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER);
const char *type = crm_element_value(xml, XML_ATTR_TYPE);
if (standard && type) {
crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
op->rsc_id, standard,
(provider? ":" : ""), (provider? provider : ""), type);
rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
} else {
crm_err("Can't process %s result because %s agent information not cached or in XML",
op_key, op->rsc_id);
}
}
// Get node name if available (from executor state or action XML)
if (lrm_state) {
node_name = lrm_state->node_name;
} else if (action_xml) {
node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET);
}
if(pending == NULL) {
remove = TRUE;
if (lrm_state) {
pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
}
}
if (op->op_status == PCMK_LRM_OP_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_MASTER:
// Leave it to the TE/scheduler to decide if this is an error
op->op_status = PCMK_LRM_OP_DONE;
break;
default:
/* Nothing to do */
break;
}
}
if (op->op_status != PCMK_LRM_OP_CANCELLED) {
/* We might not record the result, so directly acknowledge it to the
* originator instead, so it doesn't time out waiting for the result
* (especially important if part of a transition).
*/
need_direct_ack = TRUE;
if (controld_action_is_recordable(op->op_type)) {
if (node_name && rsc) {
// We should record the result, and happily, we can
update_id = do_update_resource(node_name, rsc, op);
need_direct_ack = FALSE;
} else if (op->rsc_deleted) {
/* We shouldn't record the result (likely the resource was
* refreshed, cleaned, or removed while this operation was
* in flight).
*/
crm_notice("Not recording %s result in CIB because "
"resource information was removed since it was initiated",
op_key);
} else {
/* This shouldn't be possible; the executor didn't consider the
* resource deleted, but we couldn't find resource or node
* information.
*/
crm_err("Unable to record %s result in CIB: %s", op_key,
(node_name? "No resource information" : "No node name"));
}
}
} else if (op->interval_ms == 0) {
/* A non-recurring operation was cancelled. Most likely, the
* never-initiated action was removed from the executor's pending
* operations list upon resource removal.
*/
need_direct_ack = TRUE;
} else if (pending == NULL) {
/* This recurring operation was cancelled, but was not pending. No
* transition actions are waiting on it, nothing needs to be done.
*/
} else if (op->user_data == NULL) {
/* This recurring operation was cancelled and pending, but we don't
* have a transition key. This should never happen.
*/
crm_err("Recurring operation %s was cancelled without transition information",
op_key);
} else if (pending->remove) {
/* This recurring operation was cancelled (by us) and pending, and we
* have been waiting for it to finish.
*/
if (lrm_state) {
erase_lrm_history_by_op(lrm_state, op);
}
        /* If the recurring operation had failed, the lrm_rsc_op is recorded as
         * "last_failure", which erase_lrm_history_by_op() deliberately does
         * not erase from the CIB. In that case, the cancel action would never
         * get confirmed by the DC via process_op_deletion(), and the cluster
         * transition would get stuck waiting for the remaining action timer to
         * time out.
*
* Directly acknowledge the cancel operation in this case.
*/
if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
pending->op_type, pending->interval_ms)) {
need_direct_ack = TRUE;
}
} else if (op->rsc_deleted) {
/* This recurring operation was cancelled (but not by us, and the
* executor does not have resource information, likely due to resource
* cleanup, refresh, or removal) and pending.
*/
crm_debug("Recurring op %s was cancelled due to resource deletion",
op_key);
need_direct_ack = TRUE;
} else {
/* This recurring operation was cancelled (but not by us, likely by the
* executor before stopping the resource) and pending. We don't need to
* do anything special.
*/
}
if (need_direct_ack) {
send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
}
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if (lrm_state && ((op->interval_ms == 0)
|| (op->op_status == PCMK_LRM_OP_CANCELLED))) {
gboolean found = g_hash_table_remove(lrm_state->pending_ops, op_id);
if (op->interval_ms != 0) {
removed = TRUE;
} else if (found) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id,
g_hash_table_size(lrm_state->pending_ops));
}
}
if (node_name == NULL) {
node_name = "unknown node"; // for logging
}
switch (op->op_status) {
case PCMK_LRM_OP_CANCELLED:
crm_info("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, (removed? "true" : "false"));
break;
case PCMK_LRM_OP_DONE:
crm_notice("Result of %s operation for %s on %s: %d (%s) "
CRM_XS " call=%d key=%s confirmed=%s cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
op->rc, services_ocf_exitcode_str(op->rc),
op->call_id, op_key, (removed? "true" : "false"),
update_id);
break;
case PCMK_LRM_OP_TIMEOUT:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s timeout=%dms",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status),
op->call_id, op_key, op->timeout);
break;
default:
crm_err("Result of %s operation for %s on %s: %s "
CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d",
crm_action_str(op->op_type, op->interval_ms),
op->rsc_id, node_name,
services_lrm_status_str(op->op_status), op->call_id, op_key,
(removed? "true" : "false"), op->op_status, update_id);
}
if (op->output) {
char *prefix =
crm_strdup_printf("%s-" CRM_OP_FMT ":%d", node_name,
op->rsc_id, op->op_type, op->interval_ms,
op->call_id);
if (op->rc) {
crm_log_output(LOG_NOTICE, prefix, op->output);
} else {
crm_log_output(LOG_DEBUG, prefix, op->output);
}
free(prefix);
}
if (lrm_state) {
if (safe_str_neq(op->op_type, RSC_METADATA)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (rsc && (op->rc == PCMK_OCF_OK)) {
char *metadata = unescape_newlines(op->output);
metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
if (lrm_state) {
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
}
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
mainloop_set_trigger(fsa_source);
if (lrm_state && rsc) {
update_history_cache(lrm_state, rsc, op);
}
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
}
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
index 63e6b33444..66ad426ddf 100644
--- a/daemons/controld/controld_execd_state.c
+++ b/daemons/controld/controld_execd_state.c
@@ -1,834 +1,829 @@
/*
* Copyright 2012-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
#include
#include
#include
+#include
+#include
+
GHashTable *lrm_state_table = NULL;
extern GHashTable *proxy_table;
int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg);
void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg));
static void
free_rsc_info(gpointer value)
{
lrmd_rsc_info_t *rsc_info = value;
lrmd_free_rsc_info(rsc_info);
}
static void
free_deletion_op(gpointer value)
{
struct pending_deletion_op_s *op = value;
free(op->rsc);
delete_ha_msg_input(op->input);
free(op);
}
static void
free_recurring_op(gpointer value)
{
struct recurring_op_s *op = (struct recurring_op_s *)value;
free(op->user_data);
free(op->rsc_id);
free(op->op_type);
free(op->op_key);
if (op->params) {
g_hash_table_destroy(op->params);
}
free(op);
}
static gboolean
fail_pending_op(gpointer key, gpointer value, gpointer user_data)
{
lrmd_event_data_t event = { 0, };
lrm_state_t *lrm_state = user_data;
struct recurring_op_s *op = (struct recurring_op_s *)value;
crm_trace("Pre-emptively failing " CRM_OP_FMT " on %s (call=%s, %s)",
op->rsc_id, op->op_type, op->interval_ms,
lrm_state->node_name, (char*)key, op->user_data);
event.type = lrmd_event_exec_complete;
event.rsc_id = op->rsc_id;
event.op_type = op->op_type;
event.user_data = op->user_data;
event.timeout = 0;
event.interval_ms = op->interval_ms;
event.rc = PCMK_OCF_UNKNOWN_ERROR;
event.op_status = PCMK_LRM_OP_NOT_CONNECTED;
event.t_run = op->start_time;
event.t_rcchange = op->start_time;
event.call_id = op->call_id;
event.remote_nodename = lrm_state->node_name;
event.params = op->params;
process_lrm_event(lrm_state, &event, op, NULL);
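/* Returning TRUE from this GHRFunc tells g_hash_table_foreach_remove() to
 * drop the entry, so each pre-emptively failed op is also removed from
 * pending_ops. */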
return TRUE;
}
gboolean
lrm_state_is_local(lrm_state_t *lrm_state)
{
if (lrm_state == NULL || fsa_our_uname == NULL) {
return FALSE;
}
if (strcmp(lrm_state->node_name, fsa_our_uname) != 0) {
return FALSE;
}
return TRUE;
}
lrm_state_t *
lrm_state_create(const char *node_name)
{
lrm_state_t *state = NULL;
if (!node_name) {
crm_err("No node name given for lrm state object");
return NULL;
}
state = calloc(1, sizeof(lrm_state_t));
if (!state) {
return NULL;
}
state->node_name = strdup(node_name);
state->rsc_info_cache = g_hash_table_new_full(crm_str_hash,
g_str_equal, NULL, free_rsc_info);
state->deletion_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
free_deletion_op);
state->pending_ops = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
free_recurring_op);
state->resource_history = g_hash_table_new_full(crm_str_hash,
g_str_equal, NULL, history_free);
state->metadata_cache = metadata_cache_new();
g_hash_table_insert(lrm_state_table, (char *)state->node_name, state);
return state;
}
void
lrm_state_destroy(const char *node_name)
{
g_hash_table_remove(lrm_state_table, node_name);
}
static gboolean
remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
{
remote_proxy_t *proxy = value;
const char *node_name = user_data;
if (safe_str_eq(node_name, proxy->node_name)) {
return TRUE;
}
return FALSE;
}
static void
internal_lrm_state_destroy(gpointer data)
{
lrm_state_t *lrm_state = data;
if (!lrm_state) {
return;
}
crm_trace("Destroying proxy table %s with %d members", lrm_state->node_name, g_hash_table_size(proxy_table));
g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name);
remote_ra_cleanup(lrm_state);
lrmd_api_delete(lrm_state->conn);
if (lrm_state->rsc_info_cache) {
crm_trace("Destroying rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache));
g_hash_table_destroy(lrm_state->rsc_info_cache);
}
if (lrm_state->resource_history) {
crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history));
g_hash_table_destroy(lrm_state->resource_history);
}
if (lrm_state->deletion_ops) {
crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops));
g_hash_table_destroy(lrm_state->deletion_ops);
}
if (lrm_state->pending_ops) {
crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops));
g_hash_table_destroy(lrm_state->pending_ops);
}
metadata_cache_free(lrm_state->metadata_cache);
free((char *)lrm_state->node_name);
free(lrm_state);
}
void
lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata)
{
if (lrm_state->resource_history) {
crm_trace("Re-setting history op cache with %d members",
g_hash_table_size(lrm_state->resource_history));
g_hash_table_remove_all(lrm_state->resource_history);
}
if (lrm_state->deletion_ops) {
crm_trace("Re-setting deletion op cache with %d members",
g_hash_table_size(lrm_state->deletion_ops));
g_hash_table_remove_all(lrm_state->deletion_ops);
}
if (lrm_state->pending_ops) {
crm_trace("Re-setting pending op cache with %d members",
g_hash_table_size(lrm_state->pending_ops));
g_hash_table_remove_all(lrm_state->pending_ops);
}
if (lrm_state->rsc_info_cache) {
crm_trace("Re-setting rsc info cache with %d members",
g_hash_table_size(lrm_state->rsc_info_cache));
g_hash_table_remove_all(lrm_state->rsc_info_cache);
}
if (reset_metadata) {
metadata_cache_reset(lrm_state->metadata_cache);
}
}
gboolean
lrm_state_init_local(void)
{
if (lrm_state_table) {
return TRUE;
}
lrm_state_table =
g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, internal_lrm_state_destroy);
if (!lrm_state_table) {
return FALSE;
}
proxy_table =
g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, remote_proxy_free);
if (!proxy_table) {
g_hash_table_destroy(lrm_state_table);
lrm_state_table = NULL;
return FALSE;
}
return TRUE;
}
void
lrm_state_destroy_all(void)
{
if (lrm_state_table) {
crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table));
g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL;
}
if(proxy_table) {
crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table));
g_hash_table_destroy(proxy_table); proxy_table = NULL;
}
}
lrm_state_t *
lrm_state_find(const char *node_name)
{
if (!node_name) {
return NULL;
}
return g_hash_table_lookup(lrm_state_table, node_name);
}
lrm_state_t *
lrm_state_find_or_create(const char *node_name)
{
lrm_state_t *lrm_state;
lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
if (!lrm_state) {
lrm_state = lrm_state_create(node_name);
}
return lrm_state;
}
GList *
lrm_state_get_list(void)
{
return g_hash_table_get_values(lrm_state_table);
}
static remote_proxy_t *
find_connected_proxy_by_node(const char * node_name)
{
GHashTableIter gIter;
remote_proxy_t *proxy = NULL;
CRM_CHECK(proxy_table != NULL, return NULL);
g_hash_table_iter_init(&gIter, proxy_table);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) {
if (proxy->source
&& safe_str_eq(node_name, proxy->node_name)) {
return proxy;
}
}
return NULL;
}
static void
remote_proxy_disconnect_by_node(const char * node_name)
{
remote_proxy_t *proxy = NULL;
CRM_CHECK(proxy_table != NULL, return);
while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) {
/* mainloop_del_ipc_client() eventually calls remote_proxy_disconnected(),
 * which removes the entry from proxy_table.
 * Do not do this inside a g_hash_table_iter_next() loop. */
if (proxy->source) {
mainloop_del_ipc_client(proxy->source);
}
}
return;
}
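/* Illustrative sketch (not part of this patch): the restart-the-lookup
 * pattern used by remote_proxy_disconnect_by_node() above generalizes to any
 * GHashTable whose entries get removed as a side effect of a callback. The
 * helper below is hypothetical, shown only to make the pattern explicit.
 */
#include <glib.h>
static void
drain_matching(GHashTable *table, GHRFunc matches, gpointer user_data)
{
GHashTableIter iter;
gpointer key = NULL;
gpointer value = NULL;
for (;;) {
gboolean found = FALSE;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
if (matches(key, value, user_data)) {
found = TRUE;
break; /* stop iterating before any mutation happens */
}
}
if (!found) {
return;
}
/* Safe now: the iterator is no longer in use */
g_hash_table_remove(table, key);
}
}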
void
lrm_state_disconnect_only(lrm_state_t * lrm_state)
{
int removed = 0;
if (!lrm_state->conn) {
return;
}
crm_trace("Disconnecting %s", lrm_state->node_name);
remote_proxy_disconnect_by_node(lrm_state->node_name);
((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn);
if (is_not_set(fsa_input_register, R_SHUTDOWN)) {
removed = g_hash_table_foreach_remove(lrm_state->pending_ops, fail_pending_op, lrm_state);
crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name);
}
}
void
lrm_state_disconnect(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return;
}
lrm_state_disconnect_only(lrm_state);
lrmd_api_delete(lrm_state->conn);
lrm_state->conn = NULL;
}
int
lrm_state_is_connected(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return FALSE;
}
return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn);
}
int
lrm_state_poke_connection(lrm_state_t * lrm_state)
{
if (!lrm_state->conn) {
return -1;
}
return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn);
}
int
lrm_state_ipc_connect(lrm_state_t * lrm_state)
{
int ret;
if (!lrm_state->conn) {
lrm_state->conn = lrmd_api_new();
((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, lrm_op_callback);
}
ret = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL);
if (ret != pcmk_ok) {
lrm_state->num_lrm_register_fails++;
} else {
lrm_state->num_lrm_register_fails = 0;
}
return ret;
}
static remote_proxy_t *
crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel)
{
static struct ipc_client_callbacks proxy_callbacks = {
.dispatch = remote_proxy_dispatch,
.destroy = remote_proxy_disconnected
};
remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name,
session_id, channel);
return proxy;
}
gboolean
crmd_is_proxy_session(const char *session)
{
return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE;
}
void
crmd_proxy_send(const char *session, xmlNode *msg)
{
remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
lrm_state_t *lrm_state = NULL;
if (!proxy) {
return;
}
crm_log_xml_trace(msg, "to-proxy");
lrm_state = lrm_state_find(proxy->node_name);
if (lrm_state) {
crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name);
remote_proxy_relay_event(proxy, msg);
}
}
static void
crmd_proxy_dispatch(const char *session, xmlNode *msg)
{
crm_log_xml_trace(msg, "controller-proxy[inbound]");
crm_xml_add(msg, F_CRM_SYS_FROM, session);
if (crmd_authorize_message(msg, NULL, session)) {
route_message(C_IPC_MESSAGE, msg);
}
trigger_fsa(fsa_source);
}
static void
remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
if (rc != pcmk_ok) {
crm_err("Query resulted in an error: %s", pcmk_strerror(rc));
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
}
} else {
lrmd_t * lrmd = (lrmd_t *)user_data;
crm_time_t *now = crm_time_new(NULL);
GHashTable *config_hash = crm_str_table_new();
crm_debug("Call %d : Parsing CIB options", call_id);
unpack_instance_attributes(
output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now);
/* Now send it to the remote peer */
remote_proxy_check(lrmd, config_hash);
g_hash_table_destroy(config_hash);
crm_time_free(now);
}
}
static void
crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
{
lrm_state_t *lrm_state = userdata;
const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION);
remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session);
const char *op = crm_element_value(msg, F_LRMD_IPC_OP);
if (safe_str_eq(op, LRMD_IPC_OP_NEW)) {
const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER);
proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel);
if (!remote_ra_controlling_guest(lrm_state)) {
if (proxy != NULL) {
/* Look up stonith-watchdog-timeout and send to the remote peer for validation */
int rc = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local);
fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, rc, 10, FALSE, lrmd,
"remote_config_check", remote_config_check, NULL);
}
} else {
crm_debug("Skipping remote_config_check for guest-nodes");
}
} else if (safe_str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ)) {
char *now_s = NULL;
time_t now = time(NULL);
crm_notice("%s requested shutdown of its remote connection",
lrm_state->node_name);
if (!remote_ra_is_in_maintenance(lrm_state)) {
now_s = crm_itoa(now);
update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
free(now_s);
remote_proxy_ack_shutdown(lrmd);
crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
lrm_state->node_name);
} else {
remote_proxy_nack_shutdown(lrmd);
crm_notice("Remote resource for %s is not managed so no ordered shutdown happening",
lrm_state->node_name);
}
return;
} else if (safe_str_eq(op, LRMD_IPC_OP_REQUEST) && proxy && proxy->is_local) {
/* This is for the controller, which we are, so don't try
* to send to ourselves over IPC -- do it directly.
*/
int flags = 0;
xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG);
CRM_CHECK(request != NULL, return);
#if ENABLE_ACL
CRM_CHECK(lrm_state->node_name, return);
crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote");
crm_acl_get_set_user(request, F_LRMD_IPC_USER, lrm_state->node_name);
#endif
/* Pacemaker Remote nodes don't know their own names (as known to the
* cluster). When getting a node info request with no name or ID, add
* the name, so we don't return info for ourselves instead of the
* Pacemaker Remote node.
*/
if (safe_str_eq(crm_element_value(request, F_CRM_TASK),
CRM_OP_NODE_INFO)) {
int node_id;
crm_element_value_int(request, XML_ATTR_ID, &node_id);
if ((node_id <= 0)
&& (crm_element_value(request, XML_ATTR_UNAME) == NULL)) {
crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name);
}
}
crmd_proxy_dispatch(session, request);
crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags);
if (flags & crm_ipc_client_response) {
int msg_id = 0;
xmlNode *op_reply = create_xml_node(NULL, "ack");
crm_xml_add(op_reply, "function", __FUNCTION__);
crm_xml_add_int(op_reply, "line", __LINE__);
crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id);
remote_proxy_relay_response(proxy, op_reply, msg_id);
free_xml(op_reply);
}
} else {
remote_proxy_cb(lrmd, lrm_state->node_name, msg);
}
}
int
lrm_state_remote_connect_async(lrm_state_t * lrm_state, const char *server, int port,
int timeout_ms)
{
int ret;
if (!lrm_state->conn) {
lrm_state->conn = lrmd_remote_api_new(lrm_state->node_name, server, port);
if (!lrm_state->conn) {
return -1;
}
((lrmd_t *) lrm_state->conn)->cmds->set_callback(lrm_state->conn, remote_lrm_op_callback);
lrmd_internal_set_proxy_callback(lrm_state->conn, lrm_state, crmd_remote_proxy_cb);
}
crm_trace("initiating remote connection to %s at %d with timeout %d", server, port, timeout_ms);
ret =
((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name,
timeout_ms);
if (ret != pcmk_ok) {
lrm_state->num_lrm_register_fails++;
} else {
lrm_state->num_lrm_register_fails = 0;
}
return ret;
}
int
lrm_state_get_metadata(lrm_state_t * lrm_state,
const char *class,
const char *provider,
const char *agent, char **output, enum lrmd_call_options options)
{
lrmd_key_value_t *params = NULL;
if (!lrm_state->conn) {
return -ENOTCONN;
}
/* Add the node name to the environment, as is done with normal resource
* action calls. Meta-data calls shouldn't need it, but some agents are
* written with an ocf_local_nodename call at the beginning regardless of
* action. Without the environment variable, the agent would try to contact
* the controller to get the node name -- but the controller would be
* blocking on the synchronous meta-data call.
*
* At this point, we have to assume that agents are unlikely to make other
* calls that require the controller, such as crm_node --quorum or
* --cluster-id.
*
* @TODO Make meta-data calls asynchronous. (This will be part of a larger
* project to make meta-data calls via the executor rather than directly.)
*/
params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET,
lrm_state->node_name);
return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn,
class, provider, agent, output, options, params);
}
int
lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
guint interval_ms)
{
if (!lrm_state->conn) {
return -ENOTCONN;
}
/* @TODO Figure out a way to make this asynchronous?
 * NOTICE: Currently it's synchronous and directly acknowledged in do_lrm_invoke(). */
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms);
}
return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id,
action, interval_ms);
}
lrmd_rsc_info_t *
lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options)
{
lrmd_rsc_info_t *rsc = NULL;
if (!lrm_state->conn) {
return NULL;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_get_rsc_info(lrm_state, rsc_id);
}
rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id);
if (rsc == NULL) {
/* only contact the lrmd if we don't already have a cached rsc info */
rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options);
if (rsc == NULL) {
return NULL;
}
/* cache the result */
g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc);
}
return lrmd_copy_rsc_info(rsc);
}
int
lrm_state_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
lrmd_key_value_t * params)
{
if (!lrm_state->conn) {
lrmd_key_value_freeall(params);
return -ENOTCONN;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
return remote_ra_exec(lrm_state, rsc_id, action, userdata, interval_ms,
timeout, start_delay, params);
}
return ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn,
rsc_id,
action,
userdata,
interval_ms,
timeout,
start_delay,
lrmd_opt_notify_changes_only, params);
}
int
lrm_state_register_rsc(lrm_state_t * lrm_state,
const char *rsc_id,
const char *class,
const char *provider, const char *agent, enum lrmd_call_options options)
{
lrmd_t *conn = (lrmd_t *) lrm_state->conn;
if (conn == NULL) {
return -ENOTCONN;
}
if (is_remote_lrmd_ra(agent, provider, NULL)) {
return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL;
}
/* @TODO Implement an asynchronous version of this (currently a blocking
* call to the lrmd).
*/
return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider,
agent, options);
}
int
lrm_state_unregister_rsc(lrm_state_t * lrm_state,
const char *rsc_id, enum lrmd_call_options options)
{
if (!lrm_state->conn) {
return -ENOTCONN;
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
lrm_state_destroy(rsc_id);
return pcmk_ok;
}
g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id);
/* @TODO Optimize this ... this function is a blocking round trip from
* client to daemon. The controld_execd_state.c code path that uses this
* function should always treat it as an async operation. The executor API
* should make an async version available.
*/
return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
}
/*
* Functions for sending alerts via local executor connection
*/
static GListPtr crmd_alert_list = NULL;
void
crmd_unpack_alerts(xmlNode *alerts)
{
pe_free_alert_list(crmd_alert_list);
crmd_alert_list = pe_unpack_alerts(alerts);
}
void
crmd_alert_node_event(crm_node_t *node)
{
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
node->uname, node->id, node->state);
}
void
crmd_alert_fencing_op(stonith_event_t * e)
{
char *desc;
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
e->action, e->target,
(e->executioner? e->executioner : ""),
e->client_origin, e->origin,
pcmk_strerror(e->result), e->id);
lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
e->target, e->operation, desc, e->result);
free(desc);
}
void
crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
{
lrm_state_t *lrm_state;
if (crmd_alert_list == NULL) {
return;
}
lrm_state = lrm_state_find(fsa_our_uname);
if (lrm_state == NULL) {
return;
}
lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
op);
}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index cbabb9ab19..afb862a17a 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,949 +1,928 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
+ * The version control history for this file may have further details.
+ *
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
-#include
-#include
-#include
-#include
-
-#ifdef HAVE_SYS_REBOOT_H
-# include
-# include
-#endif
+#include
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and cap how many times a
* fencing failure may restart the transition for that target.
*/
struct st_fail_rec {
int count;
};
static unsigned long int stonith_max_attempts = 10;
static GHashTable *stonith_failures = NULL;
void
update_stonith_max_attempts(const char *value)
{
if (safe_str_eq(value, CRM_INFINITY_S)) {
stonith_max_attempts = CRM_SCORE_INFINITY;
} else {
stonith_max_attempts = crm_int_helper(value, NULL);
}
}
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *value = NULL;
if (stonith_failures == NULL) {
return FALSE;
}
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;
too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}
/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
if (stonith_failures == NULL) {
return;
}
if (target) {
struct st_fail_rec *rec = NULL;
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}
static void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
if (stonith_failures == NULL) {
stonith_failures = crm_str_table_new();
}
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count++;
} else {
rec = malloc(sizeof(struct st_fail_rec));
if(rec == NULL) {
return;
}
rec->count = 1;
g_hash_table_insert(stonith_failures, strdup(target), rec);
}
}
/* end stonith fail count functions */
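/* Illustrative sketch (not part of this patch): how the fail-count helpers
 * above are meant to be driven by callers (the function names are real; the
 * flow is paraphrased from tengine_stonith_callback() and
 * tengine_stonith_notify() below):
 *
 *   st_fail_count_increment(target);   // after each failed fencing attempt
 *   if (too_many_st_failures(target)) {
 *       // give up: stop starting new transitions for this target
 *   }
 *   st_fail_count_reset(target);       // after fencing finally succeeds
 */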
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Fencing update %d for %s: failed - %s (%d)",
call_id, (char *)user_data, pcmk_strerror(rc), rc);
crm_log_xml_warn(msg, "Failed update");
abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
} else {
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
}
}
static void
send_stonith_update(crm_action_t *action, const char *target, const char *uuid)
{
int rc = pcmk_ok;
crm_node_t *peer = NULL;
/* We (usually) rely on the membership layer to do node_update_cluster,
* and the peer status callback to do node_update_peer, because the node
* might have already rejoined before we get the stonith result here.
*/
int flags = node_update_join | node_update_expected;
/* zero out the node-status & remove all LRM status info */
xmlNode *node_state = NULL;
CRM_CHECK(target != NULL, return);
CRM_CHECK(uuid != NULL, return);
/* Make sure the membership and join caches are accurate */
peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= node_update_cluster;
}
if (peer->uuid == NULL) {
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
peer->uuid = strdup(uuid);
}
crmd_peer_down(peer, TRUE);
/* Generate a node state update for the CIB */
node_state = create_node_state_update(peer, flags, NULL, __FUNCTION__);
/* we have to mark whether or not remote nodes have already been fenced */
if (peer->flags & crm_remote_node) {
time_t now = time(NULL);
char *now_s = crm_itoa(now);
crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
free(now_s);
}
/* Force our known ID */
crm_xml_add(node_state, XML_ATTR_UUID, uuid);
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
cib_quorum_override | cib_scope_local | cib_can_create);
/* Delay processing the trigger until the update completes */
crm_debug("Sending fencing update %d for %s", rc, target);
fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
/* Make sure it sticks */
/* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
free_xml(node_state);
return;
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target        Don't restart if this node (or any node, if NULL) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum transition_action abort_action,
const char *target, xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != tg_stop) && too_many_st_failures(target)) {
abort_action = tg_stop;
}
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GListPtr stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] target Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GListPtr iter = stonith_cleanup_list;
while (iter != NULL) {
GListPtr tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (safe_str_eq(target, iter_name)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup()
{
if (stonith_cleanup_list) {
GListPtr iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup()
{
GListPtr iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_node_t *target_node = crm_get_peer(0, target);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
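/* Illustrative sketch (not part of this patch): the intended lifecycle of
 * the cleanup list, per the comments above (all function names are real):
 *
 *   add_stonith_cleanup(target);   // a non-DC node saw its peer get fenced
 *   // ... the DC is lost and a new DC is elected ...
 *   execute_stonith_cleanup();     // new DC re-sends the node-state updates
 *   // or, once notifications are known to have gone out:
 *   purge_stonith_cleanup();
 */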
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
static stonith_t *stonith_api = NULL;
static crm_trigger_t *stonith_reconnect = NULL;
static char *te_client_id = NULL;
static gboolean
fail_incompletable_stonith(crm_graph_t *graph)
{
GListPtr lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GListPtr lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (synapse->confirmed) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
if (action->type != action_type_crm || action->confirmed) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && safe_str_eq(task, CRM_OP_FENCE)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
te_cleanup_stonith_history_sync(st, FALSE);
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
if (stonith_api) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(st);
}
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE);
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED);
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
static void
tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
{
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (st_event == NULL) {
crm_err("Notify data not found");
return;
}
crmd_alert_fencing_op(st_event);
if ((st_event->result == pcmk_ok) && safe_str_eq("on", st_event->action)) {
crm_notice("%s was successfully unfenced by %s (at the request of %s)",
st_event->target,
st_event->executioner? st_event->executioner : "",
st_event->origin);
/* TODO: Hook up st_event->device */
return;
} else if (safe_str_eq("on", st_event->action)) {
crm_err("Unfencing of %s by %s failed: %s (%d)",
st_event->target,
st_event->executioner? st_event->executioner : "",
pcmk_strerror(st_event->result), st_event->result);
return;
} else if ((st_event->result == pcmk_ok)
&& crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
- crm_crit("We were allegedly just fenced by %s for %s!",
- st_event->executioner? st_event->executioner : "",
- st_event->origin); /* Dumps blackbox if enabled */
-
- qb_log_fini(); /* Try to get the above log message to disk - somehow */
-
- /* Get out ASAP and do not come back up.
- *
- * Triggering a reboot is also not the worst idea either since
- * the rest of the cluster thinks we're safely down
- */
-
-#ifdef RB_HALT_SYSTEM
- reboot(RB_HALT_SYSTEM);
-#endif
-
- /*
- * If reboot() fails or is not supported, coming back up will
- * probably lead to a situation where the other nodes set our
- * status to 'lost' because of the fencing callback and will
- * discard subsequent election votes with:
- *
- * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
- *
- * So just stay dead, something is seriously messed up anyway.
+ /* We were notified of our own fencing. Most likely, either fencing was
+ * misconfigured, or fabric fencing that doesn't cut cluster
+ * communication is in use.
*
+ * Either way, shutting down the local host is a good idea, to require
+ * administrator intervention. Also, other nodes would otherwise likely
+ * set our status to lost because of the fencing callback and discard
+ * our subsequent election votes as "not part of our cluster".
*/
- exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
+ crm_crit("We were allegedly just fenced by %s for %s!",
+ st_event->executioner? st_event->executioner : "the cluster",
+ st_event->origin); /* Dumps blackbox if enabled */
+ pcmk_panic(__FUNCTION__);
return;
}
/* Update the count of stonith failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
if (st_event->result == pcmk_ok) {
st_fail_count_reset(st_event->target);
} else {
st_fail_count_increment(st_event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
CRM_XS " initiator=%s ref=%s",
st_event->target, st_event->result == pcmk_ok ? "" : " not",
st_event->action,
st_event->executioner ? st_event->executioner : "",
(st_event->client_origin? st_event->client_origin : ""),
pcmk_strerror(st_event->result),
st_event->origin, st_event->id);
if (st_event->result == pcmk_ok) {
crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
const char *uuid = NULL;
gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
if(AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, st_event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via crm_remote_peer_cache_refresh(). For now, we rely
* on the PE creating fence pseudo-events for the guests.
*/
if (st_event->client_origin
&& safe_str_neq(st_event->client_origin, te_client_id)) {
/* Abort the current transition graph if it wasn't us
* that invoked stonith to fence someone
*/
crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
}
/* Assume it was our leader if we don't currently have one */
} else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
&& is_not_set(peer->flags, crm_remote_node)) {
crm_notice("Target %s our leader %s (recorded: %s)",
fsa_our_dc ? "was" : "may have been", st_event->target,
fsa_our_dc ? fsa_our_dc : "");
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (we_are_executioner) {
send_stonith_update(NULL, st_event->target, uuid);
}
add_stonith_cleanup(st_event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(st_event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
*
* \return TRUE
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts (29 waits of 2s between 30 tries), meaning the controller
* could be blocked as long as 58s.
*/
static gboolean
te_connect_stonith(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return TRUE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (rc != pcmk_ok) {
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_err("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return TRUE;
}
}
if (rc == pcmk_ok) {
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_FENCE,
tengine_stonith_notify);
stonith_api->cmds->register_notification(stonith_api,
T_STONITH_NOTIFY_HISTORY_SYNCED,
tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return TRUE;
}
/*!
\internal
\brief Schedule fencer connection attempt in main loop
*/
void
controld_trigger_fencer_connect()
{
if (stonith_reconnect == NULL) {
stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
te_connect_stonith,
GINT_TO_POINTER(TRUE));
}
set_bit(fsa_input_register, R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
}
void
controld_disconnect_fencer(bool destroy)
{
if (stonith_api) {
// Prevent fencer connection from coming up again
clear_bit(fsa_input_register, R_ST_REQUIRED);
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(stonith_api);
}
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE);
stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED);
}
if (destroy) {
if (stonith_api) {
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
if (stonith_reconnect) {
mainloop_destroy_trigger(stonith_reconnect);
stonith_reconnect = NULL;
}
if (te_client_id) {
free(te_client_id);
te_client_id = NULL;
}
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
crm_action_t *action = NULL;
int call_id = data->call_id;
int rc = data->rc;
char *userdata = data->userdata;
CRM_CHECK(userdata != NULL, return);
crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
pcmk_strerror(rc), rc);
if (AM_I_DC == FALSE) {
return;
}
/* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
/* op->call_id, op->optype, op->node_name, op->op_result, */
/* (char *)op->node_list, op->private_data); */
/* filter out old STONITH actions */
CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
goto bail);
if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
|| transition_graph->id != transition_id) {
crm_info("Ignoring STONITH action initiated outside of the current transition");
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Stonith action not matched");
goto bail;
}
stop_te_timer(action->timer);
if (rc == pcmk_ok) {
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *op = crm_meta_value(action->params, "stonith_action");
crm_info("Stonith operation %d for %s passed", call_id, target);
if (action->confirmed == FALSE) {
te_action_confirmed(action, NULL);
if (safe_str_eq("on", op)) {
const char *value = NULL;
char *now = crm_itoa(time(NULL));
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
free(now);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
} else if (action->sent_update == FALSE) {
send_stonith_update(action, target, uuid);
action->sent_update = TRUE;
}
}
st_fail_count_reset(target);
} else {
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
enum transition_action abort_action = tg_restart;
action->failed = TRUE;
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
call_id, target, pcmk_strerror(rc));
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (rc == -ENODEV) {
crm_warn("No devices found in cluster to fence %s, giving up",
target);
abort_action = tg_stop;
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in tengine_stonith_notify().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
update_graph(transition_graph, action);
trigger_graph();
bail:
free(userdata);
free(uuid);
return;
}
gboolean
te_fence_node(crm_graph_t *graph, crm_action_t *action)
{
int rc = 0;
const char *id = NULL;
const char *uuid = NULL;
const char *target = NULL;
const char *type = NULL;
gboolean invalid_action = FALSE;
enum stonith_call_options options = st_opt_none;
id = ID(action->xml);
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
type = crm_meta_value(action->params, "stonith_action");
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return FALSE;
}
crm_notice("Requesting fencing (%s) of node %s "
CRM_XS " action=%s timeout=%d",
type, target, id, transition_graph->stonith_timeout);
/* Passing NULL means block until we can connect... */
te_connect_stonith(NULL);
if (crmd_join_phase_count(crm_join_confirmed) == 1) {
options |= st_opt_allow_suicide;
}
rc = stonith_api->cmds->fence(stonith_api, options, target, type,
transition_graph->stonith_timeout / 1000, 0);
stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000,
st_opt_timeout_updates,
generate_transition_key(transition_graph->id, action->id,
0, te_uuid),
"tengine_stonith_callback", tengine_stonith_callback);
return TRUE;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
static crm_trigger_t *stonith_history_sync_trigger = NULL;
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
if (free_timers) {
mainloop_timer_del(stonith_history_sync_timer_short);
stonith_history_sync_timer_short = NULL;
mainloop_timer_del(stonith_history_sync_timer_long);
stonith_history_sync_timer_long = NULL;
} else {
mainloop_timer_stop(stonith_history_sync_timer_short);
mainloop_timer_stop(stonith_history_sync_timer_long);
}
if (st) {
st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
}
}
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
te_cleanup_stonith_history_sync(st, FALSE);
crm_debug("Fence-history synced - cancel all timers");
}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
te_trigger_stonith_history_sync(bool long_timeout)
{
/* Trigger a sync in 5s, to give more nodes a chance to show up so that we
 * don't create unnecessary stonith-history-sync traffic.
 *
 * The long timeout of 30s is a fallback: after a successful connection to
 * the fencer, we wait up to 30s for the DC to trigger a history sync, and
 * if that doesn't happen (e.g. the fencer segfaulted and was restarted by
 * pacemakerd), we trigger a sync locally.
 */
/* Since do_stonith_history_sync() ultimately checks the fencer connection,
 * it is fine to leave stonith_history_sync_timer_short/_long and
 * stonith_history_sync_trigger around.
 */
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
if (long_timeout) {
if(stonith_history_sync_timer_long == NULL) {
stonith_history_sync_timer_long =
mainloop_timer_add("history_sync_long", 30000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
mainloop_timer_start(stonith_history_sync_timer_long);
} else {
if(stonith_history_sync_timer_short == NULL) {
stonith_history_sync_timer_short =
mainloop_timer_add("history_sync_short", 5000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
mainloop_timer_start(stonith_history_sync_timer_short);
}
}
/* end stonith history synchronization functions */
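/* Illustrative sketch (not part of this patch): the timer-arms-a-trigger
 * pattern above, reduced to plain GLib. All names below are hypothetical;
 * the timer only arms a low-priority deferred source, and repeated arming
 * coalesces into a single run, as mainloop_set_trigger() does above.
 */
#include <glib.h>
static guint sync_source = 0;
static gboolean
run_sync(gpointer user_data)
{
sync_source = 0;
/* the expensive, cluster-wide operation would go here */
return G_SOURCE_REMOVE; /* one-shot */
}
static gboolean
timer_popped(gpointer user_data)
{
/* arm the deferred source only once, mirroring how repeated
 * mainloop_set_trigger() calls coalesce */
if (sync_source == 0) {
sync_source = g_idle_add_full(G_PRIORITY_LOW, run_sync, NULL, NULL);
}
return G_SOURCE_REMOVE;
}
static void
schedule_sync(guint delay_ms)
{
g_timeout_add(delay_ms, timer_popped, NULL);
}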
diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c
index dc1937fc81..b00b08a9bd 100644
--- a/daemons/controld/controld_fsa.c
+++ b/daemons/controld/controld_fsa.c
@@ -1,661 +1,656 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
-
#include
#include
-#include
-#include
-#include
-#include
#include
char *fsa_our_dc = NULL;
cib_t *fsa_cib_conn = NULL;
char *fsa_our_dc_version = NULL;
char *fsa_our_uuid = NULL;
char *fsa_our_uname = NULL;
char *fsa_cluster_name = NULL;
fsa_timer_t *wait_timer = NULL; // How long to wait before retrying a cib or executor connection
fsa_timer_t *recheck_timer = NULL; // Periodically re-run scheduler to handle time-based actions
fsa_timer_t *election_trigger = NULL; /* How long to wait at startup, or after an election, for the DC to make contact */
fsa_timer_t *transition_timer = NULL; /* How long to delay the start of a new transition with the expectation something else might happen too */
fsa_timer_t *integration_timer = NULL;
fsa_timer_t *finalization_timer = NULL;
fsa_timer_t *shutdown_escalation_timer = NULL; /* How long to wait for the DC to stop all resources and give us the all-clear to shut down */
gboolean do_fsa_stall = FALSE;
long long fsa_input_register = 0;
long long fsa_actions = A_NOTHING;
enum crmd_fsa_state fsa_state = S_STARTING;
extern uint highest_born_on;
extern uint num_join_invites;
extern void initialize_join(gboolean before);
#define DOT_PREFIX "actions:trace: "
#define do_dot_log(fmt, args...) crm_trace( fmt, ##args)
long long do_state_transition(long long actions,
enum crmd_fsa_state cur_state,
enum crmd_fsa_state next_state, fsa_data_t * msg_data);
void s_crmd_fsa_actions(fsa_data_t * fsa_data);
void log_fsa_input(fsa_data_t * stored_msg);
void init_dotfile(void);
void
init_dotfile(void)
{
do_dot_log(DOT_PREFIX "digraph \"g\" {");
do_dot_log(DOT_PREFIX " size = \"30,30\"");
do_dot_log(DOT_PREFIX " graph [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX " node [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " shape = \"ellipse\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX " edge [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX "// special nodes");
do_dot_log(DOT_PREFIX " \"S_PENDING\" ");
do_dot_log(DOT_PREFIX " [");
do_dot_log(DOT_PREFIX " color = \"blue\"");
do_dot_log(DOT_PREFIX " fontcolor = \"blue\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX " \"S_TERMINATE\" ");
do_dot_log(DOT_PREFIX " [");
do_dot_log(DOT_PREFIX " color = \"red\"");
do_dot_log(DOT_PREFIX " fontcolor = \"red\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX "// DC only nodes");
do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]");
}
static void
do_fsa_action(fsa_data_t * fsa_data, long long an_action,
void (*function) (long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t * msg_data))
{
fsa_actions &= ~an_action;
crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action));
function(an_action, fsa_data->fsa_cause, fsa_state, fsa_data->fsa_input, fsa_data);
}
static long long startup_actions =
A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG |
A_STARTED | A_CL_JOIN_QUERY;
// A_LOG, A_WARN, A_ERROR
void
do_log(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
unsigned log_type = LOG_TRACE;
if (action & A_LOG) {
log_type = LOG_INFO;
} else if (action & A_WARN) {
log_type = LOG_WARNING;
} else if (action & A_ERROR) {
log_type = LOG_ERR;
}
do_crm_log(log_type, "Input %s received in state %s from %s",
fsa_input2string(msg_data->fsa_input),
fsa_state2string(cur_state), msg_data->origin);
if (msg_data->data_type == fsa_dt_ha_msg) {
ha_msg_input_t *input = fsa_typed_data(msg_data->data_type);
crm_log_xml_debug(input->msg, __FUNCTION__);
} else if (msg_data->data_type == fsa_dt_xml) {
xmlNode *input = fsa_typed_data(msg_data->data_type);
crm_log_xml_debug(input, __FUNCTION__);
} else if (msg_data->data_type == fsa_dt_lrm) {
lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type);
do_crm_log(log_type,
"Resource %s: Call ID %d returned %d (%d)."
" New status if rc=0: %s",
input->rsc_id, input->call_id, input->rc,
input->op_status, (char *)input->user_data);
}
}
enum crmd_fsa_state
s_crmd_fsa(enum crmd_fsa_cause cause)
{
fsa_data_t *fsa_data = NULL;
long long register_copy = fsa_input_register;
long long new_actions = A_NOTHING;
enum crmd_fsa_state last_state;
crm_trace("FSA invoked with Cause: %s\tState: %s",
fsa_cause2string(cause), fsa_state2string(fsa_state));
fsa_dump_actions(fsa_actions, "Initial");
do_fsa_stall = FALSE;
if (is_message() == FALSE && fsa_actions != A_NOTHING) {
/* fake the first message so we can get into the loop */
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->fsa_input = I_NULL;
fsa_data->fsa_cause = C_FSA_INTERNAL;
fsa_data->origin = __FUNCTION__;
fsa_data->data_type = fsa_dt_none;
fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
fsa_data = NULL;
}
while (is_message() && do_fsa_stall == FALSE) {
crm_trace("Checking messages (%d remaining)", g_list_length(fsa_message_queue));
fsa_data = get_message();
if(fsa_data == NULL) {
continue;
}
log_fsa_input(fsa_data);
/* add any actions back to the queue */
fsa_actions |= fsa_data->actions;
fsa_dump_actions(fsa_data->actions, "Restored actions");
/* get the next batch of actions */
new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state];
fsa_actions |= new_actions;
fsa_dump_actions(new_actions, "New actions");
if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) {
crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
}
/* logging : *before* the state is changed */
if (is_set(fsa_actions, A_ERROR)) {
do_fsa_action(fsa_data, A_ERROR, do_log);
}
if (is_set(fsa_actions, A_WARN)) {
do_fsa_action(fsa_data, A_WARN, do_log);
}
if (is_set(fsa_actions, A_LOG)) {
do_fsa_action(fsa_data, A_LOG, do_log);
}
/* update state variables */
last_state = fsa_state;
fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state];
/*
* Remove certain actions during shutdown
*/
if (fsa_state == S_STOPPING || ((fsa_input_register & R_SHUTDOWN) == R_SHUTDOWN)) {
clear_bit(fsa_actions, startup_actions);
}
/*
* Hook for change of state.
* Allows actions to be added or removed when entering a state
*/
if (last_state != fsa_state) {
fsa_actions = do_state_transition(fsa_actions, last_state, fsa_state, fsa_data);
} else {
do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s"
" \tInput=%s \tOrigin=%s() \tid=%d",
fsa_state2string(fsa_state),
fsa_cause2string(fsa_data->fsa_cause),
fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id);
}
/* start doing things... */
s_crmd_fsa_actions(fsa_data);
delete_fsa_input(fsa_data);
fsa_data = NULL;
}
if (g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) {
crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s",
g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall ? "true" : "false");
} else {
crm_trace("Exiting the FSA");
}
/* cleanup inputs? */
if (register_copy != fsa_input_register) {
long long same = register_copy & fsa_input_register;
fsa_dump_inputs(LOG_DEBUG, "Added", fsa_input_register ^ same);
fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same);
}
fsa_dump_actions(fsa_actions, "Remaining");
fsa_dump_queue(LOG_DEBUG);
return fsa_state;
}
void
s_crmd_fsa_actions(fsa_data_t * fsa_data)
{
/*
* Process actions in order of priority but do only one
* action at a time to avoid complicating the ordering.
*/
CRM_CHECK(fsa_data != NULL, return);
while (fsa_actions != A_NOTHING && do_fsa_stall == FALSE) {
/* regular action processing in order of action priority
*
* Make sure all actions that connect to required systems
* are performed first
*/
if (fsa_actions & A_ERROR) {
do_fsa_action(fsa_data, A_ERROR, do_log);
} else if (fsa_actions & A_WARN) {
do_fsa_action(fsa_data, A_WARN, do_log);
} else if (fsa_actions & A_LOG) {
do_fsa_action(fsa_data, A_LOG, do_log);
/* get out of here NOW! before anything worse happens */
} else if (fsa_actions & A_EXIT_1) {
do_fsa_action(fsa_data, A_EXIT_1, do_exit);
/* sub-system restart */
} else if ((fsa_actions & O_LRM_RECONNECT) == O_LRM_RECONNECT) {
do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control);
} else if ((fsa_actions & O_CIB_RESTART) == O_CIB_RESTART) {
do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control);
} else if ((fsa_actions & O_PE_RESTART) == O_PE_RESTART) {
do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control);
} else if ((fsa_actions & O_TE_RESTART) == O_TE_RESTART) {
do_fsa_action(fsa_data, O_TE_RESTART, do_te_control);
/* essential start tasks */
} else if (fsa_actions & A_STARTUP) {
do_fsa_action(fsa_data, A_STARTUP, do_startup);
} else if (fsa_actions & A_CIB_START) {
do_fsa_action(fsa_data, A_CIB_START, do_cib_control);
} else if (fsa_actions & A_HA_CONNECT) {
do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control);
} else if (fsa_actions & A_READCONFIG) {
do_fsa_action(fsa_data, A_READCONFIG, do_read_config);
/* sub-system start/connect */
} else if (fsa_actions & A_LRM_CONNECT) {
do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control);
} else if (fsa_actions & A_TE_START) {
do_fsa_action(fsa_data, A_TE_START, do_te_control);
} else if (fsa_actions & A_PE_START) {
do_fsa_action(fsa_data, A_PE_START, do_pe_control);
/* Timers */
/* else if(fsa_actions & O_DC_TIMER_RESTART) {
do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */
} else if (fsa_actions & A_DC_TIMER_STOP) {
do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_INTEGRATE_TIMER_STOP) {
do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_INTEGRATE_TIMER_START) {
do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control);
} else if (fsa_actions & A_FINALIZE_TIMER_STOP) {
do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control);
} else if (fsa_actions & A_FINALIZE_TIMER_START) {
do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control);
/*
* Highest priority actions
*/
} else if (fsa_actions & A_MSG_ROUTE) {
do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route);
} else if (fsa_actions & A_RECOVER) {
do_fsa_action(fsa_data, A_RECOVER, do_recover);
} else if (fsa_actions & A_CL_JOIN_RESULT) {
do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond);
} else if (fsa_actions & A_CL_JOIN_REQUEST) {
do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond);
} else if (fsa_actions & A_SHUTDOWN_REQ) {
do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req);
} else if (fsa_actions & A_ELECTION_VOTE) {
do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote);
} else if (fsa_actions & A_ELECTION_COUNT) {
do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote);
} else if (fsa_actions & A_LRM_EVENT) {
do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event);
/*
* High priority actions
*/
} else if (fsa_actions & A_STARTED) {
do_fsa_action(fsa_data, A_STARTED, do_started);
} else if (fsa_actions & A_CL_JOIN_QUERY) {
do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query);
} else if (fsa_actions & A_DC_TIMER_START) {
do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control);
/*
* Medium priority actions
* - Membership
*/
} else if (fsa_actions & A_DC_TAKEOVER) {
do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover);
} else if (fsa_actions & A_DC_RELEASE) {
do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release);
} else if (fsa_actions & A_DC_JOIN_FINAL) {
do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final);
} else if (fsa_actions & A_ELECTION_CHECK) {
do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check);
} else if (fsa_actions & A_ELECTION_START) {
do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote);
} else if (fsa_actions & A_DC_JOIN_OFFER_ALL) {
do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all);
} else if (fsa_actions & A_DC_JOIN_OFFER_ONE) {
do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one);
} else if (fsa_actions & A_DC_JOIN_PROCESS_REQ) {
do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer);
} else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) {
do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack);
} else if (fsa_actions & A_DC_JOIN_FINALIZE) {
do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize);
} else if (fsa_actions & A_CL_JOIN_ANNOUNCE) {
do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce);
/*
* Low(er) priority actions
* Make sure the CIB is always updated before invoking the
* PE, and the PE before the TE
*/
} else if (fsa_actions & A_TE_HALT) {
do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke);
} else if (fsa_actions & A_TE_CANCEL) {
do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke);
} else if (fsa_actions & A_LRM_INVOKE) {
do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke);
} else if (fsa_actions & A_PE_INVOKE) {
do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke);
} else if (fsa_actions & A_TE_INVOKE) {
do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke);
/* Shutdown actions */
} else if (fsa_actions & A_DC_RELEASED) {
do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release);
} else if (fsa_actions & A_PE_STOP) {
do_fsa_action(fsa_data, A_PE_STOP, do_pe_control);
} else if (fsa_actions & A_TE_STOP) {
do_fsa_action(fsa_data, A_TE_STOP, do_te_control);
} else if (fsa_actions & A_SHUTDOWN) {
do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown);
} else if (fsa_actions & A_LRM_DISCONNECT) {
do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control);
} else if (fsa_actions & A_HA_DISCONNECT) {
do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control);
} else if (fsa_actions & A_CIB_STOP) {
do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control);
} else if (fsa_actions & A_STOP) {
do_fsa_action(fsa_data, A_STOP, do_stop);
/* exit gracefully */
} else if (fsa_actions & A_EXIT_0) {
do_fsa_action(fsa_data, A_EXIT_0, do_exit);
/* Error checking and reporting */
} else {
crm_err("Action %s not supported "CRM_XS" 0x%llx",
fsa_action2string(fsa_actions), fsa_actions);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__);
}
}
}
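/* Design note on s_crmd_fsa_actions(): because it dispatches exactly one
 * action per loop iteration and then re-evaluates from the top, any
 * higher-priority bit set by a handler (e.g. A_ERROR after a failed
 * connect) is serviced before lower-priority work resumes.
 */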
void
log_fsa_input(fsa_data_t * stored_msg)
{
CRM_ASSERT(stored_msg);
crm_trace("Processing queued input %d", stored_msg->id);
if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) {
crm_trace("FSA processing LRM callback from %s", stored_msg->origin);
} else if (stored_msg->data == NULL) {
crm_trace("FSA processing input from %s", stored_msg->origin);
} else {
ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, __FUNCTION__);
crm_trace("FSA processing XML message from %s", stored_msg->origin);
crm_log_xml_trace(ha_input->xml, "FSA message data");
}
}
long long
do_state_transition(long long actions,
enum crmd_fsa_state cur_state,
enum crmd_fsa_state next_state, fsa_data_t * msg_data)
{
int level = LOG_INFO;
long long tmp = actions;
gboolean clear_recovery_bit = TRUE;
enum crmd_fsa_cause cause = msg_data->fsa_cause;
enum crmd_fsa_input current_input = msg_data->fsa_input;
const char *state_from = fsa_state2string(cur_state);
const char *state_to = fsa_state2string(next_state);
const char *input = fsa_input2string(current_input);
CRM_LOG_ASSERT(cur_state != next_state);
do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]",
state_from, state_to, input, fsa_cause2string(cause), msg_data->origin);
if (cur_state == S_IDLE || next_state == S_IDLE) {
level = LOG_NOTICE;
} else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) {
level = LOG_NOTICE;
} else if (cur_state == S_ELECTION) {
level = LOG_NOTICE;
} else if (cur_state == S_STARTING) {
level = LOG_NOTICE;
} else if (next_state == S_RECOVERY) {
level = LOG_WARNING;
}
do_crm_log(level, "State transition %s -> %s "
CRM_XS " input=%s cause=%s origin=%s",
state_from, state_to, input, fsa_cause2string(cause),
msg_data->origin);
if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) {
controld_stop_election_timer();
}
#if 0
if ((fsa_input_register & R_SHUTDOWN)) {
set_bit(tmp, A_DC_TIMER_STOP);
}
#endif
if (next_state == S_INTEGRATION) {
set_bit(tmp, A_INTEGRATE_TIMER_START);
} else {
set_bit(tmp, A_INTEGRATE_TIMER_STOP);
}
if (next_state == S_FINALIZE_JOIN) {
set_bit(tmp, A_FINALIZE_TIMER_START);
} else {
set_bit(tmp, A_FINALIZE_TIMER_STOP);
}
if (next_state != S_PENDING) {
set_bit(tmp, A_DC_TIMER_STOP);
}
if (next_state != S_ELECTION) {
highest_born_on = 0;
}
if (next_state != S_IDLE) {
crm_timer_stop(recheck_timer);
}
if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) {
populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__);
}
switch (next_state) {
case S_PENDING:
fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
/* fall through */
case S_ELECTION:
crm_trace("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state));
update_dc(NULL);
break;
case S_NOT_DC:
election_trigger->counter = 0;
purge_stonith_cleanup();
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("(Re)Issuing shutdown request now" " that we have a new DC");
set_bit(tmp, A_SHUTDOWN_REQ);
}
CRM_LOG_ASSERT(fsa_our_dc != NULL);
if (fsa_our_dc == NULL) {
crm_err("Reached S_NOT_DC without a DC" " being recorded");
}
break;
case S_RECOVERY:
clear_recovery_bit = FALSE;
break;
case S_FINALIZE_JOIN:
CRM_LOG_ASSERT(AM_I_DC);
if (cause == C_TIMER_POPPED) {
crm_warn("Progressed to state %s after %s",
fsa_state2string(next_state), fsa_cause2string(cause));
}
if (crmd_join_phase_count(crm_join_welcomed) > 0) {
crm_warn("%u cluster nodes failed to respond"
" to the join offer.", crmd_join_phase_count(crm_join_welcomed));
crmd_join_phase_log(LOG_NOTICE);
} else {
crm_debug("All %d cluster nodes responded to the join offer.",
crmd_join_phase_count(crm_join_integrated));
}
break;
case S_POLICY_ENGINE:
election_trigger->counter = 0;
CRM_LOG_ASSERT(AM_I_DC);
if (cause == C_TIMER_POPPED) {
crm_info("Progressed to state %s after %s",
fsa_state2string(next_state), fsa_cause2string(cause));
}
if (crmd_join_phase_count(crm_join_finalized) > 0) {
crm_err("%u cluster nodes failed to confirm their join.",
crmd_join_phase_count(crm_join_finalized));
crmd_join_phase_log(LOG_NOTICE);
} else if (crmd_join_phase_count(crm_join_confirmed)
== crm_active_peers()) {
crm_debug("All %u cluster nodes are"
" eligible to run resources.", crm_active_peers());
} else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) {
crm_err("We have more confirmed nodes than our membership does: %d vs. %d",
crmd_join_phase_count(crm_join_confirmed), crm_active_peers());
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
} else if (saved_ccm_membership_id != crm_peer_seq) {
crm_info("Membership changed: %llu -> %llu - join restart",
saved_ccm_membership_id, crm_peer_seq);
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
} else {
crm_warn("Only %u of %u cluster "
"nodes are eligible to run resources - continue %d",
crmd_join_phase_count(crm_join_confirmed),
crm_active_peers(), crmd_join_phase_count(crm_join_welcomed));
}
/* initialize_join(FALSE); */
break;
case S_STOPPING:
case S_TERMINATE:
/* possibly redundant */
set_bit(fsa_input_register, R_SHUTDOWN);
break;
case S_IDLE:
CRM_LOG_ASSERT(AM_I_DC);
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("(Re)Issuing shutdown request now" " that we are the DC");
set_bit(tmp, A_SHUTDOWN_REQ);
}
if (recheck_timer->period_ms > 0) {
crm_debug("Starting %s", get_timer_desc(recheck_timer));
crm_timer_start(recheck_timer);
}
break;
default:
break;
}
if (clear_recovery_bit && next_state != S_PENDING) {
tmp &= ~A_RECOVER;
} else if (clear_recovery_bit == FALSE) {
tmp |= A_RECOVER;
}
if (tmp != actions) {
/* fsa_dump_actions(actions ^ tmp, "New actions"); */
actions = tmp;
}
return actions;
}
diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h
index 7527ed9995..b73e445426 100644
--- a/daemons/controld/controld_fsa.h
+++ b/daemons/controld/controld_fsa.h
@@ -1,702 +1,699 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CRMD_FSA__H
# define CRMD_FSA__H
# include
# include
# include
# include
# include
# include
# include
/*! States the controller can be in */
enum crmd_fsa_state {
S_IDLE = 0, /* Nothing happening */
S_ELECTION, /* Take part in the election algorithm as
* described below
*/
S_INTEGRATION, /* integrate the status of new nodes (which is
* all of them if we have just been elected DC)
* to form a complete and up-to-date picture of
* the CIB
*/
S_FINALIZE_JOIN, /* distribute the best-known CIB to the newly
* integrated nodes and collect confirmation
* that they have applied it
*/
S_NOT_DC, /* we are in non-DC mode */
S_POLICY_ENGINE, /* Determine next stable state of the cluster */
S_RECOVERY, /* Something bad happened, check everything is ok
* before continuing and attempt to recover if
* required
*/
S_RELEASE_DC, /* we were the DC, but now we aren't anymore,
* possibly by our own request, and we should
* release all unnecessary sub-systems, finish
* any pending actions, do general cleanup and
* unset anything that makes us think we are
* special :)
*/
S_STARTING, /* we are just starting out */
S_PENDING, /* we are not a full/active member yet */
S_STOPPING, /* We are in the final stages of shutting down */
S_TERMINATE, /* We are going to shutdown, this is the equiv of
* "Sending TERM signal to all processes" in Linux
* and in worst case scenarios could be considered
* a self STONITH
*/
S_TRANSITION_ENGINE, /* Attempt to make the calculated next stable
* state of the cluster a reality
*/
S_HALT, /* Freeze - don't do anything
* Something bad happened that needs the admin to fix
* Wait for I_ELECTION
*/
/* ----------- Last state found in table is above ---------- */
S_ILLEGAL /* This is an illegal FSA state */
/* (must be last) */
};
# define MAXSTATE S_ILLEGAL
/*
Once we start and do some basic sanity checks, we go into the
S_NOT_DC state and await instructions from the DC or input from
the cluster layer which indicates the election algorithm needs to run.
If the election algorithm is triggered, we enter the S_ELECTION state
from where we can either go back to the S_NOT_DC state or progress
to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC
but aren't anymore). See the libcrmcluster API documentation for more
information about the election algorithm.
Once the election is complete, if we are the DC, we enter the
S_INTEGRATION state which is a DC-in-waiting style state. We are
the DC, but we shouldn't do anything yet because we may not have an
up-to-date picture of the cluster. There may of course be times
when this fails, so we should go back to the S_RECOVERY stage and
check everything is ok. We may also end up here if a new node came
online, since each node is authoritative about itself, and we would want
to incorporate its information into the CIB.
Once we have the latest CIB, we then enter the S_POLICY_ENGINE state
where we invoke the scheduler. It is possible that between
invoking the scheduler and receiving an answer, we receive
more input. In this case, we would discard the original result and
invoke it again.
Once we are satisfied with the output from the scheduler, we
enter S_TRANSITION_ENGINE and feed the scheduler's output to the
Transition Engine who attempts to make the scheduler's
calculation a reality. If the transition completes successfully,
we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the
current unstable state and try again.
Of course, we may be asked to shutdown at any time, however we must
progress to S_NOT_DC before doing so. Once we have handed over DC
duties to another node, we can then shut down like everyone else,
that is, by asking the DC for permission and waiting for it to take all
our resources away.
The case where we are the DC and the only node in the cluster is a
special case and handled as an escalation which takes us to
S_SHUTDOWN. Similarly, if any other point in the shutdown
fails or stalls, this is escalated and we end up in S_TERMINATE.
At any point, the controller can relay messages for its subsystems,
but outbound messages (from subsystems) should probably be blocked
until S_INTEGRATION (for the DC) or the join protocol has
completed (for non-DC controllers).
*/
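/*
 Illustrative sketch of the description above (one plausible sequence,
 not the full transition table): a node that wins the election and
 settles might traverse

 S_ELECTION -> S_INTEGRATION -> S_FINALIZE_JOIN
 -> S_POLICY_ENGINE -> S_TRANSITION_ENGINE -> S_IDLE

 while a node that loses the election returns to S_NOT_DC.
 */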
/*======================================
*
* Inputs/Events/Stimuli to be given to the finite state machine
*
* Some of these are true events, and others are synthesized based on
* the "register" (see below) and the contents or source of messages.
*
* The machine keeps processing until receiving I_NULL
*
*======================================*/
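/* Illustrative sketch: inputs are typically injected through the
 * register_fsa_input*() helpers, as the join code does when it detects a
 * membership inconsistency, e.g.:
 *
 * register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
 */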
enum crmd_fsa_input {
/* 0 */
I_NULL, /* Nothing happened */
/* 1 */
I_CIB_OP, /* An update to the CIB occurred */
I_CIB_UPDATE, /* An update to the CIB occurred */
I_DC_TIMEOUT, /* We have lost communication with the DC */
I_ELECTION, /* Someone started an election */
I_PE_CALC, /* The scheduler needs to be invoked */
I_RELEASE_DC, /* The election completed and we were not
* elected, but we were the DC beforehand
*/
I_ELECTION_DC, /* The election completed and we were (re-)elected
* DC
*/
I_ERROR, /* Something bad happened (more serious than
* I_FAIL) and may not have been due to the action
* being performed. For example, we may have lost
* our connection to the CIB.
*/
/* 9 */
I_FAIL, /* The action failed to complete successfully */
I_INTEGRATED,
I_FINALIZED,
I_NODE_JOIN, /* A node has entered the cluster */
I_NOT_DC, /* We are not and were not the DC before or after
* the current operation or state
*/
I_RECOVERED, /* The recovery process completed successfully */
I_RELEASE_FAIL, /* We could not give up DC status for some reason
*/
I_RELEASE_SUCCESS, /* We are no longer the DC */
I_RESTART, /* The current set of actions needs to be
* restarted
*/
I_TE_SUCCESS, /* The transition completed successfully */
/* 20 */
I_ROUTER, /* Do our job as router and forward this to the
* right place
*/
I_SHUTDOWN, /* We are asking to shutdown */
I_STOP, /* We have been told to shutdown */
I_TERMINATE, /* Actually exit */
I_STARTUP,
I_PE_SUCCESS, /* The action completed successfully */
I_JOIN_OFFER, /* The DC is offering membership */
I_JOIN_REQUEST, /* The client is requesting membership */
I_JOIN_RESULT, /* If not the DC: The result of a join request
* Else: A client is responding with its local state info
*/
I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen"
* and until it does, we can't do anything else
*/
I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */
I_LRM_EVENT,
/* 30 */
I_PENDING,
I_HALT,
/* ------------ Last input found in table is above ----------- */
I_ILLEGAL /* This is an illegal value for an FSA input */
/* (must be last) */
};
# define MAXINPUT I_ILLEGAL
# define I_MESSAGE I_ROUTER
/*======================================
*
* actions
*
* Some of the actions below will always occur together for now, but this may
* not always be the case, so they are split up so that they can easily be
* called independently in the future, if necessary.
*
* For example, separating A_LRM_CONNECT from A_STARTUP might be useful
* if we ever try to recover from a faulty or disconnected executor.
*
*======================================*/
/* Don't do anything */
# define A_NOTHING 0x0000000000000000ULL
/* -- Startup actions -- */
/* Hook to perform any actions (other than connecting to other daemons)
* that might be needed as part of the startup.
*/
# define A_STARTUP 0x0000000000000001ULL
/* Hook to perform any actions that might be needed after startup
* has completed successfully.
*/
# define A_STARTED 0x0000000000000002ULL
/* Connect to cluster layer */
# define A_HA_CONNECT 0x0000000000000004ULL
# define A_HA_DISCONNECT 0x0000000000000008ULL
# define A_INTEGRATE_TIMER_START 0x0000000000000010ULL
# define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL
# define A_FINALIZE_TIMER_START 0x0000000000000040ULL
# define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL
/* -- Election actions -- */
# define A_DC_TIMER_START 0x0000000000000100ULL
# define A_DC_TIMER_STOP 0x0000000000000200ULL
# define A_ELECTION_COUNT 0x0000000000000400ULL
# define A_ELECTION_VOTE 0x0000000000000800ULL
# define A_ELECTION_START 0x0000000000001000ULL
/* -- Message processing -- */
/* Process the queue of requests */
# define A_MSG_PROCESS 0x0000000000002000ULL
/* Send the message to the correct recipient */
# define A_MSG_ROUTE 0x0000000000004000ULL
/* Send a welcome message to new node(s) */
# define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL
/* -- Server Join protocol actions -- */
/* Send a welcome message to all nodes */
# define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL
/* Process a join request from a remote node */
# define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL
/* Send out the results of the Join phase */
# define A_DC_JOIN_FINALIZE 0x0000000000040000ULL
/* Process a node's acknowledgement of the join results */
# define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL
/* -- Client Join protocol actions -- */
# define A_CL_JOIN_QUERY 0x0000000000100000ULL
# define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL
/* Request membership from the DC */
# define A_CL_JOIN_REQUEST 0x0000000000400000ULL
/* Did the DC accept or reject the request */
# define A_CL_JOIN_RESULT 0x0000000000800000ULL
/* -- Recovery, DC start/stop -- */
/* Something bad happened, try to recover */
# define A_RECOVER 0x0000000001000000ULL
/* Hook to perform any actions (apart from starting the TE and scheduler,
* and gathering the latest CIB) that might be necessary before
* giving up the responsibilities of being the DC.
*/
# define A_DC_RELEASE 0x0000000002000000ULL
/* */
# define A_DC_RELEASED 0x0000000004000000ULL
/* Hook to perform any actions (apart from starting the TE and scheduler,
* and gathering the latest CIB) that might be necessary before
* taking over the responsibilities of being the DC.
*/
# define A_DC_TAKEOVER 0x0000000008000000ULL
/* -- Shutdown actions -- */
# define A_SHUTDOWN 0x0000000010000000ULL
# define A_STOP 0x0000000020000000ULL
# define A_EXIT_0 0x0000000040000000ULL
# define A_EXIT_1 0x0000000080000000ULL
# define A_SHUTDOWN_REQ 0x0000000100000000ULL
# define A_ELECTION_CHECK 0x0000000200000000ULL
# define A_DC_JOIN_FINAL 0x0000000400000000ULL
/* -- CIB actions -- */
# define A_CIB_START 0x0000020000000000ULL
# define A_CIB_STOP 0x0000040000000000ULL
/* -- Transition Engine actions -- */
/* Attempt to reach the newly calculated cluster state. This is
* only called once per transition (except if it is asked to
* stop the transition or start a new one).
* Once given a cluster state to reach, the TE will determine
* tasks that can be performed in parallel, execute them, wait
* for replies and then determine the next set until the new
* state is reached or no further tasks can be taken.
*/
# define A_TE_INVOKE 0x0000100000000000ULL
# define A_TE_START 0x0000200000000000ULL
# define A_TE_STOP 0x0000400000000000ULL
# define A_TE_CANCEL 0x0000800000000000ULL
# define A_TE_HALT 0x0001000000000000ULL
/* -- Scheduler actions -- */
/* Calculate the next state for the cluster. This is only
* invoked once per needed calculation.
*/
# define A_PE_INVOKE 0x0002000000000000ULL
# define A_PE_START 0x0004000000000000ULL
# define A_PE_STOP 0x0008000000000000ULL
/* -- Misc actions -- */
/* Add a system-generated "block" so that resources aren't moved
* to, and are actively moved away from, the affected node. This
* way we can return quickly even if busy with other things.
*/
# define A_NODE_BLOCK 0x0010000000000000ULL
/* Update our information in the local CIB */
# define A_UPDATE_NODESTATUS 0x0020000000000000ULL
# define A_READCONFIG 0x0080000000000000ULL
/* -- LRM Actions -- */
/* Connect to pacemaker-execd */
# define A_LRM_CONNECT 0x0100000000000000ULL
/* Disconnect from pacemaker-execd */
# define A_LRM_DISCONNECT 0x0200000000000000ULL
# define A_LRM_INVOKE 0x0400000000000000ULL
# define A_LRM_EVENT 0x0800000000000000ULL
/* -- Logging actions -- */
# define A_LOG 0x1000000000000000ULL
# define A_ERROR 0x2000000000000000ULL
# define A_WARN 0x4000000000000000ULL
# define O_EXIT (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP)
# define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED)
# define O_PE_RESTART (A_PE_START|A_PE_STOP)
# define O_TE_RESTART (A_TE_START|A_TE_STOP)
# define O_CIB_RESTART (A_CIB_START|A_CIB_STOP)
# define O_LRM_RECONNECT (A_LRM_CONNECT|A_LRM_DISCONNECT)
# define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START)
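/* Note: the O_* values are composite masks, so (mirroring
 * s_crmd_fsa_actions() above) they must be tested with an equality check;
 * a plain bitwise AND would also match when only one component bit is set:
 *
 * if ((fsa_actions & O_LRM_RECONNECT) == O_LRM_RECONNECT) {
 *     do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control);
 * }
 */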
/*======================================
*
* "register" contents
*
* Things we may want to remember regardless of which state we are in.
*
* These also count as inputs for synthesizing I_*
*
*======================================*/
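/* Illustrative sketch: the register is manipulated with the generic bit
 * helpers used throughout the controller, e.g.:
 *
 * set_bit(fsa_input_register, R_SHUTDOWN);
 * if (is_set(fsa_input_register, R_SHUTDOWN)) { ... }
 * clear_bit(fsa_input_register, R_HAVE_CIB);
 */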
# define R_THE_DC 0x00000001ULL
/* Are we the DC? */
# define R_STARTING 0x00000002ULL
/* Are we starting up? */
# define R_SHUTDOWN 0x00000004ULL
/* Are we trying to shut down? */
# define R_STAYDOWN 0x00000008ULL
/* Should we restart? */
# define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */
# define R_READ_CONFIG 0x00000040ULL
# define R_INVOKE_PE 0x00000080ULL // Should the scheduler be invoked?
# define R_CIB_CONNECTED 0x00000100ULL
/* Is the CIB connected? */
# define R_PE_CONNECTED 0x00000200ULL // Is the scheduler connected?
# define R_TE_CONNECTED 0x00000400ULL
/* Is the Transition Engine connected? */
# define R_LRM_CONNECTED 0x00000800ULL // Is pacemaker-execd connected?
# define R_CIB_REQUIRED 0x00001000ULL
/* Is the CIB required? */
# define R_PE_REQUIRED 0x00002000ULL // Is the scheduler required?
# define R_TE_REQUIRED 0x00004000ULL
/* Is the Transition Engine required? */
# define R_ST_REQUIRED 0x00008000ULL
/* Is the Stonith daemon required? */
# define R_CIB_DONE 0x00010000ULL
/* Have we calculated the CIB? */
# define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */
# define R_CIB_ASKED 0x00040000ULL /* Have we asked for an up-to-date CIB */
# define R_MEMBERSHIP 0x00100000ULL /* Have we got cluster layer data yet */
# define R_PEER_DATA 0x00200000ULL /* Have we got T_CL_STATUS data yet */
# define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */
# define R_REQ_PEND 0x01000000ULL
/* Are there Requests waiting for
processing? */
# define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler?
# define R_TE_PEND 0x04000000ULL
/* Has the TE been invoked and we're
awaiting completion? */
# define R_RESP_PEND 0x08000000ULL
/* Do we have clients waiting on a
response? If so, perhaps we shouldn't
stop yet */
# define R_IN_TRANSITION 0x10000000ULL
/* */
# define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all
* resources in preparation for
* shutting down */
# define R_IN_RECOVERY 0x80000000ULL
#define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_LRM_OP_INVALID)
enum crmd_fsa_cause {
C_UNKNOWN = 0,
C_STARTUP,
C_IPC_MESSAGE,
C_HA_MESSAGE,
C_CRMD_STATUS_CALLBACK,
C_LRM_OP_CALLBACK,
C_TIMER_POPPED,
C_SHUTDOWN,
C_FSA_INTERNAL,
};
-typedef struct fsa_timer_s fsa_timer_t;
-struct fsa_timer_s {
+typedef struct fsa_timer_s {
guint source_id; /* timer source id */
int period_ms; /* timer period */
enum crmd_fsa_input fsa_input;
- gboolean(*callback) (gpointer data);
- gboolean repeat;
+ gboolean (*callback) (gpointer data);
+ bool log_error;
int counter;
-};
+} fsa_timer_t;
enum fsa_data_type {
fsa_dt_none,
fsa_dt_ha_msg,
fsa_dt_xml,
fsa_dt_lrm,
};
typedef struct fsa_data_s fsa_data_t;
struct fsa_data_s {
int id;
enum crmd_fsa_input fsa_input;
enum crmd_fsa_cause fsa_cause;
long long actions;
const char *origin;
void *data;
enum fsa_data_type data_type;
};
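/* Illustrative sketch: consumers recover the typed payload via the
 * fsa_typed_data() helper keyed off data_type, as do_log() does, e.g.:
 *
 * if (msg_data->data_type == fsa_dt_ha_msg) {
 *     ha_msg_input_t *input = fsa_typed_data(msg_data->data_type);
 *     crm_log_xml_debug(input->msg, __FUNCTION__);
 * }
 */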
/* Global FSA stuff */
extern gboolean do_fsa_stall;
extern enum crmd_fsa_state fsa_state;
extern long long fsa_input_register;
extern long long fsa_actions;
extern cib_t *fsa_cib_conn;
extern char *fsa_our_uname;
extern char *fsa_our_uuid;
extern char *fsa_pe_ref; // Last invocation of the scheduler
extern char *fsa_our_dc;
extern char *fsa_our_dc_version;
extern GListPtr fsa_message_queue;
extern char *fsa_cluster_name;
extern fsa_timer_t *election_trigger;
extern fsa_timer_t *shutdown_escalation_timer;
extern fsa_timer_t *transition_timer;
extern fsa_timer_t *integration_timer;
extern fsa_timer_t *finalization_timer;
extern fsa_timer_t *wait_timer;
extern fsa_timer_t *recheck_timer;
extern crm_trigger_t *fsa_source;
extern crm_trigger_t *config_read;
extern unsigned long long saved_ccm_membership_id;
extern gboolean ever_had_quorum;
// These should be moved elsewhere
void do_update_cib_nodes(gboolean overwrite, const char *caller);
int crmd_cib_smart_opt(void);
xmlNode *do_lrm_query(gboolean, const char *node_name);
const char *fsa_input2string(enum crmd_fsa_input input);
const char *fsa_state2string(enum crmd_fsa_state state);
const char *fsa_cause2string(enum crmd_fsa_cause cause);
const char *fsa_action2string(long long action);
enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause);
# define AM_I_DC is_set(fsa_input_register, R_THE_DC)
# define AM_I_OPERATIONAL (is_set(fsa_input_register, R_STARTING) == FALSE)
# define trigger_fsa(source) do { \
crm_trace("Triggering FSA: %s", __FUNCTION__); \
mainloop_set_trigger(source); \
} while(0)
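/* Illustrative usage, assuming fsa_source was initialized during startup:
 *
 * trigger_fsa(fsa_source);
 */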
/* A_READCONFIG */
void do_read_config(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
/* A_PE_INVOKE */
void do_pe_invoke(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
/* A_LOG */
void do_log(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_STARTUP */
void do_startup(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_CIB_START, STOP, RESTART */
void do_cib_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_HA_CONNECT */
void do_ha_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_LRM_CONNECT */
void do_lrm_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_PE_START, STOP, RESTART */
void do_pe_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_TE_START, STOP, RESTART */
void do_te_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_STARTED */
void do_started(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_MSG_ROUTE */
void do_msg_route(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_RECOVER */
void do_recover(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_ELECTION_VOTE */
void do_election_vote(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_ELECTION_COUNT */
void do_election_count_vote(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input,
fsa_data_t *msg_data);
/* A_ELECTION_CHECK */
void do_election_check(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_TIMER_STOP */
void do_timer_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_TAKEOVER */
void do_dc_takeover(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_RELEASE */
void do_dc_release(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_OFFER_ALL */
void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_OFFER_ONE */
void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_ACK */
void do_dc_join_ack(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_REQ */
void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input,
fsa_data_t *msg_data);
/* A_DC_JOIN_FINALIZE */
void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_CL_JOIN_QUERY */
/* is there a DC out there? */
void do_cl_join_query(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
/* A_CL_JOIN_ANNOUNCE */
void do_cl_join_announce(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
/* A_CL_JOIN_REQUEST */
void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data);
/* A_CL_JOIN_RESULT */
void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data);
/* A_LRM_INVOKE */
void do_lrm_invoke(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_LRM_EVENT */
void do_lrm_event(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_TE_INVOKE, A_TE_CANCEL */
void do_te_invoke(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_SHUTDOWN_REQ */
void do_shutdown_req(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_SHUTDOWN */
void do_shutdown(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_STOP */
void do_stop(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_EXIT_0, A_EXIT_1 */
void do_exit(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_FINAL */
void do_dc_join_final(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
-
-# include
#endif
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
index 12966b2d24..4ac0d2a5c7 100644
--- a/daemons/controld/controld_join_client.c
+++ b/daemons/controld/controld_join_client.c
@@ -1,309 +1,310 @@
/*
- * Copyright 2004-2018 Andrew Beekhof
+ * Copyright 2004-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
-#include
-#include
+#include
int reannounce_count = 0;
void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
/*!
* \internal
* \brief Remember if DC is shutting down as we join
*
* If we're joining while the current DC is shutting down, update its expected
* state, so we don't fence it if we become the new DC. (We weren't a peer
* when it broadcast its shutdown request.)
*
* \param[in] msg A join message from the DC
*/
static void
update_dc_expected(xmlNode *msg)
{
if (fsa_our_dc && crm_is_true(crm_element_value(msg, F_CRM_DC_LEAVING))) {
crm_node_t *dc_node = crm_get_peer(0, fsa_our_dc);
crm_update_peer_expected(__FUNCTION__, dc_node, CRMD_JOINSTATE_DOWN);
}
}
/* A_CL_JOIN_QUERY */
/* is there a DC out there? */
void
do_cl_join_query(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
sleep(1); // Give the cluster layer time to propagate to the DC
update_dc(NULL); /* Unset any existing value so that the result is not discarded */
crm_debug("Querying for a DC");
send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
free_xml(req);
}
/* A_CL_JOIN_ANNOUNCE */
/* this is kind of a workaround for the fact that we may not be around or
* are otherwise unable to reply when the DC sends out A_DC_JOIN_OFFER_ALL
*/
void
do_cl_join_announce(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* only announce if we're in S_PENDING */
if (cur_state != S_PENDING) {
crm_warn("Not announcing cluster join because in state %s",
fsa_state2string(cur_state));
return;
}
if (AM_I_OPERATIONAL) {
/* send as a broadcast */
xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_debug("Announcing availability");
update_dc(NULL);
send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
free_xml(req);
} else {
/* Delay announce until we have finished local startup */
crm_warn("Delaying announce of cluster join until local startup is complete");
return;
}
}
static int query_call_id = 0;
/* A_CL_JOIN_REQUEST */
/* aka. accept the welcome offer */
void
do_cl_join_offer_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *welcome_from;
const char *join_id;
CRM_CHECK(input != NULL, return);
#if 0
if (we are sick) {
log error;
/* save the request for later? */
return;
}
#endif
welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
join_id = crm_element_value(input->msg, F_CRM_JOIN_ID);
crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s",
welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID));
/* we only ever want the last one */
if (query_call_id > 0) {
crm_trace("Cancelling previous join query: %d", query_call_id);
remove_cib_op_callback(query_call_id, FALSE);
query_call_id = 0;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding cluster join offer from node %s (expected %s)",
welcome_from, fsa_our_dc);
return;
}
update_dc_expected(input->msg);
query_call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children);
fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback);
crm_trace("Registered join query callback: %d", query_call_id);
register_fsa_action(A_DC_TIMER_STOP);
}
void
join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
char *join_id = user_data;
xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);
CRM_LOG_ASSERT(join_id != NULL);
if (query_call_id != call_id) {
crm_trace("Query %d superseded", call_id);
goto done;
}
query_call_id = 0;
if(rc != pcmk_ok || output == NULL) {
crm_err("Could not retrieve version details for join-%s: %s (%d)",
join_id, pcmk_strerror(rc), rc);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
} else if (fsa_our_dc == NULL) {
crm_debug("Membership is in flux, not continuing join-%s", join_id);
} else {
xmlNode *reply = NULL;
crm_debug("Respond to join offer join-%s from %s", join_id, fsa_our_dc);
copy_in_properties(generation, output);
reply = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_xml_add(reply, F_CRM_JOIN_ID, join_id);
crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
free_xml(reply);
}
done:
free_xml(generation);
}
static void
set_join_state(const char * start_state)
{
if (safe_str_eq(start_state, "standby")) {
crm_notice("Forcing node %s to join in %s state per configured environment",
fsa_our_uname, start_state);
update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid,
NULL, NULL, NULL, "standby", "on", TRUE, NULL, NULL);
} else if (safe_str_eq(start_state, "online")) {
crm_notice("Forcing node %s to join in %s state per configured environment",
fsa_our_uname, start_state);
update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid,
NULL, NULL, NULL, "standby", "off", TRUE, NULL, NULL);
} else if (safe_str_eq(start_state, "default")) {
crm_debug("Not forcing a starting state on node %s", fsa_our_uname);
} else {
crm_warn("Unrecognized start state '%s', using 'default' (%s)",
start_state, fsa_our_uname);
}
}
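/* Illustrative sketch: on the first join after the controller starts,
 * do_cl_join_finalize_respond() below feeds this function the
 * node_start_state daemon option:
 *
 * set_join_state(daemon_option("node_start_state"));
 */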
/* A_CL_JOIN_RESULT */
/* aka. this is notification that we have (or have not) been accepted */
void
do_cl_join_finalize_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *tmp1 = NULL;
gboolean was_nack = TRUE;
static gboolean first_join = TRUE;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *start_state = daemon_option("node_start_state");
int join_id = -1;
const char *op = crm_element_value(input->msg, F_CRM_TASK);
const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
crm_trace("Ignoring op=%s message", op);
return;
}
/* determine whether it was an ack or a nack */
if (crm_is_true(ack_nack)) {
was_nack = FALSE;
}
crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);
if (was_nack) {
crm_err("Shutting down because cluster join with leader %s failed "
CRM_XS" join-%d NACK'd", welcome_from, join_id);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
crm_warn("Discarding our own welcome - we're no longer the DC");
return;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding %s from node %s (expected from %s)",
op, welcome_from, fsa_our_dc);
return;
}
update_dc_expected(input->msg);
/* send our status section to the DC */
tmp1 = do_lrm_query(TRUE, fsa_our_uname);
if (tmp1 != NULL) {
xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);
crm_debug("Confirming join-%d: sending local operation history to %s",
join_id, fsa_our_dc);
/*
* If this is the node's first join since the controller started on it,
* set its initial state (standby or member) according to the user's
* preference.
*
* We do not clear the LRM history here. Even if the DC failed to do it
* when we last left, removing them here creates a race condition if the
* controller is being recovered. Instead of a list of active resources
* from the executor, we may end up with a blank status section. If we
* are _NOT_ lucky, we will probe for the "wrong" instance of anonymous
* clones and end up with multiple active instances on the machine.
*/
if (first_join && is_not_set(fsa_input_register, R_SHUTDOWN)) {
first_join = FALSE;
if (start_state) {
set_join_state(start_state);
}
}
send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
free_xml(reply);
if (AM_I_DC == FALSE) {
register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
}
free_xml(tmp1);
} else {
crm_err("Could not confirm join-%d with %s: Local operation history failed",
join_id, fsa_our_dc);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index d790d9ab27..d13a84258b 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -1,716 +1,716 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
+ * The version control history for this file may have further details.
+ *
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
-#include
-#include
-#include
+#include
char *max_epoch = NULL;
char *max_generation_from = NULL;
xmlNode *max_generation_xml = NULL;
void initialize_join(gboolean before);
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
static int current_join_id = 0;
unsigned long long saved_ccm_membership_id = 0;
void
crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
{
enum crm_join_phase last = 0;
if(node == NULL) {
crm_err("Could not update join because node not specified"
CRM_XS " join-%u source=%s phase=%s",
current_join_id, source, crm_join_phase_str(phase));
return;
}
/* Remote nodes do not participate in joins */
if (is_set(node->flags, crm_remote_node)) {
return;
}
last = node->join;
if(phase == last) {
crm_trace("%s: Node %s[%u] - join-%u phase still %s",
source, node->uname, node->id, current_join_id,
crm_join_phase_str(last));
} else if ((phase <= crm_join_none) || (phase == (last + 1))) {
node->join = phase;
crm_info("%s: Node %s[%u] - join-%u phase %s -> %s",
source, node->uname, node->id, current_join_id,
crm_join_phase_str(last), crm_join_phase_str(phase));
} else {
crm_err("Could not update join for node %s because phase transition invalid "
CRM_XS " join-%u source=%s node_id=%u last=%s new=%s",
node->uname, current_join_id, source, node->id,
crm_join_phase_str(last), crm_join_phase_str(phase));
}
}
void
initialize_join(gboolean before)
{
GHashTableIter iter;
crm_node_t *peer = NULL;
/* clear out/reset a bunch of stuff */
crm_debug("join-%d: Initializing join data (flag=%s)",
current_join_id, before ? "true" : "false");
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
crm_update_peer_join(__FUNCTION__, peer, crm_join_none);
}
if (before) {
if (max_generation_from != NULL) {
free(max_generation_from);
max_generation_from = NULL;
}
if (max_generation_xml != NULL) {
free_xml(max_generation_xml);
max_generation_xml = NULL;
}
clear_bit(fsa_input_register, R_HAVE_CIB);
clear_bit(fsa_input_register, R_CIB_ASKED);
}
}
/*!
* \internal
* \brief Create a join message from the DC
*
* \param[in] join_op Join operation name
* \param[in] host_to Recipient of message
*/
static xmlNode *
create_dc_message(const char *join_op, const char *host_to)
{
xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
CRM_SYSTEM_DC, NULL);
/* Identify which election this is a part of */
crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
/* Add a field specifying whether the DC is shutting down. This keeps the
* joining node from fencing the old DC if it becomes the new DC.
*/
crm_xml_add_boolean(msg, F_CRM_DC_LEAVING,
is_set(fsa_input_register, R_SHUTDOWN));
return msg;
}
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *offer = NULL;
crm_node_t *member = (crm_node_t *)value;
CRM_ASSERT(member != NULL);
if (crm_is_peer_active(member) == FALSE) {
crm_info("Not making an offer to %s: not active (%s)", member->uname, member->state);
if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) {
/* You would think this unsafe, but in fact this plus an
* active resource is what causes it to be fenced.
*
* Yes, this does mean that any node that dies at the same
* time as the old DC and is not (still) running resources
* won't be fenced.
*
* I'm not happy about this either.
*/
crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_DOWN);
}
return;
}
if (member->uname == NULL) {
crm_info("No recipient for welcome message.(Node uuid:%s)", member->uuid);
return;
}
if (saved_ccm_membership_id != crm_peer_seq) {
saved_ccm_membership_id = crm_peer_seq;
crm_info("Making join offers based on membership %llu", crm_peer_seq);
}
if(user_data && member->join > crm_join_none) {
crm_info("Skipping %s: already known %d", member->uname, member->join);
return;
}
crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none);
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
// Advertise our feature set so the joining node can bail if not compatible
crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
/* send the welcome */
crm_info("join-%d: Sending offer to %s", current_join_id, member->uname);
send_cluster_message(member, crm_msg_crmd, offer, TRUE);
free_xml(offer);
crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed);
/* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */
}
/* A_DC_JOIN_OFFER_ALL */
void
do_dc_join_offer_all(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* Reset everyone's status back to down or in_ccm in the CIB.
* Any nodes that are active in the CIB but not in the cluster membership
* will be seen as offline by the scheduler anyway.
*/
current_join_id++;
initialize_join(TRUE);
/* do_update_cib_nodes(TRUE, __FUNCTION__); */
update_dc(NULL);
if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
crm_info("A new node joined the cluster");
}
g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
// Don't waste time by invoking the scheduler yet
crm_info("join-%d: Waiting on %d outstanding join acks",
current_join_id, crmd_join_phase_count(crm_join_welcomed));
}
/* A_DC_JOIN_OFFER_ONE */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_node_t *member;
ha_msg_input_t *welcome = NULL;
const char *op = NULL;
const char *join_to = NULL;
if (msg_data->data) {
welcome = fsa_typed_data(fsa_dt_ha_msg);
} else {
crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes");
g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
check_join_state(cur_state, __FUNCTION__);
return;
}
if (welcome == NULL) {
crm_err("Attempt to send welcome message without a message to reply to!");
return;
}
join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
if (join_to == NULL) {
crm_err("Attempt to send welcome message without a host to reply to!");
return;
}
member = crm_get_peer(0, join_to);
op = crm_element_value(welcome->msg, F_CRM_TASK);
if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) {
/* note: it _is_ possible that a node will have been
* sick or starting up when the original offer was made.
* however, it will either re-announce itself in due course
* _or_ we can re-store the original offer on the client.
*/
crm_trace("(Re-)offering membership to %s...", join_to);
}
crm_info("join-%d: Processing %s request from %s in state %s",
current_join_id, op, join_to, fsa_state2string(cur_state));
crm_update_peer_join(__FUNCTION__, member, crm_join_none);
join_make_offer(NULL, member, NULL);
/* always offer to the DC (ourselves)
* this ensures the correct value for max_generation_from
*/
if (strcmp(join_to, fsa_our_uname) != 0) {
member = crm_get_peer(0, fsa_our_uname);
join_make_offer(NULL, member, NULL);
}
/* this was a genuine join request, cancel any existing
* transition and invoke the PE
*/
abort_transition(INFINITY, tg_restart, "Node join", NULL);
// Don't waste time by invoking the scheduler yet
crm_debug("Waiting on %d outstanding join acks for join-%d",
crmd_join_phase_count(crm_join_welcomed), current_join_id);
}
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
const char *elem_l = crm_element_value(left, field);
const char *elem_r = crm_element_value(right, field);
int int_elem_l = crm_int_helper(elem_l, NULL);
int int_elem_r = crm_int_helper(elem_r, NULL);
if (int_elem_l < int_elem_r) {
return -1;
} else if (int_elem_l > int_elem_r) {
return 1;
}
return 0;
}
/* A_DC_JOIN_PROCESS_REQ */
void
do_dc_join_filter_offer(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *generation = NULL;
int cmp = 0;
int join_id = -1;
gboolean ack_nack_bool = TRUE;
const char *ack_nack = CRMD_JOINSTATE_MEMBER;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
const char *join_version = crm_element_value(join_ack->msg,
XML_ATTR_CRM_VERSION);
crm_node_t *join_node = crm_get_peer(0, join_from);
crm_debug("Processing req from %s", join_from);
generation = join_ack->xml;
crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
if (max_generation_xml != NULL && generation != NULL) {
int lpc = 0;
const char *attributes[] = {
XML_ATTR_GENERATION_ADMIN,
XML_ATTR_GENERATION,
XML_ATTR_NUMUPDATES,
};
for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) {
cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
}
}
if (join_id != current_join_id) {
crm_debug("Invalid response from %s: join-%d vs. join-%d",
join_from, join_id, current_join_id);
check_join_state(cur_state, __FUNCTION__);
return;
} else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
crm_err("Node %s is not a member", join_from);
ack_nack_bool = FALSE;
} else if (generation == NULL) {
crm_err("Generation was NULL");
ack_nack_bool = FALSE;
} else if ((join_version == NULL)
|| !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
crm_err("Node %s feature set (%s) is incompatible with ours (%s)",
join_from, (join_version? join_version : "pre-3.1.0"),
CRM_FEATURE_SET);
ack_nack_bool = FALSE;
} else if (max_generation_xml == NULL) {
max_generation_xml = copy_xml(generation);
max_generation_from = strdup(join_from);
} else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
crm_debug("%s has a better generation number than"
" the current max %s", join_from, max_generation_from);
if (max_generation_xml) {
crm_log_xml_debug(max_generation_xml, "Max generation");
}
crm_log_xml_debug(generation, "Their generation");
free(max_generation_from);
free_xml(max_generation_xml);
max_generation_from = strdup(join_from);
max_generation_xml = copy_xml(join_ack->xml);
}
if (ack_nack_bool == FALSE) {
/* NACK this client */
ack_nack = CRMD_JOINSTATE_NACK;
crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack);
crm_err("Rejecting cluster join request from %s " CRM_XS
" NACK join-%d ref=%s", join_from, join_id, ref);
} else {
crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated);
}
crm_update_peer_expected(__FUNCTION__, join_node, ack_nack);
crm_debug("%u nodes have been integrated into join-%d",
crmd_join_phase_count(crm_join_integrated), join_id);
if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
// Don't waste time by invoking the scheduler yet
crm_debug("join-%d: Still waiting on %d outstanding offers",
join_id, crmd_join_phase_count(crm_join_welcomed));
}
}
/* A_DC_JOIN_FINALIZE */
void
do_dc_join_finalize(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
char *sync_from = NULL;
int rc = pcmk_ok;
/* We can do this straight away and avoid clients timing us out
 * while we compute the latest CIB
 */
crm_debug("Finalizing join-%d for %d clients",
current_join_id, crmd_join_phase_count(crm_join_integrated));
crmd_join_phase_log(LOG_INFO);
if (crmd_join_phase_count(crm_join_welcomed) != 0) {
crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed));
/* crmd_fsa_stall(FALSE); Needed? */
return;
} else if (crmd_join_phase_count(crm_join_integrated) == 0) {
/* Nothing to do */
check_join_state(fsa_state, __FUNCTION__);
return;
}
clear_bit(fsa_input_register, R_HAVE_CIB);
if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) {
set_bit(fsa_input_register, R_HAVE_CIB);
}
if (is_set(fsa_input_register, R_IN_TRANSITION)) {
crm_warn("Delaying response to cluster join offer while transition in progress "
CRM_XS " join-%d", current_join_id);
crmd_fsa_stall(FALSE);
return;
}
if (max_generation_from && is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
/* ask for the agreed best CIB */
sync_from = strdup(max_generation_from);
set_bit(fsa_input_register, R_CIB_ASKED);
crm_notice("Syncing the Cluster Information Base from %s to the rest of the cluster "
CRM_XS " join-%d", sync_from, current_join_id);
crm_log_xml_notice(max_generation_xml, "Requested version");
} else {
/* Send _our_ CIB out to everyone */
sync_from = strdup(fsa_our_uname);
crm_info("join-%d: Syncing our CIB to the rest of the cluster",
current_join_id);
crm_log_xml_debug(max_generation_xml, "Requested version");
}
rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
}
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
CRM_LOG_ASSERT(-EPERM != rc);
clear_bit(fsa_input_register, R_CIB_ASKED);
if (rc != pcmk_ok) {
do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR),
"Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc));
/* restart the whole join process */
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
} else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) {
set_bit(fsa_input_register, R_HAVE_CIB);
clear_bit(fsa_input_register, R_CIB_ASKED);
/* make sure dc_uuid is re-set to us */
if (check_join_state(fsa_state, __FUNCTION__) == FALSE) {
crm_debug("Notifying %d clients of join-%d results",
crmd_join_phase_count(crm_join_integrated), current_join_id);
g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
}
} else {
crm_debug("No longer the DC in S_FINALIZE_JOIN: %s in %s",
AM_I_DC ? "DC" : "controller", fsa_state2string(fsa_state));
}
}
static void
join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
fsa_data_t *msg_data = NULL;
if (rc == pcmk_ok) {
crm_debug("Join update %d complete", call_id);
check_join_state(fsa_state, __FUNCTION__);
} else {
crm_err("Join update %d failed", call_id);
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
/* A_DC_JOIN_PROCESS_ACK */
void
do_dc_join_ack(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int join_id = -1;
int call_id = 0;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
crm_node_t *peer = crm_get_peer(0, join_from);
if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) {
crm_debug("Ignoring op=%s message from %s", op, join_from);
return;
}
crm_trace("Processing ack from %s", join_from);
crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
if (peer->join != crm_join_finalized) {
crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)",
join_id, join_from, peer->join);
return;
} else if (join_id != current_join_id) {
crm_err("Invalid response from %s: join-%d vs. join-%d",
join_from, join_id, current_join_id);
crm_update_peer_join(__FUNCTION__, peer, crm_join_nack);
return;
}
crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed);
crm_info("join-%d: Updating node state to %s for %s",
join_id, CRMD_JOINSTATE_MEMBER, join_from);
/* update CIB with the current LRM status from the node
* We don't need to notify the TE of these updates, a transition will
* be started in due time
*/
erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
if (safe_str_eq(join_from, fsa_our_uname)) {
xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname);
if (now_dc_lrmd_state != NULL) {
crm_debug("Local executor state updated from query");
fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
free_xml(now_dc_lrmd_state);
} else {
crm_warn("Local executor state updated from join acknowledgement because query failed");
fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
}
} else {
crm_debug("Executor state for %s updated from join acknowledgement",
join_from);
fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
}
fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
crm_debug("join-%d: Registered callback for CIB status update %d", join_id, call_id);
}
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *acknak = NULL;
xmlNode *tmp1 = NULL;
crm_node_t *join_node = value;
const char *join_to = join_node->uname;
if(join_node->join != crm_join_integrated) {
crm_trace("Skipping %s in state %d", join_to, join_node->join);
return;
}
/* make sure a node entry exists for the new node */
crm_trace("Creating node entry for %s", join_to);
tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
set_uuid(tmp1, XML_ATTR_UUID, join_node);
crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
free_xml(tmp1);
join_node = crm_get_peer(0, join_to);
if (crm_is_peer_active(join_node) == FALSE) {
/*
* NACK'ing nodes that the membership layer doesn't know about yet
* simply creates more churn
*
* Better to leave them waiting and let the join restart when
* the new membership event comes in
*
* All other NACKs (due to versions etc) should still be processed
*/
crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_PENDING);
return;
}
/* send the ack/nack to the node */
acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
crm_debug("join-%d: ACK'ing join request from %s",
current_join_id, join_to);
crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized);
crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE);
free_xml(acknak);
return;
}
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
static unsigned long long highest_seq = 0;
crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));
if (saved_ccm_membership_id != crm_peer_seq) {
crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)",
source, saved_ccm_membership_id, crm_peer_seq, highest_seq);
if(highest_seq < crm_peer_seq) {
/* Don't spam the FSA with duplicates */
highest_seq = crm_peer_seq;
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
} else if (cur_state == S_INTEGRATION) {
if (crmd_join_phase_count(crm_join_welcomed) == 0) {
crm_debug("join-%d: Integration of %d peers complete: %s",
current_join_id, crmd_join_phase_count(crm_join_integrated), source);
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
return TRUE;
}
} else if (cur_state == S_FINALIZE_JOIN) {
if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
return TRUE;
} else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
crm_debug("join-%d: Still waiting on %d welcomed nodes",
current_join_id, crmd_join_phase_count(crm_join_welcomed));
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(crm_join_integrated) != 0) {
crm_debug("join-%d: Still waiting on %d integrated nodes",
current_join_id, crmd_join_phase_count(crm_join_integrated));
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(crm_join_finalized) != 0) {
crm_debug("join-%d: Still waiting on %d finalized nodes",
current_join_id, crmd_join_phase_count(crm_join_finalized));
crmd_join_phase_log(LOG_DEBUG);
} else {
crm_debug("join-%d complete: %s", current_join_id, source);
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
return TRUE;
}
}
return FALSE;
}
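/* Editor's summary (not from the original sources): a peer progresses through
 * the join phases welcomed -> integrated -> finalized -> confirmed (see
 * do_dc_join_filter_offer(), finalize_join_for() and do_dc_join_ack() above).
 * check_join_state() only advances the DC's FSA (I_INTEGRATED, I_FINALIZED)
 * once no peer remains in an earlier phase, and restarts the join if
 * membership changed underneath it.
 */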
void
do_dc_join_final(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
crm_update_quorum(crm_have_quorum, TRUE);
}
int crmd_join_phase_count(enum crm_join_phase phase)
{
int count = 0;
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
if(peer->join == phase) {
count++;
}
}
return count;
}
void crmd_join_phase_log(int level)
{
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
crm_join_phase_str(peer->join));
}
}
diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c
index 7ab2ad611a..1c3877e1ac 100644
--- a/daemons/controld/controld_membership.c
+++ b/daemons/controld/controld_membership.c
@@ -1,440 +1,436 @@
/*
- * Copyright 2004-2018 Andrew Beekhof
+ * Copyright 2004-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
/* put these first so that uuid_t is defined without conflicts */
#include
#include
#include
-
#include
#include
#include
-#include
-#include
-#include
-#include
-#include
-#include
+
#include
gboolean membership_flux_hack = FALSE;
void post_cache_update(int instance);
int last_peer_update = 0;
guint highest_born_on = -1;
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
static void
reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
{
crm_node_t *node = value;
if (crm_is_peer_active(node) == FALSE) {
crm_update_peer_join(__FUNCTION__, node, crm_join_none);
if(node && node->uname) {
if (safe_str_eq(fsa_our_uname, node->uname)) {
crm_err("We're not part of the cluster anymore");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
} else if (AM_I_DC == FALSE && safe_str_eq(node->uname, fsa_our_dc)) {
crm_warn("Our DC node (%s) left the cluster", node->uname);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
}
}
if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) {
check_join_state(fsa_state, __FUNCTION__);
}
if(node && node->uuid) {
fail_incompletable_actions(transition_graph, node->uuid);
}
}
}
gboolean ever_had_quorum = FALSE;
void
post_cache_update(int instance)
{
xmlNode *no_op = NULL;
crm_peer_seq = instance;
crm_debug("Updated cache after membership event %d.", instance);
g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
set_bit(fsa_input_register, R_MEMBERSHIP);
if (AM_I_DC) {
populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
node_update_expected, __FUNCTION__);
}
/*
* If we lost nodes, we should re-check the election status
* Safe to call outside of an election
*/
register_fsa_action(A_ELECTION_CHECK);
/* Membership changed, remind everyone we're here.
* This will aid detection of duplicate DCs
*/
no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE);
free_xml(no_op);
}
static void
crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
fsa_data_t *msg_data = NULL;
last_peer_update = 0;
if (rc == pcmk_ok) {
crm_trace("Node update %d complete", call_id);
} else if(call_id < pcmk_ok) {
crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
/*!
* \internal
* \brief Create an XML node state tag with updates
*
* \param[in,out] node Node whose state will be used for update
* \param[in] flags Bitmask of node_update_flags indicating what to update
* \param[in,out] parent XML node to contain update (or NULL)
* \param[in] source Who requested the update (only used for logging)
*
* \return Pointer to created node state tag
*/
xmlNode *
create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
const char *source)
{
const char *value = NULL;
xmlNode *node_state;
if (!node->state) {
crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
return NULL;
}
node_state = create_xml_node(parent, XML_CIB_TAG_STATE);
if (is_set(node->flags, crm_remote_node)) {
crm_xml_add(node_state, XML_NODE_IS_REMOTE, XML_BOOLEAN_TRUE);
}
set_uuid(node_state, XML_ATTR_UUID, node);
if (crm_element_value(node_state, XML_ATTR_UUID) == NULL) {
crm_info("Node update for %s cancelled: no id", node->uname);
free_xml(node_state);
return NULL;
}
crm_xml_add(node_state, XML_ATTR_UNAME, node->uname);
if ((flags & node_update_cluster) && node->state) {
crm_xml_add_boolean(node_state, XML_NODE_IN_CLUSTER,
safe_str_eq(node->state, CRM_NODE_MEMBER));
}
if (!is_set(node->flags, crm_remote_node)) {
if (flags & node_update_peer) {
value = OFFLINESTATUS;
if (is_set(node->processes, crm_get_cluster_proc())) {
value = ONLINESTATUS;
}
crm_xml_add(node_state, XML_NODE_IS_PEER, value);
}
if (flags & node_update_join) {
if (node->join <= crm_join_none) {
value = CRMD_JOINSTATE_DOWN;
} else {
value = CRMD_JOINSTATE_MEMBER;
}
crm_xml_add(node_state, XML_NODE_JOIN_STATE, value);
}
if (flags & node_update_expected) {
crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected);
}
}
crm_xml_add(node_state, XML_ATTR_ORIGIN, source);
return node_state;
}
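/* Illustrative output (not from the original sources): with all update flags
 * set, create_node_state_update() produces roughly the following for a
 * hypothetical cluster node (attribute names per the XML_* constants used
 * above, values made up):
 *
 *   <node_state id="1" uname="node1" in_ccm="true" crmd="online"
 *               join="member" expected="member" crm-debug-origin="peer_update"/>
 */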
static void
remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
xmlNode * output, void *user_data)
{
char *node_uuid = user_data;
do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
"Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
node_uuid, pcmk_strerror(rc), rc);
}
static void
search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
xmlNode * output, void *user_data)
{
char *new_node_uuid = user_data;
xmlNode *node_xml = NULL;
if (rc != pcmk_ok) {
if (rc != -ENXIO) {
crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
new_node_uuid, pcmk_strerror(rc), rc);
}
return;
} else if (output == NULL) {
return;
}
if (safe_str_eq(crm_element_name(output), XML_CIB_TAG_NODE)) {
node_xml = output;
} else {
node_xml = __xml_first_child(output);
}
for (; node_xml != NULL; node_xml = __xml_next(node_xml)) {
const char *node_uuid = NULL;
const char *node_uname = NULL;
GHashTableIter iter;
crm_node_t *node = NULL;
gboolean known = FALSE;
if (safe_str_neq(crm_element_name(node_xml), XML_CIB_TAG_NODE)) {
continue;
}
node_uuid = crm_element_value(node_xml, XML_ATTR_ID);
node_uname = crm_element_value(node_xml, XML_ATTR_UNAME);
if (node_uuid == NULL || node_uname == NULL) {
continue;
}
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (node->uuid
&& safe_str_eq(node->uuid, node_uuid)
&& node->uname
&& safe_str_eq(node->uname, node_uname)) {
known = TRUE;
break;
}
}
if (known == FALSE) {
int delete_call_id = 0;
xmlNode *node_state_xml = NULL;
crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
node_uuid, node_uname, new_node_uuid);
delete_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_NODES, node_xml,
cib_scope_local | cib_quorum_override);
fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid),
remove_conflicting_node_callback);
node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid);
crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname);
delete_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state_xml,
cib_scope_local | cib_quorum_override);
fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid),
remove_conflicting_node_callback);
free_xml(node_state_xml);
}
}
}
static void
node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
fsa_data_t *msg_data = NULL;
if(call_id < pcmk_ok) {
crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
crm_log_xml_debug(msg, "update:failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else if(rc < pcmk_ok) {
crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
crm_log_xml_debug(msg, "update:failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
#define NODE_PATH_MAX 512
void
populate_cib_nodes(enum node_update_flags flags, const char *source)
{
int call_id = 0;
gboolean from_hashtable = TRUE;
int call_options = cib_scope_local | cib_quorum_override;
xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
#if SUPPORT_COROSYNC
if (is_not_set(flags, node_update_quick) && is_corosync_cluster()) {
from_hashtable = corosync_initialize_nodelist(NULL, FALSE, node_list);
}
#endif
if (from_hashtable) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *new_node = NULL;
crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
if(node->uuid && node->uname) {
char xpath[NODE_PATH_MAX];
/* We need both to be valid */
new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
/* Search and remove unknown nodes with the conflicting uname from CIB */
snprintf(xpath, NODE_PATH_MAX,
"/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES
"/" XML_CIB_TAG_NODE "[@uname='%s'][@id!='%s']",
node->uname, node->uuid);
call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, xpath, NULL,
cib_scope_local | cib_xpath);
fsa_register_cib_callback(call_id, FALSE, strdup(node->uuid),
search_conflicting_node_callback);
}
}
}
crm_trace("Populating section from %s", from_hashtable ? "hashtable" : "cluster");
fsa_cib_update(XML_CIB_TAG_NODES, node_list, call_options, call_id, NULL);
fsa_register_cib_callback(call_id, FALSE, NULL, node_list_update_callback);
free_xml(node_list);
if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) {
/*
* There is no need to update the local CIB with our values if
* we've not seen valid membership data
*/
GHashTableIter iter;
crm_node_t *node = NULL;
node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
create_node_state_update(node, flags, node_list, source);
}
if (crm_remote_peer_cache) {
g_hash_table_iter_init(&iter, crm_remote_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
create_node_state_update(node, flags, node_list, source);
}
}
fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
last_peer_update = call_id;
free_xml(node_list);
}
}
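/* Illustrative expansion (not from the original sources): for a hypothetical
 * node "node1" with id "1", the conflict query above becomes
 *   /cib/configuration/nodes/node[@uname='node1'][@id!='1']
 * i.e. any node entry claiming the same uname under a different id, which
 * search_conflicting_node_callback() then removes.
 */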
static void
cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
fsa_data_t *msg_data = NULL;
if (rc == pcmk_ok) {
crm_trace("Quorum update %d complete", call_id);
} else {
crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
void
crm_update_quorum(gboolean quorum, gboolean force_update)
{
ever_had_quorum |= quorum;
if(ever_had_quorum && quorum == FALSE && no_quorum_suicide_escalation) {
pcmk_panic(__FUNCTION__);
}
if (AM_I_DC && (force_update || fsa_has_quorum != quorum)) {
int call_id = 0;
xmlNode *update = NULL;
int call_options = cib_scope_local | cib_quorum_override;
update = create_xml_node(NULL, XML_TAG_CIB);
crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum);
crm_xml_add(update, XML_ATTR_DC_UUID, fsa_our_uuid);
fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL);
crm_debug("Updating quorum status to %s (call=%d)", quorum ? "true" : "false", call_id);
fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete);
free_xml(update);
/* Quorum changes usually cause a new transition via other activity:
* quorum gained via a node joining will abort via the node join,
* and quorum lost via a node leaving will usually abort via resource
* activity and/or fencing.
*
* However, it is possible that nothing else causes a transition (e.g.
* someone forces quorum via corosync-cmaptcl, or quorum is lost due to
* a node in standby shutting down cleanly), so here ensure a new
* transition is triggered.
*/
if (quorum) {
/* If quorum was gained, abort after a short delay, in case multiple
* nodes are joining around the same time, so the one that brings us
* to quorum doesn't cause all the remaining ones to be fenced.
*/
abort_after_delay(INFINITY, tg_restart, "Quorum gained", 5000);
} else {
abort_transition(INFINITY, tg_restart, "Quorum lost", NULL);
}
}
fsa_has_quorum = quorum;
}
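/* Illustrative update (not from the original sources): the quorum change above
 * is written as a minimal CIB element, roughly
 *   <cib have-quorum="1" dc-uuid="OUR-UUID"/>
 * (attribute names per XML_ATTR_HAVE_QUORUM and XML_ATTR_DC_UUID, values
 * hypothetical), which also re-asserts this node as the DC.
 */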
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index 510ff43c93..9af1d09202 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -1,1174 +1,1167 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
-#include
#include
#include
-#include
+#include
#include
#include
-
#include
#include
#include
#include
-#include
-#include
-#include
-#include
-#include
GListPtr fsa_message_queue = NULL;
extern void crm_shutdown(int nsig);
void handle_response(xmlNode * stored_msg);
enum crmd_fsa_input handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause);
enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg);
#define ROUTER_RESULT(x) crm_trace("Router result: %s", x)
/* debug only, can wrap all it likes */
int last_data_id = 0;
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
/* save the current actions if any */
if (fsa_actions != A_NOTHING) {
register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
I_NULL, cur_data ? cur_data->data : NULL,
fsa_actions, TRUE, __FUNCTION__);
}
/* reset the action list */
crm_info("Resetting the current action list");
fsa_dump_actions(fsa_actions, "Drop");
fsa_actions = A_NOTHING;
/* register the error */
register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
int
register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
void *data, long long with_actions,
gboolean prepend, const char *raised_from)
{
unsigned old_len = g_list_length(fsa_message_queue);
fsa_data_t *fsa_data = NULL;
if (raised_from == NULL) {
raised_from = "";
}
if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
/* no point doing anything */
crm_err("Cannot add entry to queue: no input and no action");
return 0;
}
if (input == I_WAIT_FOR_EVENT) {
do_fsa_stall = TRUE;
crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
raised_from, fsa_cause2string(cause), data, old_len);
if (old_len > 0) {
fsa_dump_queue(LOG_TRACE);
prepend = FALSE;
}
if (data == NULL) {
fsa_actions |= with_actions;
fsa_dump_actions(with_actions, "Restored");
return 0;
}
/* Store everything in the new event and reset fsa_actions */
with_actions |= fsa_actions;
fsa_actions = A_NOTHING;
}
last_data_id++;
crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data",
raised_from, prepend ? "prepended" : "appended", last_data_id,
fsa_input2string(input), fsa_cause2string(cause), data ? "with" : "without");
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->id = last_data_id;
fsa_data->fsa_input = input;
fsa_data->fsa_cause = cause;
fsa_data->origin = raised_from;
fsa_data->data = NULL;
fsa_data->data_type = fsa_dt_none;
fsa_data->actions = with_actions;
if (with_actions != A_NOTHING) {
crm_trace("Adding actions %.16llx to input", with_actions);
}
if (data != NULL) {
switch (cause) {
case C_FSA_INTERNAL:
case C_CRMD_STATUS_CALLBACK:
case C_IPC_MESSAGE:
case C_HA_MESSAGE:
crm_trace("Copying %s data from %s as a HA msg",
fsa_cause2string(cause), raised_from);
CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
crm_err("Bogus data from %s", raised_from));
fsa_data->data = copy_ha_msg_input(data);
fsa_data->data_type = fsa_dt_ha_msg;
break;
case C_LRM_OP_CALLBACK:
crm_trace("Copying %s data from %s as lrmd_event_data_t",
fsa_cause2string(cause), raised_from);
fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
fsa_data->data_type = fsa_dt_lrm;
break;
case C_TIMER_POPPED:
case C_SHUTDOWN:
case C_UNKNOWN:
case C_STARTUP:
crm_err("Copying %s data (from %s)"
" not yet implemented", fsa_cause2string(cause), raised_from);
crmd_exit(CRM_EX_SOFTWARE);
break;
}
crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause));
}
/* make sure to free it properly later */
if (prepend) {
crm_trace("Prepending input");
fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data);
} else {
fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
}
crm_trace("Queue len: %d", g_list_length(fsa_message_queue));
/* fsa_dump_queue(LOG_TRACE); */
if (old_len == g_list_length(fsa_message_queue)) {
crm_err("Couldn't add message to the queue");
}
if (fsa_source && input != I_WAIT_FOR_EVENT) {
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
}
return last_data_id;
}
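/* Illustrative sketch (not from the original sources): a minimal direct call;
 * the register_fsa_input*() convenience macros used elsewhere wrap this
 * function with preset action and prepend arguments. The helper name is
 * hypothetical.
 */
static void
queue_election_input_example(void)
{
    /* Append an I_ELECTION input with no message payload or extra actions */
    register_fsa_input_adv(C_FSA_INTERNAL, I_ELECTION, NULL,
                           A_NOTHING, FALSE, __FUNCTION__);
}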
void
fsa_dump_queue(int log_level)
{
int offset = 0;
GListPtr lpc = NULL;
for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) {
fsa_data_t *data = (fsa_data_t *) lpc->data;
do_crm_log_unlikely(log_level,
"queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
offset++, data->id, fsa_input2string(data->fsa_input),
data->origin, data->data, data->data_type,
fsa_cause2string(data->fsa_cause));
}
}
ha_msg_input_t *
copy_ha_msg_input(ha_msg_input_t * orig)
{
ha_msg_input_t *copy = NULL;
xmlNodePtr data = NULL;
if (orig != NULL) {
crm_trace("Copy msg");
data = copy_xml(orig->msg);
} else {
crm_trace("No message to copy");
}
copy = new_ha_msg_input(data);
if (orig && orig->msg != NULL) {
CRM_CHECK(copy->msg != NULL, crm_err("copy failed"));
}
return copy;
}
void
delete_fsa_input(fsa_data_t * fsa_data)
{
lrmd_event_data_t *op = NULL;
xmlNode *foo = NULL;
if (fsa_data == NULL) {
return;
}
crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
if (fsa_data->data != NULL) {
switch (fsa_data->data_type) {
case fsa_dt_ha_msg:
delete_ha_msg_input(fsa_data->data);
break;
case fsa_dt_xml:
foo = fsa_data->data;
free_xml(foo);
break;
case fsa_dt_lrm:
op = (lrmd_event_data_t *) fsa_data->data;
lrmd_free_event(op);
break;
case fsa_dt_none:
if (fsa_data->data != NULL) {
crm_err("Don't know how to free %s data from %s",
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
crmd_exit(CRM_EX_SOFTWARE);
}
break;
}
crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
}
free(fsa_data);
}
/* Removes and returns the next message from the FIFO queue */
fsa_data_t *
get_message(void)
{
fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0);
fsa_message_queue = g_list_remove(fsa_message_queue, message);
crm_trace("Processing input %d", message->id);
return message;
}
/* Returns TRUE if the FIFO queue contains at least one message */
gboolean
is_message(void)
{
return (g_list_length(fsa_message_queue) > 0);
}
void *
fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
{
void *ret_val = NULL;
if (fsa_data == NULL) {
crm_err("%s: No FSA data available", caller);
} else if (fsa_data->data == NULL) {
crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
} else if (fsa_data->data_type != a_type) {
crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
caller, fsa_data->data_type, a_type, fsa_data->origin);
CRM_ASSERT(fsa_data->data_type == a_type);
} else {
ret_val = fsa_data->data;
}
return ret_val;
}
/* A_MSG_ROUTE */
void
do_msg_route(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
route_message(msg_data->fsa_cause, input->msg);
}
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
ha_msg_input_t fsa_input;
enum crmd_fsa_input result = I_NULL;
fsa_input.msg = input;
CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
/* try passing the buck first */
if (relay_message(input, cause == C_IPC_MESSAGE)) {
return;
}
/* handle locally */
result = handle_message(input, cause);
/* done or process later? */
switch (result) {
case I_NULL:
case I_CIB_OP:
case I_ROUTER:
case I_NODE_JOIN:
case I_JOIN_REQUEST:
case I_JOIN_RESULT:
break;
default:
/* Deferring local processing of message */
register_fsa_input_later(cause, result, &fsa_input);
return;
}
if (result != I_NULL) {
/* add to the front of the queue */
register_fsa_input(cause, result, &fsa_input);
}
}
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
int dest = 1;
int is_for_dc = 0;
int is_for_dcib = 0;
int is_for_te = 0;
int is_for_crm = 0;
int is_for_cib = 0;
int is_local = 0;
gboolean processing_complete = FALSE;
const char *host_to = crm_element_value(msg, F_CRM_HOST_TO);
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
const char *type = crm_element_value(msg, F_TYPE);
const char *task = crm_element_value(msg, F_CRM_TASK);
const char *msg_error = NULL;
crm_trace("Routing message %s", crm_element_value(msg, XML_ATTR_REFERENCE));
if (msg == NULL) {
msg_error = "Cannot route empty message";
} else if (safe_str_eq(task, CRM_OP_HELLO)) {
/* quietly ignore */
processing_complete = TRUE;
} else if (safe_str_neq(type, T_CRM)) {
msg_error = "Bad message type";
} else if (sys_to == NULL) {
msg_error = "Bad message destination: no subsystem";
}
if (msg_error != NULL) {
processing_complete = TRUE;
crm_err("%s", msg_error);
crm_log_xml_warn(msg, "bad msg");
}
if (processing_complete) {
return TRUE;
}
processing_complete = TRUE;
is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
is_local = 0;
if (host_to == NULL || strlen(host_to) == 0) {
if (is_for_dc || is_for_te) {
is_local = 0;
} else if (is_for_crm) {
if (safe_str_eq(task, CRM_OP_NODE_INFO)) {
/* Node info requests do not specify a host, which is normally
* treated as "all hosts", because the whole point is that the
* client doesn't know the local node name. Always handle these
* requests locally.
*/
is_local = 1;
} else {
is_local = !originated_locally;
}
} else {
is_local = 1;
}
} else if (safe_str_eq(fsa_our_uname, host_to)) {
is_local = 1;
}
if (is_for_dc || is_for_dcib || is_for_te) {
if (AM_I_DC && is_for_te) {
ROUTER_RESULT("Message result: Local relay");
send_msg_via_ipc(msg, sys_to);
} else if (AM_I_DC) {
ROUTER_RESULT("Message result: DC/controller process");
processing_complete = FALSE; /* more to be done by caller */
} else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE)
&& safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) {
/* Neither the TE nor the scheduler should be sending messages
* to DCs on other nodes. By definition, if we are no longer the DC,
* then the scheduler's or TE's data should be discarded.
*/
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
dest = text2msg_type(sys_to);
}
#endif
ROUTER_RESULT("Message result: External relay to DC");
send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
} else {
/* discard */
ROUTER_RESULT("Message result: Discard, not DC");
}
} else if (is_local && (is_for_crm || is_for_cib)) {
ROUTER_RESULT("Message result: controller process");
processing_complete = FALSE; /* more to be done by caller */
} else if (is_local) {
ROUTER_RESULT("Message result: Local relay");
send_msg_via_ipc(msg, sys_to);
} else {
crm_node_t *node_to = NULL;
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
dest = text2msg_type(sys_to);
if (dest == crm_msg_none || dest > crm_msg_stonith_ng) {
dest = crm_msg_crmd;
}
}
#endif
if (host_to) {
node_to = crm_find_peer(0, host_to);
if (node_to == NULL) {
crm_err("Cannot route message to unknown node %s", host_to);
return TRUE;
}
}
ROUTER_RESULT("Message result: External relay");
send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE);
}
return processing_complete;
}
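/* Editor's summary (not from the original sources) of the routing outcomes
 * above:
 *   - for the DC/DCIB/TE while we are DC: relay to the TE locally, or leave
 *     for the caller to handle
 *   - for the DC while we are not DC: relay to the cluster, unless it came
 *     from our own scheduler or TE (then discard)
 *   - local and for the controller or CIB: leave for the caller to handle
 *   - local and for another subsystem: send_msg_via_ipc()
 *   - otherwise: send_cluster_message() to the named peer
 */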
static gboolean
process_hello_message(xmlNode * hello,
char **client_name, char **major_version, char **minor_version)
{
const char *local_client_name;
const char *local_major_version;
const char *local_minor_version;
*client_name = NULL;
*major_version = NULL;
*minor_version = NULL;
if (hello == NULL) {
return FALSE;
}
local_client_name = crm_element_value(hello, "client_name");
local_major_version = crm_element_value(hello, "major_version");
local_minor_version = crm_element_value(hello, "minor_version");
if (local_client_name == NULL || strlen(local_client_name) == 0) {
crm_err("Hello message was not valid (field %s not found)", "client name");
return FALSE;
} else if (local_major_version == NULL || strlen(local_major_version) == 0) {
crm_err("Hello message was not valid (field %s not found)", "major version");
return FALSE;
} else if (local_minor_version == NULL || strlen(local_minor_version) == 0) {
crm_err("Hello message was not valid (field %s not found)", "minor version");
return FALSE;
}
*client_name = strdup(local_client_name);
*major_version = strdup(local_major_version);
*minor_version = strdup(local_minor_version);
crm_trace("Hello message ok");
return TRUE;
}
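/* Illustrative payload (not from the original sources; element and values are
 * hypothetical): the hello data parsed above looks roughly like
 *   <options client_name="crmadmin" major_version="2" minor_version="0"/>
 */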
gboolean
crmd_authorize_message(xmlNode * client_msg, crm_client_t * curr_client, const char *proxy_session)
{
char *client_name = NULL;
char *major_version = NULL;
char *minor_version = NULL;
gboolean auth_result = FALSE;
xmlNode *xml = NULL;
const char *op = crm_element_value(client_msg, F_CRM_TASK);
const char *uuid = curr_client ? curr_client->id : proxy_session;
if (uuid == NULL) {
crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE));
return FALSE;
} else if (safe_str_neq(CRM_OP_HELLO, op)) {
return TRUE;
}
xml = get_message_xml(client_msg, F_CRM_DATA);
auth_result = process_hello_message(xml, &client_name, &major_version, &minor_version);
if (auth_result == TRUE) {
if (client_name == NULL) {
crm_err("Bad client details (client_name=%s, uuid=%s)",
crm_str(client_name), uuid);
auth_result = FALSE;
}
}
if (auth_result == TRUE) {
/* check version */
int mav = atoi(major_version);
int miv = atoi(minor_version);
crm_trace("Checking client version number");
if (mav < 0 || miv < 0) {
crm_err("Client version (%d:%d) is not acceptable", mav, miv);
auth_result = FALSE;
}
}
if (auth_result == TRUE) {
crm_trace("Accepted client %s", client_name);
if (curr_client) {
curr_client->userdata = strdup(client_name);
}
crm_trace("Triggering FSA: %s", __FUNCTION__);
mainloop_set_trigger(fsa_source);
} else {
crm_warn("Rejected client logon request");
if (curr_client) {
qb_ipcs_disconnect(curr_client->ipcs);
}
}
free(minor_version);
free(major_version);
free(client_name);
/* hello messages should never be processed further */
return FALSE;
}
enum crmd_fsa_input
handle_message(xmlNode * msg, enum crmd_fsa_cause cause)
{
const char *type = NULL;
CRM_CHECK(msg != NULL, return I_NULL);
type = crm_element_value(msg, F_CRM_MSG_TYPE);
if (crm_str_eq(type, XML_ATTR_REQUEST, TRUE)) {
return handle_request(msg, cause);
} else if (crm_str_eq(type, XML_ATTR_RESPONSE, TRUE)) {
handle_response(msg);
return I_NULL;
}
crm_err("Unknown message type: %s", type);
return I_NULL;
}
static enum crmd_fsa_input
handle_failcount_op(xmlNode * stored_msg)
{
const char *rsc = NULL;
const char *uname = NULL;
const char *op = NULL;
const char *interval_ms_s = NULL;
char *interval_spec = NULL;
guint interval_ms = 0;
gboolean is_remote_node = FALSE;
xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA);
if (xml_op) {
xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE);
xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS);
if (xml_rsc) {
rsc = ID(xml_rsc);
}
if (xml_attrs) {
op = crm_element_value(xml_attrs,
CRM_META "_" XML_RSC_ATTR_CLEAR_OP);
interval_ms_s = crm_element_value(xml_attrs,
CRM_META "_" XML_RSC_ATTR_CLEAR_INTERVAL);
interval_ms = crm_parse_ms(interval_ms_s);
}
}
uname = crm_element_value(xml_op, XML_LRM_ATTR_TARGET);
if ((rsc == NULL) || (uname == NULL)) {
crm_log_xml_warn(stored_msg, "invalid failcount op");
return I_NULL;
}
if (crm_element_value(xml_op, XML_LRM_ATTR_ROUTER_NODE)) {
is_remote_node = TRUE;
}
if (interval_ms) {
interval_spec = crm_strdup_printf("%ums", interval_ms);
}
update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
free(interval_spec);
lrm_clear_last_failure(rsc, uname, op, interval_ms);
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_remote_state(xmlNode *msg)
{
const char *remote_uname = ID(msg);
const char *remote_is_up = crm_element_value(msg, XML_NODE_IN_CLUSTER);
crm_node_t *remote_peer;
CRM_CHECK(remote_uname && remote_is_up, return I_NULL);
remote_peer = crm_remote_peer_get(remote_uname);
CRM_CHECK(remote_peer, return I_NULL);
crm_update_peer_state(__FUNCTION__, remote_peer,
crm_is_true(remote_is_up)?
CRM_NODE_MEMBER : CRM_NODE_LOST, 0);
return I_NULL;
}
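/* Illustrative message (not from the original sources; values hypothetical):
 * the handled state change carries roughly
 *   id="remote1" in_ccm="true"
 * on the message XML, "in_ccm" being XML_NODE_IN_CLUSTER; see
 * send_remote_state_message() at the end of this file for the sender side.
 */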
/*!
* \brief Handle a CRM_OP_PING message
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_ping(xmlNode *msg)
{
const char *value = NULL;
xmlNode *ping = NULL;
// Build reply
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
value = crm_element_value(msg, F_CRM_SYS_TO);
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
// Add controller state
value = fsa_state2string(fsa_state);
crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value);
crm_notice("Current ping state: %s", value); // CTS needs this
// Add controller health
// @TODO maybe do some checks to determine meaningful status
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
// Send reply
msg = create_reply(msg, ping);
free_xml(ping);
if (msg) {
(void) relay_message(msg, TRUE);
free_xml(msg);
}
// Nothing further to do
return I_NULL;
}
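/* Illustrative reply (not from the original sources; attribute names per the
 * XML_PING_ATTR_* constants, values hypothetical):
 *   <ping crm_subsystem="crmd" crmd_state="S_IDLE" result="ok"/>
 */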
/*!
* \brief Handle a CRM_OP_NODE_INFO request
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_info_request(xmlNode *msg)
{
const char *value = NULL;
crm_node_t *node = NULL;
int node_id = 0;
xmlNode *reply = NULL;
// Build reply
reply = create_xml_node(NULL, XML_CIB_TAG_NODE);
crm_xml_add(reply, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD);
// Add whether current partition has quorum
crm_xml_add_boolean(reply, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
// Check whether client requested node info by ID and/or name
crm_element_value_int(msg, XML_ATTR_ID, &node_id);
if (node_id < 0) {
node_id = 0;
}
value = crm_element_value(msg, XML_ATTR_UNAME);
// Default to local node if none given
if ((node_id == 0) && (value == NULL)) {
value = fsa_our_uname;
}
node = crm_find_peer_full(node_id, value, CRM_GET_PEER_ANY);
if (node) {
crm_xml_add_int(reply, XML_ATTR_ID, node->id);
crm_xml_add(reply, XML_ATTR_UUID, node->uuid);
crm_xml_add(reply, XML_ATTR_UNAME, node->uname);
crm_xml_add(reply, XML_NODE_IS_PEER, node->state);
crm_xml_add_boolean(reply, XML_NODE_IS_REMOTE,
node->flags & crm_remote_node);
}
// Send reply
msg = create_reply(msg, reply);
free_xml(reply);
if (msg) {
(void) relay_message(msg, TRUE);
free_xml(msg);
}
// Nothing further to do
return I_NULL;
}
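/* Editor's note (not from the original sources): this is the server side of
 * the host-less CRM_OP_NODE_INFO requests special-cased in relay_message()
 * above -- clients that do not know the local node name use it to ask.
 */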
static void
verify_feature_set(xmlNode *msg)
{
const char *dc_version = crm_element_value(msg, XML_ATTR_CRM_VERSION);
if (dc_version == NULL) {
/* All we really know is that the DC feature set is older than 3.1.0,
* but that's also all that really matters.
*/
dc_version = "3.0.14";
}
if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
crm_trace("Local feature set (%s) is compatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
} else {
crm_err("Local feature set (%s) is incompatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
// Nothing is likely to improve without administrator involvement
set_bit(fsa_input_register, R_STAYDOWN);
crmd_exit(CRM_EX_FATAL);
}
}
enum crmd_fsa_input
handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
{
xmlNode *msg = NULL;
const char *op = crm_element_value(stored_msg, F_CRM_TASK);
/* Optimize this for the DC - it has the most to do */
if (op == NULL) {
crm_log_xml_err(stored_msg, "Bad message");
return I_NULL;
}
if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
crm_node_t *node = crm_find_peer(0, from);
crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN);
if(AM_I_DC == FALSE) {
return I_NULL; /* Done */
}
}
/*========== DC-Only Actions ==========*/
if (AM_I_DC) {
if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
return I_NODE_JOIN;
} else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
return I_JOIN_REQUEST;
} else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_info("Shutting ourselves down (DC)");
return I_STOP;
} else if (dc_match) {
crm_err("We didn't ask to be shut down, yet our"
" TE is telling us to. Better get out now!");
return I_TERMINATE;
} else if (fsa_state != S_STOPPING) {
crm_err("Another node is asking us to shut down but we think we're ok.");
return I_ELECTION;
}
} else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
/* a non-DC node wants to shut down; record the request so the DC
 * stays in control of the shutdown (see handle_shutdown_request) */
return handle_shutdown_request(stored_msg);
} else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
/* a remote connection host is letting us know the node state */
return handle_remote_state(stored_msg);
}
}
/*========== common actions ==========*/
if (strcmp(op, CRM_OP_NOVOTE) == 0) {
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);
} else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
throttle_update(stored_msg);
if (AM_I_DC && transition_graph != NULL) {
if (transition_graph->complete == FALSE) {
crm_debug("The throttle changed. Trigger a graph.");
trigger_graph();
}
}
return I_NULL;
} else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
return handle_failcount_op(stored_msg);
} else if (strcmp(op, CRM_OP_VOTE) == 0) {
/* count the vote and decide what to do after that */
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __FUNCTION__);
/* Sometimes we _must_ go into S_ELECTION */
if (fsa_state == S_HALT) {
crm_debug("Forcing an election from S_HALT");
return I_ELECTION;
#if 0
} else if (AM_I_DC) {
/* This is the old way of doing things but what is gained? */
return I_ELECTION;
#endif
}
} else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
verify_feature_set(stored_msg);
crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
return I_JOIN_OFFER;
} else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_LRM_DELETE) == 0
|| strcmp(op, CRM_OP_LRM_FAIL) == 0
|| strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) {
crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else if (strcmp(op, CRM_OP_NOOP) == 0) {
return I_NULL;
} else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {
crm_shutdown(SIGTERM);
/*return I_SHUTDOWN; */
return I_NULL;
} else if (strcmp(op, CRM_OP_PING) == 0) {
return handle_ping(stored_msg);
} else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
return handle_node_info_request(stored_msg);
} else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
int id = 0;
const char *name = NULL;
crm_element_value_int(stored_msg, XML_ATTR_ID, &id);
name = crm_element_value(stored_msg, XML_ATTR_UNAME);
if(cause == C_IPC_MESSAGE) {
msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
} else {
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
}
free_xml(msg);
} else {
reap_crm_member(id, name);
/* If we're forgetting this node, also forget any failures to fence
* it, so we don't carry that over to any node added later with the
* same name.
*/
st_fail_count_reset(name);
}
} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
remote_ra_process_maintenance_nodes(xml);
/*========== (NOT_DC)-Only Actions ==========*/
} else if (AM_I_DC == FALSE && strcmp(op, CRM_OP_SHUTDOWN) == 0) {
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
gboolean dc_match = safe_str_eq(host_from, fsa_our_dc);
if (dc_match || fsa_our_dc == NULL) {
if (is_set(fsa_input_register, R_SHUTDOWN) == FALSE) {
crm_err("We didn't ask to be shut down, yet our DC is telling us to.");
set_bit(fsa_input_register, R_STAYDOWN);
return I_STOP;
}
crm_info("Shutting down");
return I_STOP;
} else {
crm_warn("Discarding %s op from %s", op, host_from);
}
} else {
crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
crm_log_xml_err(stored_msg, "Unexpected");
}
return I_NULL;
}
void
handle_response(xmlNode * stored_msg)
{
const char *op = crm_element_value(stored_msg, F_CRM_TASK);
if (op == NULL) {
crm_log_xml_err(stored_msg, "Bad message");
} else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
// Check whether the scheduler's answer has been superseded by a subsequent request
const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE);
if (msg_ref == NULL) {
crm_err("%s - Ignoring calculation with no reference", op);
} else if (safe_str_eq(msg_ref, fsa_pe_ref)) {
ha_msg_input_t fsa_input;
controld_stop_sched_timer();
fsa_input.msg = stored_msg;
register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
} else {
crm_info("%s calculation %s is obsolete", op, msg_ref);
}
} else if (strcmp(op, CRM_OP_VOTE) == 0
|| strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
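/* Expected responses to requests of our own: nothing further to do */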
} else {
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
op, host_from, AM_I_DC ? "DC" : "controller");
}
}
enum crmd_fsa_input
handle_shutdown_request(xmlNode * stored_msg)
{
/* handle here to avoid potential version issues
* where the shutdown message/procedure may have
* been changed in later versions.
*
* This way the DC is always in control of the shutdown
*/
char *now_s = NULL;
time_t now = time(NULL);
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
if (host_from == NULL) {
/* the request is our own: we are the DC, and we are shutting down */
host_from = fsa_our_uname;
}
crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state));
crm_log_xml_trace(stored_msg, "message");
now_s = crm_itoa(now);
update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE);
free(now_s);
/* will be picked up by the TE as long as it's running */
return I_NULL;
}
/* msg is deleted by the time this returns */
extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data);
gboolean
send_msg_via_ipc(xmlNode * msg, const char *sys)
{
gboolean send_ok = TRUE;
crm_client_t *client_channel = crm_client_get_by_id(sys);
if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) {
crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname);
}
if (client_channel != NULL) {
/* Transient clients such as crmadmin */
send_ok = crm_ipcs_send(client_channel, 0, msg, crm_ipc_server_event);
} else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) {
xmlNode *data = get_message_xml(msg, F_CRM_DATA);
process_te_message(msg, data);
} else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) {
fsa_data_t fsa_data;
ha_msg_input_t fsa_input;
fsa_input.msg = msg;
fsa_input.xml = get_message_xml(msg, F_CRM_DATA);
fsa_data.id = 0;
fsa_data.actions = 0;
fsa_data.data = &fsa_input;
fsa_data.fsa_input = I_MESSAGE;
fsa_data.fsa_cause = C_IPC_MESSAGE;
fsa_data.origin = __FUNCTION__;
fsa_data.data_type = fsa_dt_ha_msg;
#ifdef FSA_TRACE
crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE);
#endif
do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data);
} else if (sys != NULL && crmd_is_proxy_session(sys)) {
crmd_proxy_send(sys, msg);
} else {
crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys));
send_ok = FALSE;
}
return send_ok;
}
ha_msg_input_t *
new_ha_msg_input(xmlNode * orig)
{
ha_msg_input_t *input_copy = NULL;
input_copy = calloc(1, sizeof(ha_msg_input_t));
input_copy->msg = orig;
input_copy->xml = get_message_xml(input_copy->msg, F_CRM_DATA);
return input_copy;
}
void
delete_ha_msg_input(ha_msg_input_t * orig)
{
if (orig == NULL) {
return;
}
free_xml(orig->msg);
free(orig);
}
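/* Editor's note (not from the original sources): new_ha_msg_input() stores the
 * caller's XML rather than copying it, which is why delete_ha_msg_input()
 * frees orig->msg; callers needing an independent copy go through
 * copy_ha_msg_input() above instead.
 */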
/*!
* \internal
* \brief Notify the DC of a remote node state change
*
* \param[in] node_name Node's name
* \param[in] node_up TRUE if node is up, FALSE if down
*/
void
send_remote_state_message(const char *node_name, gboolean node_up)
{
/* If we don't have a DC, or the message fails, we have a failsafe:
* the DC will eventually pick up the change via the CIB node state.
* The message allows it to happen sooner if possible.
*/
if (fsa_our_dc) {
xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, fsa_our_dc,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_info("Notifying DC %s of pacemaker_remote node %s %s",
fsa_our_dc, node_name, (node_up? "coming up" : "going down"));
crm_xml_add(msg, XML_ATTR_ID, node_name);
crm_xml_add_boolean(msg, XML_NODE_IN_CLUSTER, node_up);
send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, msg,
TRUE);
free_xml(msg);
} else {
crm_debug("No DC to notify of pacemaker_remote node %s %s",
node_name, (node_up? "coming up" : "going down"));
}
}
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
index 2b38fa1b71..5700c076c7 100644
--- a/daemons/controld/controld_metadata.c
+++ b/daemons/controld/controld_metadata.c
@@ -1,269 +1,271 @@
/*
- * Copyright 2017-2018 Andrew Beekhof
+ * Copyright 2017-2019 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
-#include "controld_lrm.h"
+#include
#if ENABLE_VERSIONED_ATTRS
static regex_t *version_format_regex = NULL;
#endif
static void
ra_param_free(void *param)
{
if (param) {
struct ra_param_s *p = (struct ra_param_s *) param;
if (p->rap_name) {
free(p->rap_name);
}
free(param);
}
}
static void
metadata_free(void *metadata)
{
if (metadata) {
struct ra_metadata_s *md = (struct ra_metadata_s *) metadata;
if (md->ra_version) {
free(md->ra_version);
}
g_list_free_full(md->ra_params, ra_param_free);
free(metadata);
}
}
GHashTable *
metadata_cache_new()
{
return g_hash_table_new_full(crm_str_hash, g_str_equal, free,
metadata_free);
}
void
metadata_cache_free(GHashTable *mdc)
{
if (mdc) {
crm_trace("Destroying metadata cache with %d members", g_hash_table_size(mdc));
g_hash_table_destroy(mdc);
}
}
void
metadata_cache_reset(GHashTable *mdc)
{
if (mdc) {
crm_trace("Resetting metadata cache with %d members",
g_hash_table_size(mdc));
g_hash_table_remove_all(mdc);
}
}
#if ENABLE_VERSIONED_ATTRS
static gboolean
valid_version_format(const char *version)
{
if (version == NULL) {
return FALSE;
}
if (version_format_regex == NULL) {
/* The OCF standard allows free-form versioning, but for our purposes of
* versioned resource and operation attributes, we constrain it to
* dot-separated numbers. Agents are still free to use other schemes,
* but we can't determine attributes based on them.
*/
const char *regex_string = "^[[:digit:]]+([.][[:digit:]]+)*$";
version_format_regex = calloc(1, sizeof(regex_t));
regcomp(version_format_regex, regex_string, REG_EXTENDED | REG_NOSUB);
/* If our regex doesn't compile, it's a bug on our side, so CRM_CHECK()
* will give us a core dump to catch it. Pretend the version is OK
* because we don't want our mistake to break versioned attributes
* (which should only ever happen in a development branch anyway).
*/
CRM_CHECK(version_format_regex != NULL, return TRUE);
}
return regexec(version_format_regex, version, 0, NULL, 0) == 0;
}
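/* Illustrative matches (not from the original sources): under the regex above,
 * "1.0.2" and "10" are accepted, while "1.0-rc1" and "v2" are rejected, in
 * which case ra_version_from_xml() below falls back to
 * PCMK_DEFAULT_AGENT_VERSION.
 */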
#endif
void
metadata_cache_fini()
{
#if ENABLE_VERSIONED_ATTRS
if (version_format_regex) {
regfree(version_format_regex);
free(version_format_regex);
version_format_regex = NULL;
}
#endif
}
#if ENABLE_VERSIONED_ATTRS
static char *
ra_version_from_xml(xmlNode *metadata_xml, const lrmd_rsc_info_t *rsc)
{
const char *version = crm_element_value(metadata_xml, XML_ATTR_VERSION);
if (version == NULL) {
crm_debug("Metadata for %s:%s:%s does not specify a version",
rsc->standard, rsc->provider, rsc->type);
version = PCMK_DEFAULT_AGENT_VERSION;
} else if (!valid_version_format(version)) {
crm_notice("%s:%s:%s metadata version has unrecognized format",
rsc->standard, rsc->provider, rsc->type);
version = PCMK_DEFAULT_AGENT_VERSION;
} else {
crm_debug("Metadata for %s:%s:%s has version %s",
rsc->standard, rsc->provider, rsc->type, version);
}
return strdup(version);
}
#endif
static struct ra_param_s *
ra_param_from_xml(xmlNode *param_xml)
{
const char *param_name = crm_element_value(param_xml, "name");
const char *value;
struct ra_param_s *p;
p = calloc(1, sizeof(struct ra_param_s));
if (p == NULL) {
crm_crit("Could not allocate memory for resource metadata");
return NULL;
}
p->rap_name = strdup(param_name);
if (p->rap_name == NULL) {
crm_crit("Could not allocate memory for resource metadata");
free(p);
return NULL;
}
value = crm_element_value(param_xml, "unique");
if (crm_is_true(value)) {
set_bit(p->rap_flags, ra_param_unique);
}
value = crm_element_value(param_xml, "private");
if (crm_is_true(value)) {
set_bit(p->rap_flags, ra_param_private);
}
return p;
}
struct ra_metadata_s *
metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
const char *metadata_str)
{
char *key = NULL;
xmlNode *metadata = NULL;
xmlNode *match = NULL;
struct ra_metadata_s *md = NULL;
CRM_CHECK(mdc && rsc && metadata_str, return NULL);
key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
if (!key) {
crm_crit("Could not allocate memory for resource metadata");
goto err;
}
metadata = string2xml(metadata_str);
if (!metadata) {
crm_err("Metadata for %s:%s:%s is not valid XML",
rsc->standard, rsc->provider, rsc->type);
goto err;
}
md = calloc(1, sizeof(struct ra_metadata_s));
if (md == NULL) {
crm_crit("Could not allocate memory for resource metadata");
goto err;
}
#if ENABLE_VERSIONED_ATTRS
md->ra_version = ra_version_from_xml(metadata, rsc);
#endif
// Check supported actions
match = first_named_child(metadata, "actions");
for (match = first_named_child(match, "action"); match != NULL;
match = crm_next_same_xml(match)) {
const char *action_name = crm_element_value(match, "name");
if (safe_str_eq(action_name, "reload")) {
set_bit(md->ra_flags, ra_supports_reload);
break; // since this is the only action we currently care about
}
}
// Build a parameter list
match = first_named_child(metadata, "parameters");
for (match = first_named_child(match, "parameter"); match != NULL;
match = crm_next_same_xml(match)) {
const char *param_name = crm_element_value(match, "name");
if (param_name == NULL) {
crm_warn("Metadata for %s:%s:%s has parameter without a name",
rsc->standard, rsc->provider, rsc->type);
} else {
struct ra_param_s *p = ra_param_from_xml(match);
if (p == NULL) {
goto err;
}
if (is_set(p->rap_flags, ra_param_private)) {
set_bit(md->ra_flags, ra_uses_private);
}
md->ra_params = g_list_prepend(md->ra_params, p);
}
}
g_hash_table_replace(mdc, key, md);
free_xml(metadata);
return md;
err:
free(key);
free_xml(metadata);
metadata_free(md);
return NULL;
}
struct ra_metadata_s *
metadata_cache_get(GHashTable *mdc, lrmd_rsc_info_t *rsc)
{
char *key = NULL;
struct ra_metadata_s *metadata = NULL;
CRM_CHECK(mdc && rsc, return NULL);
key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
if (key) {
metadata = g_hash_table_lookup(mdc, key);
free(key);
}
return metadata;
}
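/* Editor's aside: a minimal usage sketch (guarded out of the build) of the
* cache API above, assuming a populated lrmd_rsc_info_t and the agent's
* metadata already fetched as an XML string. The function name is
* hypothetical.
*/
#if 0
static void
example_metadata_cache_usage(lrmd_rsc_info_t *rsc, const char *metadata_xml)
{
GHashTable *mdc = metadata_cache_new();
struct ra_metadata_s *md;
// Parse the XML and cache the result, keyed by standard:provider:type
md = metadata_cache_update(mdc, rsc, metadata_xml);
if (md && is_set(md->ra_flags, ra_supports_reload)) {
// The agent advertises a reload action
}
// Later lookups avoid re-parsing the XML
md = metadata_cache_get(mdc, rsc);
metadata_cache_free(mdc); // also frees every cached entry
}
#endif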
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 05f557f31c..0cf5048053 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1298 +1,1295 @@
/*
* Copyright 2013-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
+
#include
#include
-
-#include
-#include
-#include
-#include
-#include
#include
#include
+#include
+
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry */
#define MAX_START_TIMEOUT_MS 10000
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
char *exit_reason; // descriptive text on error
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
int reported_success;
int monitor_timeout_id;
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
/*! executed rc */
int rc;
int op_status;
int call_id;
time_t start_time;
gboolean cancel;
} remote_ra_cmd_t;
enum remote_migration_status {
expect_takeover = 1,
takeover_complete,
};
typedef struct remote_ra_data_s {
crm_trigger_t *work;
remote_ra_cmd_t *cur_cmd;
GList *cmds;
GList *recurring_cmds;
enum remote_migration_status migrate_status;
gboolean active;
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
gboolean is_maintenance;
/* The same applies to whether we are controlling a guest node or a
* remote node. Fortunately, the transition already carries a
* meta-attribute, and since the answer doesn't change over time, we can
* record it at resource start for later use, when the attributes aren't
* at hand.
*/
gboolean controlling_guest;
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
static void
free_cmd(gpointer user_data)
{
remote_ra_cmd_t *cmd = user_data;
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
}
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
}
if (cmd->takeover_timeout_id) {
g_source_remove(cmd->takeover_timeout_id);
}
free(cmd->owner);
free(cmd->rsc_id);
free(cmd->action);
free(cmd->userdata);
free(cmd->exit_reason);
lrmd_key_value_freeall(cmd->params);
free(cmd);
}
static int
generate_callid(void)
{
static int remote_ra_callid = 0;
remote_ra_callid++;
if (remote_ra_callid <= 0) {
remote_ra_callid = 1;
}
return remote_ra_callid;
}
static gboolean
recurring_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->interval_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
static gboolean
start_delay_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->delay_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node joining
*
* \param[in] node_name Name of newly integrated pacemaker_remote node
*/
static void
remote_node_up(const char *node_name)
{
int call_opt, call_id = 0;
xmlNode *update, *state;
crm_node_t *node;
CRM_CHECK(node_name != NULL, return);
crm_info("Announcing pacemaker_remote node %s", node_name);
/* Clear node's operation history. The node's transient attributes should
* and normally will be cleared when the node leaves, but since remote node
* state has a number of corner cases, clear them here as well, to be sure.
*/
call_opt = crmd_cib_smart_opt();
erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
/* Clear node's probed attribute */
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
/* Ensure node is in the remote peer cache with member status */
node = crm_remote_peer_get(node_name);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
/* pacemaker_remote nodes don't participate in the membership layer,
* so cluster nodes don't automatically get notified when they come and go.
* We send a cluster message to the DC, and update the CIB node state entry,
* so the DC will get it sooner (via message) or later (via CIB refresh),
* and any other interested parties can query the CIB.
*/
send_remote_state_message(node_name, TRUE);
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
state = create_node_state_update(node, node_update_cluster, update,
__FUNCTION__);
/* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
* needs to be fenced, this flag will allow various actions to determine
* whether the fencing has happened yet.
*/
crm_xml_add(state, XML_NODE_IS_FENCED, "0");
/* TODO: If the remote connection drops, and this (async) CIB update either
* failed or has not yet completed, later actions could mistakenly think the
* node has already been fenced (if the XML_NODE_IS_FENCED attribute was
* previously set, because it won't have been cleared). This could prevent
* actual fencing or allow recurring monitor failures to be cleared too
* soon. Ideally, we wouldn't rely on the CIB for the fenced status.
*/
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
}
free_xml(update);
}
enum down_opts {
DOWN_KEEP_LRM,
DOWN_ERASE_LRM
};
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
* \param[in] opts Whether to keep or erase LRM history
*/
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
xmlNode *update;
int call_id = 0;
int call_opt = crmd_cib_smart_opt();
crm_node_t *node;
/* Purge node from attrd's memory */
update_attrd_remote_node_removed(node_name, NULL);
/* Purge node's transient attributes */
erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
/* Normally, the LRM operation history should be kept until the node comes
* back up. However, after a successful fence, we want to clear it, so we
* don't think resources are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
node = crm_remote_peer_get(node_name);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_LOST, 0);
/* Notify DC */
send_remote_state_message(node_name, FALSE);
/* Update CIB node state */
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
create_node_state_update(node, node_update_cluster, update, __FUNCTION__);
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_ERR, "%s CIB node state update", node_name);
}
free_xml(update);
}
/*!
* \internal
* \brief Handle effects of a remote RA command on node state
*
* \param[in] cmd Completed remote RA command
*/
static void
check_remote_node_state(remote_ra_cmd_t *cmd)
{
/* Only successful actions can change node state */
if (cmd->rc != PCMK_OCF_OK) {
return;
}
if (safe_str_eq(cmd->action, "start")) {
remote_node_up(cmd->rsc_id);
} else if (safe_str_eq(cmd->action, "migrate_from")) {
/* After a successful migration, we don't need to do remote_node_up()
* because the DC already knows the node is up, and we don't want to
* clear LRM history etc. We do need to add the remote node to this
* host's remote peer cache, because (unless it happens to be DC)
* it hasn't been tracking the remote node, and other code relies on
* the cache to distinguish remote nodes from unseen cluster nodes.
*/
crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
CRM_CHECK(node != NULL, return);
crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, 0);
} else if (safe_str_eq(cmd->action, "stop")) {
lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
if (ra_data) {
if (ra_data->migrate_status != takeover_complete) {
/* Stop means down if we didn't successfully migrate elsewhere */
remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
* un-cache the node, so we don't have stale info
*/
crm_remote_peer_cache_remove(cmd->rsc_id);
}
}
}
/* We don't do anything for successful monitors, which is correct for
* routine recurring monitors, and for monitors on nodes where the
* connection isn't supposed to be (the cluster will stop the connection in
* that case). However, if the initial probe finds the connection already
* active on the node where we want it, we probably should do
* remote_node_up(). Unfortunately, we can't distinguish that case here.
* Given that connections have to be initiated by the cluster, the chance of
* that should be close to zero.
*/
}
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
lrmd_event_data_t op = { 0, };
check_remote_node_state(cmd);
op.type = lrmd_event_exec_complete;
op.rsc_id = cmd->rsc_id;
op.op_type = cmd->action;
op.user_data = cmd->userdata;
op.exit_reason = cmd->exit_reason;
op.timeout = cmd->timeout;
op.interval_ms = cmd->interval_ms;
op.rc = cmd->rc;
op.op_status = cmd->op_status;
op.t_run = cmd->start_time;
op.t_rcchange = cmd->start_time;
if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
op.t_rcchange = time(NULL);
/* This edge case will likely never occur, but if it does, the result is
* that a failure will not be processed correctly. It is only remotely
* possible because we can detect that a connection resource's TCP
* connection has failed at any moment after start has completed. The
* actual recurring operation is just a connectivity ping.
*
* Basically, we are not guaranteed that the first successful monitor op
* and a subsequent failed monitor op won't share the same timestamp, so
* we have to make it look like the operations occurred at separate
* times. */
if (op.t_rcchange == op.t_run) {
op.t_rcchange++;
}
}
if (cmd->params) {
lrmd_key_value_t *tmp;
op.params = crm_str_table_new();
for (tmp = cmd->params; tmp; tmp = tmp->next) {
g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
}
}
op.call_id = cmd->call_id;
op.remote_nodename = cmd->owner;
lrm_op_callback(&op);
if (op.params) {
g_hash_table_destroy(op.params);
}
}
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
}
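/* Editor's aside: a worked example of the arithmetic above, with hypothetical
* values. Note the integer division truncates the timeout to whole seconds.
*/
#if 0
int timeout = 20500; // cmd->timeout in ms
time_t elapsed = 7; // time(NULL) - cmd->start_time
int remaining = ((timeout / 1000) - elapsed) * 1000;
// remaining == 13000: the 500ms fraction of the timeout is lost, so the
// remaining timeout is only accurate to the second
#endif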
static gboolean
retry_start_cmd_cb(gpointer data)
{
lrm_state_t *lrm_state = data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = -1;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
}
cmd = ra_data->cur_cmd;
if (safe_str_neq(cmd->action, "start") && safe_str_neq(cmd->action, "migrate_from")) {
return FALSE;
}
update_remaining_timeout(cmd);
if (cmd->remaining_timeout > 0) {
rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
}
if (rc != 0) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
report_remote_ra_result(cmd);
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
ra_data->cur_cmd = NULL;
free_cmd(cmd);
} else {
/* wait for connection event */
}
return FALSE;
}
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
crm_info("takeover event timed out for node %s", cmd->rsc_id);
cmd->takeover_timeout_id = 0;
lrm_state = lrm_state_find(cmd->rsc_id);
handle_remote_ra_stop(lrm_state, cmd);
free_cmd(cmd);
return FALSE;
}
static gboolean
monitor_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
lrm_state = lrm_state_find(cmd->rsc_id);
crm_info("Timed out waiting for remote poke response from %s%s",
cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
cmd->op_status = PCMK_LRM_OP_TIMEOUT;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
if (lrm_state && lrm_state->remote_ra_data) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (ra_data->cur_cmd == cmd) {
ra_data->cur_cmd = NULL;
}
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
}
report_remote_ra_result(cmd);
free_cmd(cmd);
if(lrm_state) {
lrm_state_disconnect(lrm_state);
}
return FALSE;
}
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
lrmd_event_data_t op = { 0, };
if (lrm_state == NULL) {
/* if lrm_state not given assume local */
lrm_state = lrm_state_find(fsa_our_uname);
}
CRM_ASSERT(lrm_state != NULL);
op.type = lrmd_event_exec_complete;
op.rsc_id = rsc_id;
op.op_type = op_type;
op.rc = PCMK_OCF_OK;
op.op_status = PCMK_LRM_OP_DONE;
op.t_run = time(NULL);
op.t_rcchange = op.t_run;
op.call_id = generate_callid();
process_lrm_event(lrm_state, &op, NULL, NULL);
}
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
services_ocf_exitcode_str(op->rc), op->rc,
services_lrm_status_str(op->op_status), op->op_status);
lrm_state = lrm_state_find(op->remote_nodename);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (ra_data->migrate_status == expect_takeover) {
// Great, we knew this was coming
ra_data->migrate_status = takeover_complete;
} else {
crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the
* control of the mainloop. Do not free lrm_state->conn yet; it will be
* freed in the following stop action. */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* filter all EXEC events up */
if (op->type == lrmd_event_exec_complete) {
if (ra_data->migrate_status == takeover_complete) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (ra_data->active == FALSE) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start and migrate_from actions complete only after the connection
* comes back to us. */
if (op->type == lrmd_event_connect && (safe_str_eq(cmd->action, "start") ||
safe_str_eq(cmd->action, "migrate_from"))) {
if (op->connection_rc < 0) {
update_remaining_timeout(cmd);
if (op->connection_rc == -ENOKEY) {
// Hard error, don't retry
cmd->op_status = PCMK_LRM_OP_ERROR;
cmd->rc = PCMK_OCF_INVALID_PARAM;
cmd->exit_reason = strdup("Authentication key not readable");
} else if (cmd->remaining_timeout > 3000) {
crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("can't reschedule start, remaining timeout too small %d",
cmd->remaining_timeout);
cmd->op_status = PCMK_LRM_OP_TIMEOUT;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
ra_data->active = TRUE;
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if (op->type == lrmd_event_poke && safe_str_eq(cmd->action, "monitor")) {
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time; after that, only worry about
* failures. For this function, if we get the poke back, it is always a
* success. Pokes only fail if the send fails or the response times out. */
if (!cmd->reported_success) {
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
cmd->reported_success = 1;
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && (cmd->cancel == FALSE)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = g_timeout_add(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if (op->type == lrmd_event_disconnect && safe_str_eq(cmd->action, "monitor")) {
if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
} else if (op->type == lrmd_event_new_client && safe_str_eq(cmd->action, "stop")) {
handle_remote_ra_stop(lrm_state, cmd);
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
remote_ra_data_t *ra_data = NULL;
CRM_ASSERT(lrm_state);
ra_data = lrm_state->remote_ra_data;
if (ra_data->migrate_status != takeover_complete) {
/* Delete pending ops whenever the remote connection is intentionally stopped */
g_hash_table_remove_all(lrm_state->pending_ops);
} else {
/* We no longer hold the history if this connection has been migrated;
* however, we keep the metadata cache for future use */
lrm_state_reset_tables(lrm_state, FALSE);
}
ra_data->active = FALSE;
lrm_state_disconnect(lrm_state);
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
ra_data->cmds = NULL;
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
if (cmd) {
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
}
}
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
for (tmp = cmd->params; tmp; tmp = tmp->next) {
if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR) ||
safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_SERVER)) {
server = tmp->value;
} else if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT)) {
port = atoi(tmp->value);
} else if (safe_str_eq(tmp->key, CRM_META"_"XML_RSC_ATTR_CONTAINER)) {
ra_data->controlling_guest = TRUE;
}
}
return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
}
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
ra_data->migrate_status = 0;
rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
if (rc == 0) {
/* take care of this later when we get async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
} else {
crm_debug("Could not initiate remote connection for %s action",
cmd->action);
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "monitor")) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
}
} else {
rc = -1;
cmd->op_status = PCMK_LRM_OP_DONE;
cmd->rc = PCMK_OCF_NOT_RUNNING;
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "stop")) {
if (ra_data->migrate_status == expect_takeover) {
/* Briefly wait on stop for the takeover event to occur. If the
* takeover event does not occur during the wait period, that's fine;
* it just means the remote node's lrm_status section will be cleared,
* requiring all resources running on the remote node to be explicitly
* re-detected via probe actions. If the takeover does occur
* successfully, we can leave the status section intact. */
cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
handle_remote_ra_stop(lrm_state, cmd);
} else if (!strcmp(cmd->action, "migrate_to")) {
ra_data->migrate_status = expect_takeover;
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, "reload")) {
/* reloads are a no-op right now, add logic here when they become important */
cmd->rc = PCMK_OCF_OK;
cmd->op_status = PCMK_LRM_OP_DONE;
report_remote_ra_result(cmd);
}
free_cmd(cmd);
}
return TRUE;
}
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = NULL;
if (lrm_state->remote_ra_data) {
return;
}
ra_data = calloc(1, sizeof(remote_ra_data_t));
ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
lrm_state->remote_ra_data = ra_data;
}
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (!ra_data) {
return;
}
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
mainloop_destroy_trigger(ra_data->work);
free(ra_data);
lrm_state->remote_ra_data = NULL;
}
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
return TRUE;
}
if (id && lrm_state_find(id) && safe_str_neq(id, fsa_our_uname)) {
return TRUE;
}
return FALSE;
}
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
lrmd_rsc_info_t *info = NULL;
if ((lrm_state_find(rsc_id))) {
info = calloc(1, sizeof(lrmd_rsc_info_t));
info->id = strdup(rsc_id);
info->type = strdup(REMOTE_LRMD_RA);
info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
info->provider = strdup("pacemaker");
}
return info;
}
static gboolean
is_remote_ra_supported_action(const char *action)
{
if (!action) {
return FALSE;
} else if (strcmp(action, "start") &&
strcmp(action, "stop") &&
strcmp(action, "reload") &&
strcmp(action, "migrate_to") &&
strcmp(action, "migrate_from") && strcmp(action, "monitor")) {
return FALSE;
}
return TRUE;
}
static GList *
fail_all_monitor_cmds(GList * list)
{
GList *rm_list = NULL;
remote_ra_cmd_t *cmd = NULL;
GListPtr gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms > 0) && safe_str_eq(cmd->action, "monitor")) {
rm_list = g_list_append(rm_list, cmd);
}
}
for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
cmd->op_status = PCMK_LRM_OP_ERROR;
crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
report_remote_ra_result(cmd);
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
/* frees only the list data, not the cmds */
g_list_free(rm_list);
return list;
}
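/* Editor's aside: the two-pass shape above (collect matches, then act and
* unlink) avoids mutating the list while iterating it, since
* report_remote_ra_result() runs callbacks between removals. A minimal
* single-pass alternative (guarded out of the build, names hypothetical)
* must save the next pointer before unlinking:
*/
#if 0
static GList *
remove_matching(GList *list, gboolean (*matches)(gconstpointer data))
{
GList *gIter = list;
while (gIter != NULL) {
GList *next = gIter->next; // save before unlinking
if (matches(gIter->data)) {
list = g_list_delete_link(list, gIter); // frees the link only
}
gIter = next;
}
return list;
}
#endif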
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
remote_ra_cmd_t *cmd = NULL;
GListPtr gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, action)) {
break;
}
cmd = NULL;
}
if (cmd) {
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
return list;
}
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_data_t *ra_data = NULL;
connection_rsc = lrm_state_find(rsc_id);
if (!connection_rsc || !connection_rsc->remote_ra_data) {
return -EINVAL;
}
ra_data = connection_rsc->remote_ra_data;
ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
interval_ms);
if (ra_data->cur_cmd &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
(safe_str_eq(ra_data->cur_cmd->action, action))) {
ra_data->cur_cmd->cancel = TRUE;
}
return 0;
}
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
const char *userdata)
{
GList *gIter = NULL;
remote_ra_cmd_t *cmd = NULL;
/* There are three places a potential duplicate monitor operation
* could exist:
* 1. the recurring_cmds list, where the op is waiting for its next interval
* 2. the cmds list, where the op is queued to get executed immediately
* 3. cur_cmd, which means the monitor op is in flight right now
*/
if (interval_ms == 0) {
return NULL;
}
if (ra_data->cur_cmd &&
ra_data->cur_cmd->cancel == FALSE &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
safe_str_eq(ra_data->cur_cmd->action, "monitor")) {
cmd = ra_data->cur_cmd;
goto handle_dup;
}
for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, "monitor")) {
goto handle_dup;
}
}
for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& safe_str_eq(cmd->action, "monitor")) {
goto handle_dup;
}
}
return NULL;
handle_dup:
crm_trace("merging duplicate monitor cmd " CRM_OP_FMT,
cmd->rsc_id, "monitor", interval_ms);
/* update the userdata */
if (userdata) {
free(cmd->userdata);
cmd->userdata = strdup(userdata);
}
/* if we've already reported success, generate a new call id */
if (cmd->reported_success) {
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
cmd->reported_success = 0;
}
/* If we have an interval_id set, we are in the process of waiting for
* this cmd's next interval. Instead of waiting, cancel the timer and
* execute the action immediately. */
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
cmd->interval_id = 0;
recurring_helper(cmd);
}
return cmd;
}
int
remote_ra_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
const char *userdata, guint interval_ms,
int timeout, /* ms */
int start_delay, /* ms */
lrmd_key_value_t * params)
{
int rc = 0;
lrm_state_t *connection_rsc = NULL;
remote_ra_cmd_t *cmd = NULL;
remote_ra_data_t *ra_data = NULL;
if (is_remote_ra_supported_action(action) == FALSE) {
rc = -EINVAL;
goto exec_done;
}
connection_rsc = lrm_state_find(rsc_id);
if (!connection_rsc) {
rc = -EINVAL;
goto exec_done;
}
remote_ra_data_init(connection_rsc);
ra_data = connection_rsc->remote_ra_data;
cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
if (cmd) {
rc = cmd->call_id;
goto exec_done;
}
cmd = calloc(1, sizeof(remote_ra_cmd_t));
cmd->owner = strdup(lrm_state->node_name);
cmd->rsc_id = strdup(rsc_id);
cmd->action = strdup(action);
cmd->userdata = strdup(userdata);
cmd->interval_ms = interval_ms;
cmd->timeout = timeout;
cmd->start_delay = start_delay;
cmd->params = params;
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
return cmd->call_id;
exec_done:
lrmd_key_value_freeall(params);
return rc;
}
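/* Editor's aside: a minimal caller sketch (guarded out of the build) for the
* entry point above, starting a connection resource. The resource name,
* address, and userdata are hypothetical; remote_ra_exec() takes ownership
* of params on both the success and failure paths.
*/
#if 0
static void
example_remote_start(lrm_state_t *lrm_state)
{
lrmd_key_value_t *params = NULL;
int call_id;
// The connection address/port normally come from the resource parameters
params = lrmd_key_value_add(params, XML_RSC_ATTR_REMOTE_RA_ADDR,
"192.168.122.10");
params = lrmd_key_value_add(params, XML_RSC_ATTR_REMOTE_RA_PORT, "3121");
// One-shot start (interval 0) with a 60s timeout and no start delay
call_id = remote_ra_exec(lrm_state, "remote1", "start", "op-userdata",
0, 60000, 0, params);
if (call_id < 0) {
// -EINVAL: unsupported action or unknown connection resource
}
}
#endif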
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on pacemaker_remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing appears in the transition
* graph as a pseudo-event, structured like (attributes abbreviated):
*
* <pseudo_event operation="stonith" ...>
* <downed>
* <node id="guest1"/>
* </downed>
* </pseudo_event>
*/
#define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE
/*!
* \internal
* \brief Check a pseudo-action for Pacemaker Remote node side effects
*
* \param[in] xml XML of pseudo-action to check
*/
void
remote_ra_process_pseudo(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
if (numXpathResults(search) == 1) {
xmlNode *result = getXpathResult(search, 0);
/* Normally, we handle the necessary side effects of a guest node stop
* action when reporting the remote agent's result. However, if the stop
* is implied due to fencing, it will be a fencing pseudo-event, and
* there won't be a result to report. Handle that case here.
*
* This will result in a duplicate call to remote_node_down() if the
* guest stop was real instead of implied, but that shouldn't hurt.
*
* There is still one corner case that isn't handled: if a guest node
* isn't running any resources when its host is fenced, it will appear
* to be cleanly stopped, so there will be no pseudo-fence, and our
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
if (result) {
const char *remote = ID(result);
if (remote) {
remote_node_down(remote, DOWN_ERASE_LRM);
}
}
}
freeXpathObject(search);
}
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
xmlNode *update, *state;
int call_opt, call_id = 0;
crm_node_t *node;
call_opt = crmd_cib_smart_opt();
node = crm_remote_peer_get(lrm_state->node_name);
CRM_CHECK(node != NULL, return);
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
state = create_node_state_update(node, node_update_none, update,
__FUNCTION__);
crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
if (call_id < 0) {
crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
} else {
/* TODO: still not 100% sure that async update will succeed ... */
ra_data->is_maintenance = maintenance;
}
free_xml(update);
}
#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
XML_GRAPH_TAG_MAINTENANCE
/*!
* \internal
* \brief Check a pseudo-action holding updates for maintenance state
*
* \param[in] xml XML of pseudo-action to check
*/
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
if (numXpathResults(search) == 1) {
xmlNode *node;
int cnt = 0, cnt_remote = 0;
for (node =
first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
node; node = __xml_next(node)) {
lrm_state_t *lrm_state = lrm_state_find(ID(node));
cnt++;
if (lrm_state && lrm_state->remote_ra_data &&
((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
cnt_remote++;
remote_ra_maintenance(lrm_state,
crm_atoi(crm_element_value(node,
XML_NODE_IS_MAINTENANCE), "0"));
}
}
crm_trace("Action holds %d nodes (%d remotes found) "
"adjusting maintenance-mode", cnt, cnt_remote);
}
freeXpathObject(search);
}
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return ra_data->is_maintenance;
}
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return ra_data->controlling_guest;
}
diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
index ad411bd785..4b76355c8d 100644
--- a/daemons/controld/controld_schedulerd.c
+++ b/daemons/controld/controld_schedulerd.c
@@ -1,467 +1,468 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include /* pid_t, sleep, ssize_t */
#include
#include
#include
#include
#include
#include
-#include
-#include /* register_fsa_error_adv */
static mainloop_io_t *pe_subsystem = NULL;
/*!
* \internal
* \brief Close any scheduler connection and free associated memory
*/
void
pe_subsystem_free(void)
{
- // If we aren't connected to the scheduler, we can't expect a reply
- controld_expect_sched_reply(NULL);
-
+ clear_bit(fsa_input_register, R_PE_REQUIRED);
if (pe_subsystem) {
+ controld_expect_sched_reply(NULL);
mainloop_del_ipc_client(pe_subsystem);
pe_subsystem = NULL;
+ clear_bit(fsa_input_register, R_PE_CONNECTED);
}
}
/*!
* \internal
* \brief Save CIB query result to file, raising FSA error
*
* \param[in] msg Ignored
* \param[in] call_id Call ID of CIB query
* \param[in] rc Return code of CIB query
* \param[in] output Result of CIB query
* \param[in] user_data Unique identifier for filename (will be freed)
*
* \note This is intended to be called after a scheduler connection fails.
*/
static void
save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
char *id = user_data;
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
CRM_CHECK(id != NULL, return);
if (rc == pcmk_ok) {
char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
if (write_xml_file(output, filename, TRUE) < 0) {
crm_err("Could not save Cluster Information Base to %s after scheduler crash",
filename);
} else {
crm_notice("Saved Cluster Information Base to %s after scheduler crash",
filename);
}
free(filename);
}
}
/*!
* \internal
* \brief Respond to scheduler connection failure
*
* \param[in] user_data Ignored
*/
static void
pe_ipc_destroy(gpointer user_data)
{
+ // If we aren't connected to the scheduler, we can't expect a reply
+ controld_expect_sched_reply(NULL);
+
if (is_set(fsa_input_register, R_PE_REQUIRED)) {
int rc = pcmk_ok;
char *uuid_str = crm_generate_uuid();
crm_crit("Connection to the scheduler failed "
CRM_XS " uuid=%s", uuid_str);
/*
* The scheduler died...
*
* Save the current CIB so that we have a chance of
* figuring out what killed it.
*
* Delay raising the I_ERROR until the query below completes or
* 5s is up, whichever comes first.
*
*/
rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);
} else {
crm_info("Connection to the scheduler released");
}
clear_bit(fsa_input_register, R_PE_CONNECTED);
pe_subsystem = NULL;
mainloop_set_trigger(fsa_source);
return;
}
/*!
* \internal
* \brief Handle message from scheduler connection
*
* \param[in] buffer XML message (will be freed)
* \param[in] length Ignored
* \param[in] userdata Ignored
*
* \return 0
*/
static int
pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
{
xmlNode *msg = string2xml(buffer);
if (msg) {
route_message(C_IPC_MESSAGE, msg);
}
free_xml(msg);
return 0;
}
/*!
* \internal
- * \brief Make new connection to PE
+ * \brief Make new connection to scheduler
*
* \return TRUE on success, FALSE otherwise
*/
static bool
pe_subsystem_new()
{
static struct ipc_client_callbacks pe_callbacks = {
.dispatch = pe_ipc_dispatch,
.destroy = pe_ipc_destroy
};
+ set_bit(fsa_input_register, R_PE_REQUIRED);
pe_subsystem = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE,
G_PRIORITY_DEFAULT,
5 * 1024 * 1024 /* 5MB */,
NULL, &pe_callbacks);
- return (pe_subsystem != NULL);
+ if (pe_subsystem == NULL) {
+ return FALSE;
+ }
+ set_bit(fsa_input_register, R_PE_CONNECTED);
+ return TRUE;
}
/*!
* \internal
* \brief Send an XML message to the PE
*
* \param[in] cmd XML message to send
*
* \return pcmk_ok on success, -errno otherwise
*/
static int
pe_subsystem_send(xmlNode *cmd)
{
if (pe_subsystem) {
int sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem), cmd,
0, 0, NULL);
if (sent == 0) {
sent = -ENODATA;
} else if (sent > 0) {
sent = pcmk_ok;
}
return sent;
}
return -ENOTCONN;
}
static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data);
/* A_PE_START, A_PE_STOP, O_PE_RESTART */
void
do_pe_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (action & A_PE_STOP) {
- clear_bit(fsa_input_register, R_PE_REQUIRED);
pe_subsystem_free();
- clear_bit(fsa_input_register, R_PE_CONNECTED);
}
- if ((action & A_PE_START) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) {
+ if ((action & A_PE_START)
+ && (is_not_set(fsa_input_register, R_PE_CONNECTED))) {
- if (cur_state != S_STOPPING) {
- set_bit(fsa_input_register, R_PE_REQUIRED);
- if (pe_subsystem_new()) {
- set_bit(fsa_input_register, R_PE_CONNECTED);
- } else {
- crm_warn("Could not connect to scheduler");
- register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
- }
- } else {
+ if (cur_state == S_STOPPING) {
crm_info("Ignoring request to connect to scheduler while shutting down");
+
+ } else if (!pe_subsystem_new()) {
+ crm_warn("Could not connect to scheduler");
+ register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
}
int fsa_pe_query = 0;
char *fsa_pe_ref = NULL;
static mainloop_timer_t *controld_sched_timer = NULL;
// @TODO Make this a configurable cluster option if there's demand for it
#define SCHED_TIMEOUT_MS (120000)
/*!
* \internal
* \brief Handle a timeout waiting for scheduler reply
*
* \param[in] user_data Ignored
*
* \return FALSE (indicating that timer should not be restarted)
*/
static gboolean
controld_sched_timeout(gpointer user_data)
{
if (AM_I_DC) {
/* If this node is the DC but can't communicate with the scheduler, just
* exit (and likely get fenced) so this node doesn't interfere with any
* further DC elections.
*
* @TODO We could try something less drastic first, like disconnecting
* and reconnecting to the scheduler, but something is likely going
* seriously wrong, so perhaps it's better to just fail as quickly as
* possible.
*/
crmd_exit(CRM_EX_FATAL);
}
return FALSE;
}
void
controld_stop_sched_timer()
{
if (controld_sched_timer && fsa_pe_ref) {
crm_trace("Stopping timer for scheduler reply %s", fsa_pe_ref);
}
mainloop_timer_stop(controld_sched_timer);
}
/*!
* \internal
* \brief Set the scheduler request currently being waited on
*
* \param[in] msg Request to expect reply to (or NULL for none)
*/
void
controld_expect_sched_reply(xmlNode *msg)
{
char *ref = NULL;
if (msg) {
ref = crm_element_value_copy(msg, XML_ATTR_REFERENCE);
CRM_ASSERT(ref != NULL);
if (controld_sched_timer == NULL) {
controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
SCHED_TIMEOUT_MS, FALSE,
controld_sched_timeout,
NULL);
}
mainloop_timer_start(controld_sched_timer);
} else {
controld_stop_sched_timer();
}
free(fsa_pe_ref);
fsa_pe_ref = ref;
}
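/* Editor's aside: a sketch (guarded out of the build, function name
* hypothetical) of how the reply timer pairs with requests, following the
* pattern used by do_pe_invoke_callback() below:
*/
#if 0
static void
example_sched_request(xmlNode *cmd)
{
if (pe_subsystem_send(cmd) == pcmk_ok) {
// Arm the reply timer and remember the request's reference
controld_expect_sched_reply(cmd);
}
// ...later, when the reply arrives or the request is abandoned:
controld_expect_sched_reply(NULL); // stops the timer, clears fsa_pe_ref
}
#endif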
/*!
* \internal
* \brief Free the scheduler reply timer
*/
void
controld_free_sched_timer()
{
if (controld_sched_timer != NULL) {
mainloop_timer_del(controld_sched_timer);
controld_sched_timer = NULL;
}
}
/* A_PE_INVOKE */
void
do_pe_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (AM_I_DC == FALSE) {
crm_err("Not invoking scheduler because not DC: %s",
fsa_action2string(action));
return;
}
if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
if (is_set(fsa_input_register, R_SHUTDOWN)) {
crm_err("Cannot shut down gracefully without the scheduler");
register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
} else {
crm_info("Waiting for the scheduler to connect");
crmd_fsa_stall(FALSE);
register_fsa_action(A_PE_START);
}
return;
}
if (cur_state != S_POLICY_ENGINE) {
crm_notice("Not invoking scheduler because in state %s",
fsa_state2string(cur_state));
return;
}
if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
/* start the join from scratch */
register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
return;
}
fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
fsa_state2string(fsa_state));
controld_expect_sched_reply(NULL);
fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
}
static void
force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
{
int max = 0;
int lpc = 0;
char *xpath_string = NULL;
xmlXPathObjectPtr xpathObj = NULL;
xpath_string = crm_strdup_printf("%.128s//%s//nvpair[@name='%.128s']",
get_object_path(XML_CIB_TAG_CRMCONFIG),
XML_CIB_TAG_PROPSET, attr_name);
xpathObj = xpath_search(xml, xpath_string);
max = numXpathResults(xpathObj);
free(xpath_string);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
}
if(max == 0) {
xmlNode *configuration = NULL;
xmlNode *crm_config = NULL;
xmlNode *cluster_property_set = NULL;
crm_trace("Creating %s-%s for %s=%s",
CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);
configuration = find_entity(xml, XML_CIB_TAG_CONFIGURATION, NULL);
if (configuration == NULL) {
configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
}
crm_config = find_entity(configuration, XML_CIB_TAG_CRMCONFIG, NULL);
if (crm_config == NULL) {
crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
}
cluster_property_set = find_entity(crm_config, XML_CIB_TAG_PROPSET, NULL);
if (cluster_property_set == NULL) {
cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
}
xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);
crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
}
freeXpathObject(xpathObj);
}
static void
do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
xmlNode *cmd = NULL;
pid_t watchdog = pcmk_locate_sbd();
if (rc != pcmk_ok) {
crm_err("Could not retrieve the Cluster Information Base: %s "
CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
return;
} else if (call_id != fsa_pe_query) {
crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
return;
} else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) {
crm_debug("No need to invoke the scheduler anymore");
return;
} else if (fsa_state != S_POLICY_ENGINE) {
crm_debug("Discarding scheduler request in state: %s",
fsa_state2string(fsa_state));
return;
/* this callback counts as 1 */
} else if (num_cib_op_callbacks() > 1) {
crm_debug("Re-asking for the CIB: %d other peer updates still pending",
(num_cib_op_callbacks() - 1));
sleep(1);
register_fsa_action(A_PE_INVOKE);
return;
} else if (fsa_state != S_POLICY_ENGINE) {
crm_err("Invoking scheduler in state: %s", fsa_state2string(fsa_state));
return;
}
CRM_LOG_ASSERT(output != NULL);
/* Refresh the remote node cache and the known node cache when the
* scheduler is invoked */
crm_peer_caches_refresh(output);
crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
force_local_option(output, XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false");
if (ever_had_quorum && crm_have_quorum == FALSE) {
crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
}
cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL);
rc = pe_subsystem_send(cmd);
if (rc < 0) {
crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
pcmk_strerror(rc), rc);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
} else {
controld_expect_sched_reply(cmd);
crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d",
fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum);
}
free_xml(cmd);
}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index 8f3b9d3cfd..505310bcf5 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,636 +1,635 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
+ * The version control history for this file may have further details.
+ *
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include