diff --git a/cts/lab/CTSaudits.py b/cts/lab/CTSaudits.py
index 433ef35389..51a04f8c19 100755
--- a/cts/lab/CTSaudits.py
+++ b/cts/lab/CTSaudits.py
@@ -1,878 +1,879 @@
""" Auditing classes for Pacemaker's Cluster Test Suite (CTS)
"""
__copyright__ = "Copyright 2000-2023 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import time, re, uuid
from pacemaker.buildoptions import BuildOptions
from pacemaker._cts.watcher import LogKind, LogWatcher
class ClusterAudit(object):
def __init__(self, cm):
self.CM = cm
def __call__(self):
raise ValueError("Abstract Class member (__call__)")
def is_applicable(self):
'''Return TRUE if we are applicable in the current test configuration'''
raise ValueError("Abstract Class member (is_applicable)")
def log(self, args):
self.CM.log("audit: %s" % args)
def debug(self, args):
self.CM.debug("audit: %s" % args)
def name(self):
raise ValueError("Abstract Class member (name)")
AllAuditClasses = [ ]
class LogAudit(ClusterAudit):
def name(self):
return "LogAudit"
def __init__(self, cm):
self.CM = cm
def RestartClusterLogging(self, nodes=None):
if not nodes:
nodes = self.CM.Env["nodes"]
self.CM.debug("Restarting logging on: %s" % repr(nodes))
for node in nodes:
if self.CM.Env["have_systemd"]:
(rc, _) = self.CM.rsh(node, "systemctl stop systemd-journald.socket")
if rc != 0:
self.CM.log ("ERROR: Cannot stop 'systemd-journald' on %s" % node)
(rc, _) = self.CM.rsh(node, "systemctl start systemd-journald.service")
if rc != 0:
self.CM.log ("ERROR: Cannot start 'systemd-journald' on %s" % node)
(rc, _) = self.CM.rsh(node, "service %s restart" % self.CM.Env["syslogd"])
if rc != 0:
self.CM.log ("ERROR: Cannot restart '%s' on %s" % (self.CM.Env["syslogd"], node))
+ def _create_watcher(self, patterns, kind):
+ watch = LogWatcher(self.CM.Env["LogFileName"], patterns,
+ self.CM.Env["nodes"], kind, "LogAudit", 5,
+ silent=True)
+ watch.set_watch()
+ return watch
+
def TestLogging(self):
patterns = []
prefix = "Test message from"
suffix = str(uuid.uuid4())
watch = {}
for node in self.CM.Env["nodes"]:
# Look for the node name in two places to make sure
# that syslog is logging with the correct hostname
m = re.search("^([^.]+).*", node)
if m:
simple = m.group(1)
else:
simple = node
patterns.append("%s.*%s %s %s" % (simple, prefix, node, suffix))
watch_pref = self.CM.Env["LogWatcher"]
if watch_pref == LogKind.ANY:
- for k in LogKind:
- watch[k] = LogWatcher(self.CM.Env["LogFileName"], patterns, self.CM.Env["nodes"], k, "LogAudit", 5, silent=True)
- watch[k].set_watch()
+ kinds = [ LogKind.FILE ]
+ if self.CM.Env["have_systemd"]:
+ kinds += [ LogKind.JOURNAL ]
+ kinds += [ LogKind.REMOTE_FILE ]
+ for k in kinds:
+ watch[k] = self._create_watcher(patterns, k)
+ self.CM.log("Logging test message with identifier %s" % (suffix))
else:
- k = watch_pref
- watch[k] = LogWatcher(self.CM.Env["LogFileName"], patterns, self.CM.Env["nodes"], k, "LogAudit", 5, silent=True)
- watch[k].set_watch()
-
- if watch_pref == LogKind.ANY:
- self.CM.log("Writing log with key: %s" % (suffix))
+ watch[watch_pref] = self._create_watcher(patterns, watch_pref)
for node in self.CM.Env["nodes"]:
cmd = "logger -p %s.info %s %s %s" % (self.CM.Env["SyslogFacility"], prefix, node, suffix)
(rc, _) = self.CM.rsh(node, cmd, synchronous=False, verbose=0)
if rc != 0:
self.CM.log ("ERROR: Cannot execute remote command [%s] on %s" % (cmd, node))
- for k in LogKind:
- if k in watch:
- w = watch[k]
- if watch_pref == LogKind.ANY:
- self.CM.log("Testing for %s logs" % (k))
-
- w.look_for_all(silent=True)
- if not w.unmatched:
- if watch_pref == LogKind.ANY:
- self.CM.log ("Continuing with %s-based log reader" % (w.kind))
- self.CM.Env["LogWatcher"] = w.kind
- return 1
-
for k in list(watch.keys()):
w = watch[k]
+ if watch_pref == LogKind.ANY:
+ self.CM.log("Checking for test message in %s logs" % (k))
+ w.look_for_all(silent=True)
if w.unmatched:
for regex in w.unmatched:
- self.CM.log ("Test message [%s] not found in %s logs." % (regex, w.kind))
+ self.CM.log("Test message [%s] not found in %s logs" % (regex, w.kind))
+ else:
+ if watch_pref == LogKind.ANY:
+ self.CM.log("Found test message in %s logs" % (k))
+ self.CM.Env["LogWatcher"] = k
+ return 1
return 0
def __call__(self):
max = 3
attempt = 0
self.CM.ns.wait_for_all_nodes(self.CM.Env["nodes"])
while attempt <= max and self.TestLogging() == 0:
attempt = attempt + 1
self.RestartClusterLogging()
time.sleep(60*attempt)
if attempt > max:
self.CM.log("ERROR: Cluster logging unrecoverable.")
return 0
return 1
def is_applicable(self):
if self.CM.Env["DoBSC"]:
return 0
if self.CM.Env["LogAuditDisabled"]:
return 0
return 1
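# A minimal standalone sketch (not part of the patch) of the fallback
# pattern TestLogging() implements above: probe each candidate log kind
# and adopt the first whose watcher saw the test message. `watchers` is
# assumed to map LogKind -> LogWatcher, as built in TestLogging().
def first_matching_kind(watchers):
    for kind, watcher in watchers.items():
        watcher.look_for_all(silent=True)
        if not watcher.unmatched:
            return kind
    return None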
class DiskAudit(ClusterAudit):
def name(self):
return "DiskspaceAudit"
def __init__(self, cm):
self.CM = cm
def __call__(self):
result = 1
# @TODO Use directory of PCMK_logfile if set on host
dfcmd = "df -BM " + BuildOptions.LOG_DIR + " | tail -1 | awk '{print $(NF-1)\" \"$(NF-2)}' | tr -d 'M%'"
self.CM.ns.wait_for_all_nodes(self.CM.Env["nodes"])
for node in self.CM.Env["nodes"]:
(_, dfout) = self.CM.rsh(node, dfcmd, verbose=1)
if not dfout:
self.CM.log ("ERROR: Cannot execute remote df command [%s] on %s" % (dfcmd, node))
else:
dfout = dfout[0].strip()
try:
(used, remain) = dfout.split()
used_percent = int(used)
remaining_mb = int(remain)
except (ValueError, TypeError):
# used/remain may be unbound if the unpacking above failed
self.CM.log("Warning: df output '%s' from %s was invalid"
% (dfout, node))
else:
if remaining_mb < 10 or used_percent > 95:
self.CM.log("CRIT: Out of log disk space on %s (%d%% / %dMB)"
% (node, used_percent, remaining_mb))
result = None
if self.CM.Env["continue"]:
answer = "Y"
else:
try:
answer = input('Continue? [nY]')
except EOFError:
answer = "n"
if answer and answer == "n":
raise ValueError("Disk full on %s" % (node))
elif remaining_mb < 100 or used_percent > 90:
self.CM.log("WARN: Low on log disk space (%dMB) on %s" % (remaining_mb, node))
return result
def is_applicable(self):
if self.CM.Env["DoBSC"]:
return 0
return 1
class FileAudit(ClusterAudit):
def name(self):
return "FileAudit"
def __init__(self, cm):
self.CM = cm
self.known = []
def __call__(self):
result = 1
self.CM.ns.wait_for_all_nodes(self.CM.Env["nodes"])
for node in self.CM.Env["nodes"]:
(_, lsout) = self.CM.rsh(node, "ls -al /var/lib/pacemaker/cores/* | grep core.[0-9]", verbose=1)
for line in lsout:
line = line.strip()
if line not in self.known:
result = 0
self.known.append(line)
self.CM.log("Warning: Pacemaker core file on %s: %s" % (node, line))
(_, lsout) = self.CM.rsh(node, "ls -al /var/lib/corosync | grep core.[0-9]", verbose=1)
for line in lsout:
line = line.strip()
if line not in self.known:
result = 0
self.known.append(line)
self.CM.log("Warning: Corosync core file on %s: %s" % (node, line))
if node in self.CM.ShouldBeStatus and self.CM.ShouldBeStatus[node] == "down":
clean = 0
(_, lsout) = self.CM.rsh(node, "ls -al /dev/shm | grep qb-", verbose=1)
for line in lsout:
result = 0
clean = 1
self.CM.log("Warning: Stale IPC file on %s: %s" % (node, line))
if clean:
(_, lsout) = self.CM.rsh(node, "ps axf | grep -e pacemaker -e corosync", verbose=1)
for line in lsout:
self.CM.debug("ps[%s]: %s" % (node, line))
self.CM.rsh(node, "rm -rf /dev/shm/qb-*")
else:
self.CM.debug("Skipping %s" % node)
return result
def is_applicable(self):
return 1
class AuditResource(object):
def __init__(self, cm, line):
fields = line.split()
self.CM = cm
self.line = line
self.type = fields[1]
self.id = fields[2]
self.clone_id = fields[3]
self.parent = fields[4]
self.rprovider = fields[5]
self.rclass = fields[6]
self.rtype = fields[7]
self.host = fields[8]
self.needs_quorum = fields[9]
self.flags = int(fields[10])
self.flags_s = fields[11]
if self.parent == "NA":
self.parent = None
def unique(self):
if self.flags & int("0x00000020", 16):
return 1
return 0
def orphan(self):
if self.flags & int("0x00000001", 16):
return 1
return 0
def managed(self):
if self.flags & int("0x00000002", 16):
return 1
return 0
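# For readability, the bit tests above correspond to these masks in the
# flags field of crm_resource -c output; the constant names here are
# illustrative only, not part of the patch:
#
#   RSC_ORPHAN  = 0x00000001   # orphan()
#   RSC_MANAGED = 0x00000002   # managed()
#   RSC_UNIQUE  = 0x00000020   # unique()
#
# e.g. the body of managed() is equivalent to:
#   return 1 if self.flags & 0x00000002 else 0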
class AuditConstraint(object):
def __init__(self, cm, line):
fields = line.split()
self.CM = cm
self.line = line
self.type = fields[1]
self.id = fields[2]
self.rsc = fields[3]
self.target = fields[4]
self.score = fields[5]
self.rsc_role = fields[6]
self.target_role = fields[7]
if self.rsc_role == "NA":
self.rsc_role = None
if self.target_role == "NA":
self.target_role = None
class PrimitiveAudit(ClusterAudit):
def name(self):
return "PrimitiveAudit"
def __init__(self, cm):
self.CM = cm
def doResourceAudit(self, resource, quorum):
rc = 1
active = self.CM.ResourceLocation(resource.id)
if len(active) == 1:
if quorum:
self.debug("Resource %s active on %s" % (resource.id, repr(active)))
elif resource.needs_quorum == 1:
self.CM.log("Resource %s active without quorum: %s"
% (resource.id, repr(active)))
rc = 0
elif not resource.managed():
self.CM.log("Resource %s not managed. Active on %s"
% (resource.id, repr(active)))
elif not resource.unique():
# TODO: Figure out a clever way to actually audit these resource types
if len(active) > 1:
self.debug("Non-unique resource %s is active on: %s"
% (resource.id, repr(active)))
else:
self.debug("Non-unique resource %s is not active" % resource.id)
elif len(active) > 1:
self.CM.log("Resource %s is active multiple times: %s"
% (resource.id, repr(active)))
rc = 0
elif resource.orphan():
self.debug("Resource %s is an inactive orphan" % resource.id)
elif len(self.inactive_nodes) == 0:
self.CM.log("WARN: Resource %s not served anywhere" % resource.id)
rc = 0
elif self.CM.Env["warn-inactive"]:
if quorum or not resource.needs_quorum:
self.CM.log("WARN: Resource %s not served anywhere (Inactive nodes: %s)"
% (resource.id, repr(self.inactive_nodes)))
else:
self.debug("Resource %s not served anywhere (Inactive nodes: %s)"
% (resource.id, repr(self.inactive_nodes)))
elif quorum or not resource.needs_quorum:
self.debug("Resource %s not served anywhere (Inactive nodes: %s)"
% (resource.id, repr(self.inactive_nodes)))
return rc
def setup(self):
self.target = None
self.resources = []
self.constraints = []
self.active_nodes = []
self.inactive_nodes = []
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == "up":
self.active_nodes.append(node)
else:
self.inactive_nodes.append(node)
for node in self.CM.Env["nodes"]:
if self.target == None and self.CM.ShouldBeStatus[node] == "up":
self.target = node
if not self.target:
# TODO: In Pacemaker 1.0 clusters we'll be able to run crm_resource
# with CIB_file=/path/to/cib.xml even when the cluster isn't running
self.debug("No nodes active - skipping %s" % self.name())
return 0
(_, lines) = self.CM.rsh(self.target, "crm_resource -c", verbose=1)
for line in lines:
if re.search("^Resource", line):
self.resources.append(AuditResource(self.CM, line))
elif re.search("^Constraint", line):
self.constraints.append(AuditConstraint(self.CM, line))
else:
self.CM.log("Unknown entry: %s" % line);
return 1
def __call__(self):
rc = 1
if not self.setup():
return 1
quorum = self.CM.HasQuorum(None)
for resource in self.resources:
if resource.type == "primitive":
if self.doResourceAudit(resource, quorum) == 0:
rc = 0
return rc
def is_applicable(self):
# @TODO Due to long-ago refactoring, this name test would never match,
# so this audit (and those derived from it) would never run.
# Uncommenting the next lines fixes the name test, but that then
# exposes pre-existing bugs that need to be fixed.
#if self.CM["Name"] == "crm-corosync":
# return 1
return 0
class GroupAudit(PrimitiveAudit):
def name(self):
return "GroupAudit"
def __call__(self):
rc = 1
if not self.setup():
return 1
for group in self.resources:
if group.type == "group":
first_match = 1
group_location = None
for child in self.resources:
if child.parent == group.id:
nodes = self.CM.ResourceLocation(child.id)
if first_match and len(nodes) > 0:
group_location = nodes[0]
first_match = 0
if len(nodes) > 1:
rc = 0
self.CM.log("Child %s of %s is active more than once: %s"
% (child.id, group.id, repr(nodes)))
elif len(nodes) == 0:
# Groups are allowed to be partially active
# However we do need to make sure later children aren't running
group_location = None
self.debug("Child %s of %s is stopped" % (child.id, group.id))
elif nodes[0] != group_location:
rc = 0
self.CM.log("Child %s of %s is active on the wrong node (%s) expected %s"
% (child.id, group.id, nodes[0], group_location))
else:
self.debug("Child %s of %s is active on %s" % (child.id, group.id, nodes[0]))
return rc
class CloneAudit(PrimitiveAudit):
def name(self):
return "CloneAudit"
def __call__(self):
rc = 1
if not self.setup():
return 1
for clone in self.resources:
if clone.type == "clone":
for child in self.resources:
if child.parent == clone.id and child.type == "primitive":
self.debug("Checking child %s of %s..." % (child.id, clone.id))
# Check max and node_max
# Obtain with:
# crm_resource -g clone_max --meta -r child.id
# crm_resource -g clone_node_max --meta -r child.id
return rc
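# A hypothetical sketch of the check described in the comment above (not
# part of the patch); it would run inside the child loop, using the same
# rsh helper to read the clone's metadata:
#
#   (rc, lines) = self.CM.rsh(self.target,
#       "crm_resource -g clone_max --meta -r %s" % child.id, verbose=1)
#   if rc == 0 and lines:
#       clone_max = int(lines[0].strip())
#       # ...then compare against the number of active instances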
class ColocationAudit(PrimitiveAudit):
def name(self):
return "ColocationAudit"
def crm_location(self, resource):
(rc, lines) = self.CM.rsh(self.target, "crm_resource -W -r %s -Q"%resource, verbose=1)
hosts = []
if rc == 0:
for line in lines:
fields = line.split()
hosts.append(fields[0])
return hosts
def __call__(self):
rc = 1
if not self.setup():
return 1
for coloc in self.constraints:
if coloc.type == "rsc_colocation":
source = self.crm_location(coloc.rsc)
target = self.crm_location(coloc.target)
if len(source) == 0:
self.debug("Colocation audit (%s): %s not running" % (coloc.id, coloc.rsc))
else:
for node in source:
if not node in target:
rc = 0
self.CM.log("Colocation audit (%s): %s running on %s (not in %s)"
% (coloc.id, coloc.rsc, node, repr(target)))
else:
self.debug("Colocation audit (%s): %s running on %s (in %s)"
% (coloc.id, coloc.rsc, node, repr(target)))
return rc
class ControllerStateAudit(ClusterAudit):
def __init__(self, cm):
self.CM = cm
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
def has_key(self, key):
return key in self.Stats
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
return self.Stats[key]
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
if not name in self.Stats:
self.Stats[name] = 0
self.Stats[name] = self.Stats[name]+1
def __call__(self):
passed = 1
up_are_down = 0
down_are_up = 0
unstable_list = []
for node in self.CM.Env["nodes"]:
should_be = self.CM.ShouldBeStatus[node]
rc = self.CM.test_node_CM(node)
if rc > 0:
if should_be == "down":
down_are_up = down_are_up + 1
if rc == 1:
unstable_list.append(node)
elif should_be == "up":
up_are_down = up_are_down + 1
if len(unstable_list) > 0:
passed = 0
self.CM.log("Cluster is not stable: %d (of %d): %s"
% (len(unstable_list), self.CM.upcount(), repr(unstable_list)))
if up_are_down > 0:
passed = 0
self.CM.log("%d (of %d) nodes expected to be up were down."
% (up_are_down, len(self.CM.Env["nodes"])))
if down_are_up > 0:
passed = 0
self.CM.log("%d (of %d) nodes expected to be down were up."
% (down_are_up, len(self.CM.Env["nodes"])))
return passed
def name(self):
return "ControllerStateAudit"
def is_applicable(self):
# @TODO Due to long-ago refactoring, this name test would never match,
# so this audit (and those derived from it) would never run.
# Uncommenting the next lines fixes the name test, but that then
# exposes pre-existing bugs that need to be fixed.
#if self.CM["Name"] == "crm-corosync":
# return 1
return 0
class CIBAudit(ClusterAudit):
def __init__(self, cm):
self.CM = cm
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
def has_key(self, key):
return key in self.Stats
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
return self.Stats[key]
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
if not name in self.Stats:
self.Stats[name] = 0
self.Stats[name] = self.Stats[name]+1
def __call__(self):
passed = 1
ccm_partitions = self.CM.find_partitions()
if len(ccm_partitions) == 0:
self.debug("\tNo partitions to audit")
return 1
for partition in ccm_partitions:
self.debug("\tAuditing CIB consistency for: %s" % partition)
partition_passed = 0
if self.audit_cib_contents(partition) == 0:
passed = 0
return passed
def audit_cib_contents(self, hostlist):
passed = 1
node0 = None
node0_xml = None
partition_hosts = hostlist.split()
for node in partition_hosts:
node_xml = self.store_remote_cib(node, node0)
if node_xml == None:
self.CM.log("Could not perform audit: No configuration from %s" % node)
passed = 0
elif node0 == None:
node0 = node
node0_xml = node_xml
elif node0_xml == None:
self.CM.log("Could not perform audit: No configuration from %s" % node0)
passed = 0
else:
(rc, result) = self.CM.rsh(
node0, "crm_diff -VV -cf --new %s --original %s" % (node_xml, node0_xml), verbose=1)
if rc != 0:
self.CM.log("Diff between %s and %s failed: %d" % (node0_xml, node_xml, rc))
passed = 0
for line in result:
if not re.search("", line):
passed = 0
self.debug("CibDiff[%s-%s]: %s" % (node0, node, line))
else:
self.debug("CibDiff[%s-%s] Ignoring: %s" % (node0, node, line))
# self.CM.rsh(node0, "rm -f %s" % node_xml)
# self.CM.rsh(node0, "rm -f %s" % node0_xml)
return passed
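# For reference, the comparison above amounts to running, e.g.:
#   crm_diff -VV -cf --new /tmp/ctsaudit.node2.xml --original /tmp/ctsaudit.node1.xml
# on node0 (file names follow store_remote_cib() below); a nonzero exit
# status, or any output line other than "<diff/>", marks the CIBs as
# inconsistent.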
def store_remote_cib(self, node, target):
combined = ""
filename = "/tmp/ctsaudit.%s.xml" % node
if not target:
target = node
(rc, lines) = self.CM.rsh(node, self.CM["CibQuery"], verbose=1)
if rc != 0:
self.CM.log("Could not retrieve configuration")
return None
self.CM.rsh("localhost", "rm -f %s" % filename)
for line in lines:
self.CM.rsh("localhost", "echo \'%s\' >> %s" % (line[:-1], filename), verbose=0)
if self.CM.rsh.copy(filename, "root@%s:%s" % (target, filename), silent=True) != 0:
self.CM.log("Could not store configuration")
return None
return filename
def name(self):
return "CibAudit"
def is_applicable(self):
# @TODO Due to long-ago refactoring, this name test would never match,
# so this audit (and those derived from it) would never run.
# Uncommenting the next lines fixes the name test, but that then
# exposes pre-existing bugs that need to be fixed.
#if self.CM["Name"] == "crm-corosync":
# return 1
return 0
class PartitionAudit(ClusterAudit):
def __init__(self, cm):
self.CM = cm
self.Stats = {"calls":0
, "success":0
, "failure":0
, "skipped":0
, "auditfail":0}
self.NodeEpoch = {}
self.NodeState = {}
self.NodeQuorum = {}
def has_key(self, key):
return key in self.Stats
def __setitem__(self, key, value):
self.Stats[key] = value
def __getitem__(self, key):
return self.Stats[key]
def incr(self, name):
'''Increment (or initialize) the value associated with the given name'''
if not name in self.Stats:
self.Stats[name] = 0
self.Stats[name] = self.Stats[name]+1
def __call__(self):
passed = 1
ccm_partitions = self.CM.find_partitions()
if ccm_partitions == None or len(ccm_partitions) == 0:
return 1
self.CM.cluster_stable(double_check=True)
if len(ccm_partitions) != self.CM.partitions_expected:
self.CM.log("ERROR: %d cluster partitions detected:" % len(ccm_partitions))
passed = 0
for partition in ccm_partitions:
self.CM.log("\t %s" % partition)
for partition in ccm_partitions:
partition_passed = 0
if self.audit_partition(partition) == 0:
passed = 0
return passed
def trim_string(self, avalue):
if not avalue:
return None
if len(avalue) > 1:
return avalue[:-1]
def trim2int(self, avalue):
if not avalue:
return None
if len(avalue) > 1:
return int(avalue[:-1])
def audit_partition(self, partition):
passed = 1
dc_found = []
dc_allowed_list = []
lowest_epoch = None
node_list = partition.split()
self.debug("Auditing partition: %s" % (partition))
for node in node_list:
if self.CM.ShouldBeStatus[node] != "up":
self.CM.log("Warn: Node %s appeared out of nowhere" % (node))
self.CM.ShouldBeStatus[node] = "up"
# not in itself a reason to fail the audit (not what we're
# checking for in this audit)
(_, out) = self.CM.rsh(node, self.CM["StatusCmd"] % node, verbose=1)
self.NodeState[node] = out[0].strip()
(_, out) = self.CM.rsh(node, self.CM["EpochCmd"], verbose=1)
self.NodeEpoch[node] = out[0].strip()
(_, out) = self.CM.rsh(node, self.CM["QuorumCmd"], verbose=1)
self.NodeQuorum[node] = out[0].strip()
self.debug("Node %s: %s - %s - %s." % (node, self.NodeState[node], self.NodeEpoch[node], self.NodeQuorum[node]))
self.NodeState[node] = self.trim_string(self.NodeState[node])
self.NodeEpoch[node] = self.trim2int(self.NodeEpoch[node])
self.NodeQuorum[node] = self.trim_string(self.NodeQuorum[node])
if not self.NodeEpoch[node]:
self.CM.log("Warn: Node %s dissappeared: cant determin epoch" % (node))
self.CM.ShouldBeStatus[node] = "down"
# not in itself a reason to fail the audit (not what we're
# checking for in this audit)
elif lowest_epoch == None or self.NodeEpoch[node] < lowest_epoch:
lowest_epoch = self.NodeEpoch[node]
if not lowest_epoch:
self.CM.log("Lowest epoch not determined in %s" % (partition))
passed = 0
for node in node_list:
if self.CM.ShouldBeStatus[node] == "up":
if self.CM.is_node_dc(node, self.NodeState[node]):
dc_found.append(node)
if self.NodeEpoch[node] == lowest_epoch:
self.debug("%s: OK" % node)
elif not self.NodeEpoch[node]:
self.debug("Check on %s ignored: no node epoch" % node)
elif not lowest_epoch:
self.debug("Check on %s ignored: no lowest epoch" % node)
else:
self.CM.log("DC %s is not the oldest node (%d vs. %d)"
% (node, self.NodeEpoch[node], lowest_epoch))
passed = 0
if len(dc_found) == 0:
self.CM.log("DC not found on any of the %d allowed nodes: %s (of %s)"
% (len(dc_allowed_list), str(dc_allowed_list), str(node_list)))
elif len(dc_found) > 1:
self.CM.log("%d DCs (%s) found in cluster partition: %s"
% (len(dc_found), str(dc_found), str(node_list)))
passed = 0
if passed == 0:
for node in node_list:
if self.CM.ShouldBeStatus[node] == "up":
self.CM.log("epoch %s : %s"
% (self.NodeEpoch[node], self.NodeState[node]))
return passed
def name(self):
return "PartitionAudit"
def is_applicable(self):
# @TODO Due to long-ago refactoring, this name test would never match,
# so this audit (and those derived from it) would never run.
# Uncommenting the next lines fixes the name test, but that then
# exposes pre-existing bugs that need to be fixed.
#if self.CM["Name"] == "crm-corosync":
# return 1
return 0
AllAuditClasses.append(DiskAudit)
AllAuditClasses.append(FileAudit)
AllAuditClasses.append(LogAudit)
AllAuditClasses.append(ControllerStateAudit)
AllAuditClasses.append(PartitionAudit)
AllAuditClasses.append(PrimitiveAudit)
AllAuditClasses.append(GroupAudit)
AllAuditClasses.append(CloneAudit)
AllAuditClasses.append(ColocationAudit)
AllAuditClasses.append(CIBAudit)
def AuditList(cm):
result = []
for auditclass in AllAuditClasses:
a = auditclass(cm)
if a.is_applicable():
result.append(a)
return result
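# A minimal usage sketch (assuming `cm` is an initialized cluster
# manager; illustrative only, not part of the patch):
#
#   for audit in AuditList(cm):
#       if not audit():
#           cm.log("Audit %s failed" % audit.name())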
diff --git a/cts/scheduler/summary/primitive-with-group-with-promoted.summary b/cts/scheduler/summary/primitive-with-group-with-promoted.summary
index ce100915b4..b92ce1e50a 100644
--- a/cts/scheduler/summary/primitive-with-group-with-promoted.summary
+++ b/cts/scheduler/summary/primitive-with-group-with-promoted.summary
@@ -1,75 +1,75 @@
Current cluster status:
* Node List:
* Online: [ node1 node2 node3 node4 node5 ]
* Full List of Resources:
* Fencing (stonith:fence_xvm): Started node1
* Clone Set: rsc2-clone [rsc2] (promotable):
* Stopped: [ node1 node2 node3 node4 node5 ]
* rsc1 (ocf:pacemaker:Dummy): Stopped
* Resource Group: group1:
* group1rsc1 (ocf:pacemaker:Dummy): Stopped
* group1rsc2 (ocf:pacemaker:Dummy): Stopped
Transition Summary:
* Promote rsc2:0 ( Stopped -> Promoted node5 )
- * Start rsc2:1 ( node2 )
- * Start rsc2:2 ( node3 )
- * Start rsc1 ( node5 )
- * Start group1rsc1 ( node5 )
- * Start group1rsc2 ( node5 )
+ * Start rsc2:1 ( node2 )
+ * Start rsc2:2 ( node3 )
+ * Start rsc1 ( node5 )
+ * Start group1rsc1 ( node5 )
+ * Start group1rsc2 ( node5 )
Executing Cluster Transition:
* Resource action: rsc2:0 monitor on node5
* Resource action: rsc2:0 monitor on node4
* Resource action: rsc2:0 monitor on node1
* Resource action: rsc2:1 monitor on node2
* Resource action: rsc2:2 monitor on node3
* Pseudo action: rsc2-clone_start_0
* Resource action: rsc1 monitor on node5
* Resource action: rsc1 monitor on node4
* Resource action: rsc1 monitor on node3
* Resource action: rsc1 monitor on node2
* Resource action: rsc1 monitor on node1
* Pseudo action: group1_start_0
* Resource action: group1rsc1 monitor on node5
* Resource action: group1rsc1 monitor on node4
* Resource action: group1rsc1 monitor on node3
* Resource action: group1rsc1 monitor on node2
* Resource action: group1rsc1 monitor on node1
* Resource action: group1rsc2 monitor on node5
* Resource action: group1rsc2 monitor on node4
* Resource action: group1rsc2 monitor on node3
* Resource action: group1rsc2 monitor on node2
* Resource action: group1rsc2 monitor on node1
* Resource action: rsc2:0 start on node5
* Resource action: rsc2:1 start on node2
* Resource action: rsc2:2 start on node3
* Pseudo action: rsc2-clone_running_0
* Resource action: rsc1 start on node5
* Resource action: group1rsc1 start on node5
* Resource action: group1rsc2 start on node5
* Resource action: rsc2:1 monitor=11000 on node2
* Resource action: rsc2:2 monitor=11000 on node3
* Pseudo action: rsc2-clone_promote_0
* Resource action: rsc1 monitor=10000 on node5
* Pseudo action: group1_running_0
* Resource action: group1rsc1 monitor=10000 on node5
* Resource action: group1rsc2 monitor=10000 on node5
* Resource action: rsc2:0 promote on node5
* Pseudo action: rsc2-clone_promoted_0
* Resource action: rsc2:0 monitor=10000 on node5
Revised Cluster Status:
* Node List:
* Online: [ node1 node2 node3 node4 node5 ]
* Full List of Resources:
* Fencing (stonith:fence_xvm): Started node1
* Clone Set: rsc2-clone [rsc2] (promotable):
* Promoted: [ node5 ]
* Unpromoted: [ node2 node3 ]
* rsc1 (ocf:pacemaker:Dummy): Started node5
* Resource Group: group1:
* group1rsc1 (ocf:pacemaker:Dummy): Started node5
* group1rsc2 (ocf:pacemaker:Dummy): Started node5
diff --git a/cts/scheduler/summary/promoted-partially-demoted-group.summary b/cts/scheduler/summary/promoted-partially-demoted-group.summary
index 91e1ee7013..b85c805711 100644
--- a/cts/scheduler/summary/promoted-partially-demoted-group.summary
+++ b/cts/scheduler/summary/promoted-partially-demoted-group.summary
@@ -1,118 +1,118 @@
Current cluster status:
* Node List:
* Online: [ sd01-0 sd01-1 ]
* Full List of Resources:
* stonith-xvm-sd01-0 (stonith:fence_xvm): Started sd01-1
* stonith-xvm-sd01-1 (stonith:fence_xvm): Started sd01-0
* Resource Group: cdev-pool-0-iscsi-export:
* cdev-pool-0-iscsi-target (ocf:vds-ok:iSCSITarget): Started sd01-1
* cdev-pool-0-iscsi-lun-1 (ocf:vds-ok:iSCSILogicalUnit): Started sd01-1
* Clone Set: ms-cdev-pool-0-drbd [cdev-pool-0-drbd] (promotable):
* Promoted: [ sd01-1 ]
* Unpromoted: [ sd01-0 ]
* Clone Set: cl-ietd [ietd]:
* Started: [ sd01-0 sd01-1 ]
* Clone Set: cl-vlan1-net [vlan1-net]:
* Started: [ sd01-0 sd01-1 ]
* Resource Group: cdev-pool-0-iscsi-vips:
* vip-164 (ocf:heartbeat:IPaddr2): Started sd01-1
* vip-165 (ocf:heartbeat:IPaddr2): Started sd01-1
* Clone Set: ms-cdev-pool-0-iscsi-vips-fw [cdev-pool-0-iscsi-vips-fw] (promotable):
* Promoted: [ sd01-1 ]
* Unpromoted: [ sd01-0 ]
Transition Summary:
* Move vip-164 ( sd01-1 -> sd01-0 )
* Move vip-165 ( sd01-1 -> sd01-0 )
- * Move cdev-pool-0-iscsi-target ( sd01-1 -> sd01-0 )
- * Move cdev-pool-0-iscsi-lun-1 ( sd01-1 -> sd01-0 )
+ * Move cdev-pool-0-iscsi-target ( sd01-1 -> sd01-0 )
+ * Move cdev-pool-0-iscsi-lun-1 ( sd01-1 -> sd01-0 )
* Demote vip-164-fw:0 ( Promoted -> Unpromoted sd01-1 )
* Promote vip-164-fw:1 ( Unpromoted -> Promoted sd01-0 )
* Promote vip-165-fw:1 ( Unpromoted -> Promoted sd01-0 )
* Demote cdev-pool-0-drbd:0 ( Promoted -> Unpromoted sd01-1 )
* Promote cdev-pool-0-drbd:1 ( Unpromoted -> Promoted sd01-0 )
Executing Cluster Transition:
* Resource action: vip-165-fw monitor=10000 on sd01-1
* Pseudo action: ms-cdev-pool-0-iscsi-vips-fw_demote_0
* Pseudo action: ms-cdev-pool-0-drbd_pre_notify_demote_0
* Pseudo action: cdev-pool-0-iscsi-vips-fw:0_demote_0
* Resource action: vip-164-fw demote on sd01-1
* Resource action: cdev-pool-0-drbd notify on sd01-1
* Resource action: cdev-pool-0-drbd notify on sd01-0
* Pseudo action: ms-cdev-pool-0-drbd_confirmed-pre_notify_demote_0
* Pseudo action: cdev-pool-0-iscsi-vips-fw:0_demoted_0
* Resource action: vip-164-fw monitor=10000 on sd01-1
* Pseudo action: ms-cdev-pool-0-iscsi-vips-fw_demoted_0
* Pseudo action: cdev-pool-0-iscsi-vips_stop_0
* Resource action: vip-165 stop on sd01-1
* Resource action: vip-164 stop on sd01-1
* Pseudo action: cdev-pool-0-iscsi-vips_stopped_0
* Pseudo action: cdev-pool-0-iscsi-export_stop_0
* Resource action: cdev-pool-0-iscsi-lun-1 stop on sd01-1
* Resource action: cdev-pool-0-iscsi-target stop on sd01-1
* Pseudo action: cdev-pool-0-iscsi-export_stopped_0
* Pseudo action: ms-cdev-pool-0-drbd_demote_0
* Resource action: cdev-pool-0-drbd demote on sd01-1
* Pseudo action: ms-cdev-pool-0-drbd_demoted_0
* Pseudo action: ms-cdev-pool-0-drbd_post_notify_demoted_0
* Resource action: cdev-pool-0-drbd notify on sd01-1
* Resource action: cdev-pool-0-drbd notify on sd01-0
* Pseudo action: ms-cdev-pool-0-drbd_confirmed-post_notify_demoted_0
* Pseudo action: ms-cdev-pool-0-drbd_pre_notify_promote_0
* Resource action: cdev-pool-0-drbd notify on sd01-1
* Resource action: cdev-pool-0-drbd notify on sd01-0
* Pseudo action: ms-cdev-pool-0-drbd_confirmed-pre_notify_promote_0
* Pseudo action: ms-cdev-pool-0-drbd_promote_0
* Resource action: cdev-pool-0-drbd promote on sd01-0
* Pseudo action: ms-cdev-pool-0-drbd_promoted_0
* Pseudo action: ms-cdev-pool-0-drbd_post_notify_promoted_0
* Resource action: cdev-pool-0-drbd notify on sd01-1
* Resource action: cdev-pool-0-drbd notify on sd01-0
* Pseudo action: ms-cdev-pool-0-drbd_confirmed-post_notify_promoted_0
* Pseudo action: cdev-pool-0-iscsi-export_start_0
* Resource action: cdev-pool-0-iscsi-target start on sd01-0
* Resource action: cdev-pool-0-iscsi-lun-1 start on sd01-0
* Resource action: cdev-pool-0-drbd monitor=20000 on sd01-1
* Resource action: cdev-pool-0-drbd monitor=10000 on sd01-0
* Pseudo action: cdev-pool-0-iscsi-export_running_0
* Resource action: cdev-pool-0-iscsi-target monitor=10000 on sd01-0
* Resource action: cdev-pool-0-iscsi-lun-1 monitor=10000 on sd01-0
* Pseudo action: cdev-pool-0-iscsi-vips_start_0
* Resource action: vip-164 start on sd01-0
* Resource action: vip-165 start on sd01-0
* Pseudo action: cdev-pool-0-iscsi-vips_running_0
* Resource action: vip-164 monitor=30000 on sd01-0
* Resource action: vip-165 monitor=30000 on sd01-0
* Pseudo action: ms-cdev-pool-0-iscsi-vips-fw_promote_0
* Pseudo action: cdev-pool-0-iscsi-vips-fw:0_promote_0
* Pseudo action: cdev-pool-0-iscsi-vips-fw:1_promote_0
* Resource action: vip-164-fw promote on sd01-0
* Resource action: vip-165-fw promote on sd01-0
* Pseudo action: cdev-pool-0-iscsi-vips-fw:1_promoted_0
* Pseudo action: ms-cdev-pool-0-iscsi-vips-fw_promoted_0
Revised Cluster Status:
* Node List:
* Online: [ sd01-0 sd01-1 ]
* Full List of Resources:
* stonith-xvm-sd01-0 (stonith:fence_xvm): Started sd01-1
* stonith-xvm-sd01-1 (stonith:fence_xvm): Started sd01-0
* Resource Group: cdev-pool-0-iscsi-export:
* cdev-pool-0-iscsi-target (ocf:vds-ok:iSCSITarget): Started sd01-0
* cdev-pool-0-iscsi-lun-1 (ocf:vds-ok:iSCSILogicalUnit): Started sd01-0
* Clone Set: ms-cdev-pool-0-drbd [cdev-pool-0-drbd] (promotable):
* Promoted: [ sd01-0 ]
* Unpromoted: [ sd01-1 ]
* Clone Set: cl-ietd [ietd]:
* Started: [ sd01-0 sd01-1 ]
* Clone Set: cl-vlan1-net [vlan1-net]:
* Started: [ sd01-0 sd01-1 ]
* Resource Group: cdev-pool-0-iscsi-vips:
* vip-164 (ocf:heartbeat:IPaddr2): Started sd01-0
* vip-165 (ocf:heartbeat:IPaddr2): Started sd01-0
* Clone Set: ms-cdev-pool-0-iscsi-vips-fw [cdev-pool-0-iscsi-vips-fw] (promotable):
* Promoted: [ sd01-0 ]
* Unpromoted: [ sd01-1 ]
diff --git a/po/zh_CN.po b/po/zh_CN.po
index 212c81ef85..a107f0b4eb 100644
--- a/po/zh_CN.po
+++ b/po/zh_CN.po
@@ -1,1105 +1,1105 @@
#
# Copyright 2003-2022 the Pacemaker project contributors
#
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU Lesser General Public License
# version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Pacemaker 2\n"
"Report-Msgid-Bugs-To: developers@clusterlabs.org\n"
-"POT-Creation-Date: 2023-01-28 10:28+0800\n"
+"POT-Creation-Date: 2023-04-05 16:20-0500\n"
"PO-Revision-Date: 2021-11-08 11:04+0800\n"
"Last-Translator: Vivi \n"
"Language-Team: CHINESE \n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-#: daemons/controld/controld_control.c:531
+#: daemons/controld/controld_control.c:533
msgid "Pacemaker version on cluster node elected Designated Controller (DC)"
msgstr "集群选定的控制器节点(DC)的 Pacemaker 版本"
-#: daemons/controld/controld_control.c:532
+#: daemons/controld/controld_control.c:534
msgid ""
"Includes a hash which identifies the exact changeset the code was built "
"from. Used for diagnostic purposes."
msgstr "它包含一个标识所构建代码变更版本的哈希值,其可用于诊断。"
-#: daemons/controld/controld_control.c:537
+#: daemons/controld/controld_control.c:539
msgid "The messaging stack on which Pacemaker is currently running"
msgstr "Pacemaker 正在使用的消息传输引擎"
-#: daemons/controld/controld_control.c:538
+#: daemons/controld/controld_control.c:540
msgid "Used for informational and diagnostic purposes."
msgstr "用于提供信息和诊断。"
-#: daemons/controld/controld_control.c:542
+#: daemons/controld/controld_control.c:544
msgid "An arbitrary name for the cluster"
msgstr "任意的集群名称"
-#: daemons/controld/controld_control.c:543
+#: daemons/controld/controld_control.c:545
msgid ""
"This optional value is mostly for users' convenience as desired in "
"administration, but may also be used in Pacemaker configuration rules via "
"the #cluster-name node attribute, and by higher-level tools and resource "
"agents."
msgstr ""
"该可选值主要是为了方便用户管理使用,也可以在pacemaker 配置规则中通过 "
"#cluster-name 节点属性配置使用,也可以通过高级工具和资源代理使用。"
-#: daemons/controld/controld_control.c:551
+#: daemons/controld/controld_control.c:553
msgid "How long to wait for a response from other nodes during start-up"
msgstr "启动过程中等待其他节点响应的时间"
-#: daemons/controld/controld_control.c:552
+#: daemons/controld/controld_control.c:554
msgid ""
"The optimal value will depend on the speed and load of your network and the "
"type of switches used."
msgstr "其最佳值将取决于你的网络速度和负载以及所用交换机的类型。"
-#: daemons/controld/controld_control.c:557
+#: daemons/controld/controld_control.c:559
msgid ""
"Zero disables polling, while positive values are an interval in "
"seconds(unless other units are specified, for example \"5min\")"
msgstr ""
"设置为0将禁用轮询,设置为正数将是以秒为单位的时间间隔(除非使用了其他单位,比"
"如\"5min\"表示5分钟)"
-#: daemons/controld/controld_control.c:560
+#: daemons/controld/controld_control.c:562
msgid ""
"Polling interval to recheck cluster state and evaluate rules with date "
"specifications"
msgstr "重新检查集群状态并且评估具有日期规格的配置规则的轮询间隔"
-#: daemons/controld/controld_control.c:562
+#: daemons/controld/controld_control.c:564
msgid ""
"Pacemaker is primarily event-driven, and looks ahead to know when to recheck "
"cluster state for failure timeouts and most time-based rules. However, it "
"will also recheck the cluster after this amount of inactivity, to evaluate "
"rules with date specifications and serve as a fail-safe for certain types of "
"scheduler bugs."
msgstr ""
"Pacemaker 主要是通过事件驱动的,并能预期重新检查集群状态以评估大多数基于时间"
"的规则以及过期的错误。然而无论如何,在集群经过该时间间隔的不活动状态后,它还"
"将重新检查集群,以评估具有日期规格的规则,并为某些类型的调度程序缺陷提供故障"
"保护。"
-#: daemons/controld/controld_control.c:571
+#: daemons/controld/controld_control.c:573
msgid "Maximum amount of system load that should be used by cluster nodes"
msgstr "集群节点应该使用的最大系统负载量"
-#: daemons/controld/controld_control.c:572
+#: daemons/controld/controld_control.c:574
msgid ""
"The cluster will slow down its recovery process when the amount of system "
"resources used (currently CPU) approaches this limit"
msgstr "当使用的系统资源量(当前为CPU)接近此限制时,集群将减慢其恢复过程"
-#: daemons/controld/controld_control.c:578
+#: daemons/controld/controld_control.c:580
msgid ""
"Maximum number of jobs that can be scheduled per node (defaults to 2x cores)"
msgstr "每个节点可以调度的最大作业数(默认为2x内核数)"
-#: daemons/controld/controld_control.c:582
+#: daemons/controld/controld_control.c:584
msgid "How a cluster node should react if notified of its own fencing"
msgstr "集群节点在收到针对自己的 fence 操作结果通知时应如何反应"
-#: daemons/controld/controld_control.c:583
+#: daemons/controld/controld_control.c:585
msgid ""
"A cluster node may receive notification of its own fencing if fencing is "
"misconfigured, or if fabric fencing is in use that doesn't cut cluster "
"communication. Allowed values are \"stop\" to attempt to immediately stop "
"Pacemaker and stay stopped, or \"panic\" to attempt to immediately reboot "
"the local node, falling back to stop on failure."
msgstr ""
"如果有错误的 fence 配置,或者在使用 fabric fence 机制 (并不会切断集群通信),"
"则集群节点可能会收到针对自己的 fence 结果通知。允许的值为 \"stop\" 尝试立即停"
"止 pacemaker 并保持停用状态,或者 \"panic\" 尝试立即重新启动本地节点,并在失败"
"时返回执行stop。"
-#: daemons/controld/controld_control.c:593
+#: daemons/controld/controld_control.c:595
msgid ""
"Declare an election failed if it is not decided within this much time. If "
"you need to adjust this value, it probably indicates the presence of a bug."
msgstr ""
"如果集群在本项设置时间内没有作出决定则宣布选举失败。如果您需要调整该值,这可"
"能代表存在某些缺陷。"
-#: daemons/controld/controld_control.c:601
+#: daemons/controld/controld_control.c:603
msgid ""
"Exit immediately if shutdown does not complete within this much time. If you "
"need to adjust this value, it probably indicates the presence of a bug."
msgstr ""
"如果在这段时间内关机仍未完成,则立即退出。如果您需要调整该值,这可能代表存在"
"某些缺陷。"
-#: daemons/controld/controld_control.c:609
-#: daemons/controld/controld_control.c:616
+#: daemons/controld/controld_control.c:611
+#: daemons/controld/controld_control.c:618
msgid ""
"If you need to adjust this value, it probably indicates the presence of a "
"bug."
msgstr "如果您需要调整该值,这可能代表存在某些缺陷。"
-#: daemons/controld/controld_control.c:622
+#: daemons/controld/controld_control.c:624
msgid ""
"*** Advanced Use Only *** Enabling this option will slow down cluster "
"recovery under all conditions"
msgstr "*** Advanced Use Only *** 启用此选项将在所有情况下减慢集群恢复的速度"
-#: daemons/controld/controld_control.c:624
+#: daemons/controld/controld_control.c:626
msgid ""
"Delay cluster recovery for this much time to allow for additional events to "
"occur. Useful if your configuration is sensitive to the order in which ping "
"updates arrive."
msgstr ""
"集群恢复将被推迟指定的时间间隔,以等待更多事件发生。如果您的配置对 ping 更新"
"到达的顺序很敏感,这就很有用"
-#: daemons/controld/controld_control.c:631
+#: daemons/controld/controld_control.c:633
#, fuzzy
msgid ""
"How long before nodes can be assumed to be safely down when watchdog-based "
"self-fencing via SBD is in use"
msgstr ""
"当基于 watchdog 的自我 fence 机制通过SBD 被执行时,我们可以假设节点安全关闭之"
"前需要等待多长时间"
-#: daemons/controld/controld_control.c:633
+#: daemons/controld/controld_control.c:635
msgid ""
"If this is set to a positive value, lost nodes are assumed to self-fence "
"using watchdog-based SBD within this much time. This does not require a "
"fencing resource to be explicitly configured, though a fence_watchdog "
"resource can be configured, to limit use to specific nodes. If this is set "
"to 0 (the default), the cluster will never assume watchdog-based self-"
"fencing. If this is set to a negative value, the cluster will use twice the "
"local value of the `SBD_WATCHDOG_TIMEOUT` environment variable if that is "
"positive, or otherwise treat this as 0. WARNING: When used, this timeout "
"must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use watchdog-"
"based SBD, and Pacemaker will refuse to start on any of those nodes where "
"this is not true for the local value or SBD is not active. When this is set "
"to a negative value, `SBD_WATCHDOG_TIMEOUT` must be set to the same value on "
"all nodes that use SBD, otherwise data corruption or loss could occur."
msgstr ""
"如果设置为正值,则假定丢失的节点在这段时间内使用基于watchdog的SBD进行自我防"
"护。这不需要明确配置fence资源,但可以配置一个fence_watchdog资源,以限制特定节"
"点的使用。如果设置为0(默认值),集群将永远不会假定基于watchdog的自我防护。如"
"果设置为负值,且如果`SBD_WATCHDOG_TIMEOUT`环境变量的本地值为正值,则集群将使"
"用该值的两倍,否则将其视为0。警告:在使用基于watchdog的SBD的所有节点上,此超"
"时必须大于`SBD_WATCGDOG_TIMEOUT`,如果本地值不是这样,或者SBD未运行,则"
"Pacemaker将拒绝在任何节点上启动。如果设置为负值,则在使用SBD的所有节点上,"
"`SBD_WATCHDOG_TIMEOUT`必须设置为相同的值,否则可能会发生数据损坏或丢失。"
-#: daemons/controld/controld_control.c:652
+#: daemons/controld/controld_control.c:654
msgid ""
"How many times fencing can fail before it will no longer be immediately re-"
"attempted on a target"
msgstr "fence操作失败多少次会停止立即尝试"
-#: daemons/fenced/pacemaker-fenced.c:1378
+#: daemons/controld/controld_control.c:662 lib/pengine/common.c:39
+msgid "What to do when the cluster does not have quorum"
+msgstr "当集群没有必需票数时该如何作"
+
+#: daemons/controld/controld_control.c:667 lib/pengine/common.c:73
+msgid "Whether to lock resources to a cleanly shut down node"
+msgstr "是否锁定资源到完全关闭的节点"
+
+#: daemons/controld/controld_control.c:668 lib/pengine/common.c:74
+msgid ""
+"When true, resources active on a node when it is cleanly shut down are kept "
+"\"locked\" to that node (not allowed to run elsewhere) until they start "
+"again on that node after it rejoins (or for at most shutdown-lock-limit, if "
+"set). Stonith resources and Pacemaker Remote connections are never locked. "
+"Clone and bundle instances and the promoted role of promotable clones are "
+"currently never locked, though support could be added in a future release."
+msgstr ""
+"设置为true时,在完全关闭的节点上活动的资源将被“锁定”到该节点(不允许在其他地"
+"方运行),直到该节点重新加入后资源重新启动(或最长shutdown-lock-limit,如果已"
+"设置)。 Stonith资源和Pacemaker Remote连接永远不会被锁定。 克隆和捆绑实例以及"
+"可升级克隆的主角色目前从未锁定,尽管可以在将来的发行版中添加支持。"
+
+#: daemons/controld/controld_control.c:680 lib/pengine/common.c:86
+msgid "Do not lock resources to a cleanly shut down node longer than this"
+msgstr "资源会被锁定到完全关闭的节点的最长时间"
+
+#: daemons/controld/controld_control.c:682 lib/pengine/common.c:88
+msgid ""
+"If shutdown-lock is true and this is set to a nonzero time duration, "
+"shutdown locks will expire after this much time has passed since the "
+"shutdown was initiated, even if the node has not rejoined."
+msgstr ""
+"如果shutdown-lock为true,并且将此选项设置为非零持续时间,则自从开始shutdown以"
+"来经过了这么长的时间后,shutdown锁将过期,即使该节点尚未重新加入。"
+
+#: daemons/fenced/pacemaker-fenced.c:1379
msgid "Advanced use only: An alternate parameter to supply instead of 'port'"
msgstr "仅高级使用:使用替代的参数名,而不是'port'"
-#: daemons/fenced/pacemaker-fenced.c:1379
+#: daemons/fenced/pacemaker-fenced.c:1380
msgid ""
"some devices do not support the standard 'port' parameter or may provide "
"additional ones. Use this to specify an alternate, device-specific, "
"parameter that should indicate the machine to be fenced. A value of none can "
"be used to tell the cluster not to supply any additional parameters."
msgstr ""
"一些设备不支持标准的'port'参数,或者可能提供其他参数。使用此选项可指定一个该"
"设备专用的参数名,该参数用于标识需要fence的机器。值none可以用于告诉集群不要提"
"供任何其他的参数。"
-#: daemons/fenced/pacemaker-fenced.c:1388
+#: daemons/fenced/pacemaker-fenced.c:1389
msgid ""
"A mapping of host names to ports numbers for devices that do not support "
"host names."
msgstr "为不支持主机名的设备提供主机名到端口号的映射。"
-#: daemons/fenced/pacemaker-fenced.c:1389
+#: daemons/fenced/pacemaker-fenced.c:1390
msgid ""
"Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and "
"ports 2 and 3 for node2"
msgstr ""
"例如 node1:1;node2:2,3,将会告诉集群对node1使用端口1,对node2使用端口2和3 "
-#: daemons/fenced/pacemaker-fenced.c:1393
+#: daemons/fenced/pacemaker-fenced.c:1394
msgid "Eg. node1,node2,node3"
msgstr "例如 node1,node2,node3"
-#: daemons/fenced/pacemaker-fenced.c:1394
+#: daemons/fenced/pacemaker-fenced.c:1395
msgid ""
"A list of machines controlled by this device (Optional unless "
"pcmk_host_list=static-list)"
msgstr "该设备控制的机器列表(可选参数,除非 pcmk_host_list 设置为 static-list)"
-#: daemons/fenced/pacemaker-fenced.c:1399
+#: daemons/fenced/pacemaker-fenced.c:1400
msgid "How to determine which machines are controlled by the device."
msgstr "如何确定设备控制哪些机器。"
-#: daemons/fenced/pacemaker-fenced.c:1400
+#: daemons/fenced/pacemaker-fenced.c:1401
msgid ""
"Allowed values: dynamic-list (query the device via the 'list' command), "
"static-list (check the pcmk_host_list attribute), status (query the device "
"via the 'status' command), none (assume every device can fence every machine)"
msgstr ""
"允许的值:dynamic-list(通过'list'命令查询设备),static-list(检查"
"pcmk_host_list属性),status(通过'status'命令查询设备),none(假设每个设备"
"都可fence 每台机器 )"
-#: daemons/fenced/pacemaker-fenced.c:1409
-#: daemons/fenced/pacemaker-fenced.c:1418
+#: daemons/fenced/pacemaker-fenced.c:1410
+#: daemons/fenced/pacemaker-fenced.c:1419
msgid "Enable a base delay for fencing actions and specify base delay value."
msgstr "在执行 fencing 操作前启用不超过指定时间的延迟。"
-#: daemons/fenced/pacemaker-fenced.c:1410
+#: daemons/fenced/pacemaker-fenced.c:1411
msgid ""
"Enable a delay of no more than the time specified before executing fencing "
"actions. Pacemaker derives the overall delay by taking the value of "
"pcmk_delay_base and adding a random delay value such that the sum is kept "
"below this maximum."
msgstr ""
"在执行 fencing 操作前启用不超过指定时间的延迟。 Pacemaker通过获取"
"pcmk_delay_base的值并添加随机延迟值来得出总体延迟,从而使总和保持在此最大值以"
"下。"
-#: daemons/fenced/pacemaker-fenced.c:1420
+#: daemons/fenced/pacemaker-fenced.c:1421
msgid ""
"This enables a static delay for fencing actions, which can help avoid "
"\"death matches\" where two nodes try to fence each other at the same time. "
"If pcmk_delay_max is also used, a random delay will be added such that the "
"total delay is kept below that value.This can be set to a single time value "
"to apply to any node targeted by this device (useful if a separate device is "
"configured for each target), or to a node map (for example, \"node1:1s;"
"node2:5\") to set a different value per target."
msgstr ""
"这使fencing 操作启用静态延迟,这可以帮助避免\"death matches\"即两个节点试图同"
"时互相fence.如果还使用了pcmk_delay_max,则将添加随机延迟,以使总延迟保持在该"
"值以下。可以将其设置为单个时间值,以应用于该设备针对的任何节点(适用于为每个"
"目标分别配置了各自的设备的情况), 或着设置为一个节点映射 (例如,\"node1:1s;"
"node2:5\")从而为每个目标设置不同值。"
-#: daemons/fenced/pacemaker-fenced.c:1432
+#: daemons/fenced/pacemaker-fenced.c:1433
msgid ""
"The maximum number of actions can be performed in parallel on this device"
msgstr "可以在该设备上并发执行的最多操作数量"
-#: daemons/fenced/pacemaker-fenced.c:1433
+#: daemons/fenced/pacemaker-fenced.c:1434
msgid ""
"Cluster property concurrent-fencing=true needs to be configured first.Then "
"use this to specify the maximum number of actions can be performed in "
"parallel on this device. -1 is unlimited."
msgstr ""
"需要首先配置集群属性 concurrent-fencing=true 。然后使用此参数指定可以在该设备"
"上并发执行的最多操作数量。 -1 代表没有限制"
-#: daemons/fenced/pacemaker-fenced.c:1438
+#: daemons/fenced/pacemaker-fenced.c:1439
msgid "Advanced use only: An alternate command to run instead of 'reboot'"
msgstr "仅高级使用:运行替代命令,而不是'reboot'"
-#: daemons/fenced/pacemaker-fenced.c:1439
+#: daemons/fenced/pacemaker-fenced.c:1440
msgid ""
"Some devices do not support the standard commands or may provide additional "
"ones.\n"
"Use this to specify an alternate, device-specific, command that implements "
"the 'reboot' action."
msgstr ""
"一些设备不支持标准命令或可能提供其他命令,使用此选项可以指定一个该设备特定的"
"替代命令,用来实现'reboot'操作。"
-#: daemons/fenced/pacemaker-fenced.c:1444
+#: daemons/fenced/pacemaker-fenced.c:1445
msgid ""
"Advanced use only: Specify an alternate timeout to use for reboot actions "
"instead of stonith-timeout"
msgstr "仅高级使用:指定用于'reboot' 操作的替代超时,而不是stonith-timeout"
-#: daemons/fenced/pacemaker-fenced.c:1445
+#: daemons/fenced/pacemaker-fenced.c:1446
msgid ""
"Some devices need much more/less time to complete than normal.Use this to "
"specify an alternate, device-specific, timeout for 'reboot' actions."
msgstr ""
"一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用"
"于'reboot'操作的该设备特定的替代超时。"
-#: daemons/fenced/pacemaker-fenced.c:1450
+#: daemons/fenced/pacemaker-fenced.c:1451
msgid ""
"Advanced use only: The maximum number of times to retry the 'reboot' command "
"within the timeout period"
msgstr "仅高级使用:在超时前重试'reboot'命令的最大次数"
-#: daemons/fenced/pacemaker-fenced.c:1451
+#: daemons/fenced/pacemaker-fenced.c:1452
msgid ""
"Some devices do not support multiple connections. Operations may 'fail' if "
"the device is busy with another task so Pacemaker will automatically retry "
"the operation, if there is time remaining. Use this option to alter the "
"number of times Pacemaker retries 'reboot' actions before giving up."
msgstr ""
"一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' ,因此"
"Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重"
"试'reboot' 操作的次数."
-#: daemons/fenced/pacemaker-fenced.c:1457
+#: daemons/fenced/pacemaker-fenced.c:1458
msgid "Advanced use only: An alternate command to run instead of 'off'"
msgstr "仅高级使用:运行替代命令,而不是'off'"
-#: daemons/fenced/pacemaker-fenced.c:1458
+#: daemons/fenced/pacemaker-fenced.c:1459
msgid ""
"Some devices do not support the standard commands or may provide additional "
"ones.Use this to specify an alternate, device-specific, command that "
"implements the 'off' action."
msgstr ""
"一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备专用的替代"
"命令,用来实现'off'操作。"
-#: daemons/fenced/pacemaker-fenced.c:1463
+#: daemons/fenced/pacemaker-fenced.c:1464
msgid ""
"Advanced use only: Specify an alternate timeout to use for off actions "
"instead of stonith-timeout"
msgstr "仅高级使用:指定用于off 操作的替代超时,而不是stonith-timeout"
-#: daemons/fenced/pacemaker-fenced.c:1464
+#: daemons/fenced/pacemaker-fenced.c:1465
msgid ""
"Some devices need much more/less time to complete than normal.Use this to "
"specify an alternate, device-specific, timeout for 'off' actions."
msgstr ""
"一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用"
"于'off'操作的该设备特定的替代超时。"
-#: daemons/fenced/pacemaker-fenced.c:1469
+#: daemons/fenced/pacemaker-fenced.c:1470
msgid ""
"Advanced use only: The maximum number of times to retry the 'off' command "
"within the timeout period"
msgstr "仅高级使用:在超时前重试'off'命令的最大次数"
-#: daemons/fenced/pacemaker-fenced.c:1470
+#: daemons/fenced/pacemaker-fenced.c:1471
msgid ""
"Some devices do not support multiple connections. Operations may 'fail' if "
"the device is busy with another task so Pacemaker will automatically retry "
"the operation, if there is time remaining. Use this option to alter the "
"number of times Pacemaker retries 'off' actions before giving up."
msgstr ""
" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此"
"Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重"
"试'off' 操作的次数."
-#: daemons/fenced/pacemaker-fenced.c:1476
+#: daemons/fenced/pacemaker-fenced.c:1477
msgid "Advanced use only: An alternate command to run instead of 'on'"
msgstr "仅高级使用:运行替代命令,而不是'on'"
-#: daemons/fenced/pacemaker-fenced.c:1477
+#: daemons/fenced/pacemaker-fenced.c:1478
msgid ""
"Some devices do not support the standard commands or may provide additional "
"ones.Use this to specify an alternate, device-specific, command that "
"implements the 'on' action."
msgstr ""
"一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备特定的替"
"代命令,用来实现'on'操作。"
-#: daemons/fenced/pacemaker-fenced.c:1482
+#: daemons/fenced/pacemaker-fenced.c:1483
msgid ""
"Advanced use only: Specify an alternate timeout to use for on actions "
"instead of stonith-timeout"
msgstr "仅高级使用:指定用于on 操作的替代超时,而不是stonith-timeout"
-#: daemons/fenced/pacemaker-fenced.c:1483
+#: daemons/fenced/pacemaker-fenced.c:1484
msgid ""
"Some devices need much more/less time to complete than normal.Use this to "
"specify an alternate, device-specific, timeout for 'on' actions."
msgstr ""
"一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用"
"于'on'操作的该设备特定的替代超时。"
-#: daemons/fenced/pacemaker-fenced.c:1488
+#: daemons/fenced/pacemaker-fenced.c:1489
msgid ""
"Advanced use only: The maximum number of times to retry the 'on' command "
"within the timeout period"
msgstr "仅高级使用:在超时前重试'on'命令的最大次数"
-#: daemons/fenced/pacemaker-fenced.c:1489
+#: daemons/fenced/pacemaker-fenced.c:1490
msgid ""
"Some devices do not support multiple connections. Operations may 'fail' if "
"the device is busy with another task so Pacemaker will automatically retry "
"the operation, if there is time remaining. Use this option to alter the "
"number of times Pacemaker retries 'on' actions before giving up."
msgstr ""
" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此"
"Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重"
"试'on' 操作的次数."
-#: daemons/fenced/pacemaker-fenced.c:1495
+#: daemons/fenced/pacemaker-fenced.c:1496
msgid "Advanced use only: An alternate command to run instead of 'list'"
msgstr "仅高级使用:运行替代命令,而不是'list'"
-#: daemons/fenced/pacemaker-fenced.c:1496
+#: daemons/fenced/pacemaker-fenced.c:1497
msgid ""
"Some devices do not support the standard commands or may provide additional "
"ones.Use this to specify an alternate, device-specific, command that "
"implements the 'list' action."
msgstr ""
"一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备特定的替"
"代命令,用来实现'list'操作。"
-#: daemons/fenced/pacemaker-fenced.c:1501
+#: daemons/fenced/pacemaker-fenced.c:1502
msgid ""
"Advanced use only: Specify an alternate timeout to use for list actions "
"instead of stonith-timeout"
msgstr "仅高级使用:指定用于list 操作的替代超时,而不是stonith-timeout"
-#: daemons/fenced/pacemaker-fenced.c:1502
+#: daemons/fenced/pacemaker-fenced.c:1503
msgid ""
"Some devices need much more/less time to complete than normal.Use this to "
"specify an alternate, device-specific, timeout for 'list' actions."
msgstr ""
"一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用"
"于'list'操作的该设备特定的替代超时。"
-#: daemons/fenced/pacemaker-fenced.c:1507
+#: daemons/fenced/pacemaker-fenced.c:1508
msgid ""
"Advanced use only: The maximum number of times to retry the 'list' command "
"within the timeout period"
msgstr "仅高级使用:在超时前重试'list'命令的最大次数"
-#: daemons/fenced/pacemaker-fenced.c:1508
+#: daemons/fenced/pacemaker-fenced.c:1509
msgid ""
"Some devices do not support multiple connections. Operations may 'fail' if "
"the device is busy with another task so Pacemaker will automatically retry "
"the operation, if there is time remaining. Use this option to alter the "
"number of times Pacemaker retries 'list' actions before giving up."
msgstr ""
" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此"
"Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重"
"试'list' 操作的次数."
-#: daemons/fenced/pacemaker-fenced.c:1514
+#: daemons/fenced/pacemaker-fenced.c:1515
msgid "Advanced use only: An alternate command to run instead of 'monitor'"
msgstr "仅高级使用:运行替代命令,而不是'monitor'"
-#: daemons/fenced/pacemaker-fenced.c:1515
+#: daemons/fenced/pacemaker-fenced.c:1516
msgid ""
"Some devices do not support the standard commands or may provide additional "
"ones.Use this to specify an alternate, device-specific, command that "
"implements the 'monitor' action."
msgstr ""
"一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备特定的替"
"代命令,用来实现'monitor'操作。"
-#: daemons/fenced/pacemaker-fenced.c:1520
+#: daemons/fenced/pacemaker-fenced.c:1521
msgid ""
"Advanced use only: Specify an alternate timeout to use for monitor actions "
"instead of stonith-timeout"
msgstr "仅高级使用:指定用于monitor 操作的替代超时,而不是stonith-timeout"
-#: daemons/fenced/pacemaker-fenced.c:1521
+#: daemons/fenced/pacemaker-fenced.c:1522
msgid ""
"Some devices need much more/less time to complete than normal.\n"
"Use this to specify an alternate, device-specific, timeout for 'monitor' "
"actions."
msgstr ""
"一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用"
"于'monitor'操作的该设备特定的替代超时。"
-#: daemons/fenced/pacemaker-fenced.c:1526
+#: daemons/fenced/pacemaker-fenced.c:1527
msgid ""
"Advanced use only: The maximum number of times to retry the 'monitor' "
"command within the timeout period"
msgstr "仅高级使用:在超时前重试'monitor'命令的最大次数"
-#: daemons/fenced/pacemaker-fenced.c:1527
+#: daemons/fenced/pacemaker-fenced.c:1528
msgid ""
"Some devices do not support multiple connections. Operations may 'fail' if "
"the device is busy with another task so Pacemaker will automatically retry "
"the operation, if there is time remaining. Use this option to alter the "
"number of times Pacemaker retries 'monitor' actions before giving up."
msgstr ""
" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此"
"Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重"
"试'monitor' 操作的次数."
-#: daemons/fenced/pacemaker-fenced.c:1533
+#: daemons/fenced/pacemaker-fenced.c:1534
msgid "Advanced use only: An alternate command to run instead of 'status'"
msgstr "仅高级使用:运行替代命令,而不是'status'"
-#: daemons/fenced/pacemaker-fenced.c:1534
+#: daemons/fenced/pacemaker-fenced.c:1535
msgid ""
"Some devices do not support the standard commands or may provide additional "
"ones.Use this to specify an alternate, device-specific, command that "
"implements the 'status' action."
msgstr ""
"一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备特定的替"
"代命令,用来实现'status'操作。"
-#: daemons/fenced/pacemaker-fenced.c:1539
+#: daemons/fenced/pacemaker-fenced.c:1540
msgid ""
"Advanced use only: Specify an alternate timeout to use for status actions "
"instead of stonith-timeout"
msgstr "仅高级使用:指定用于status 操作的替代超时,而不是stonith-timeout"
-#: daemons/fenced/pacemaker-fenced.c:1540
+#: daemons/fenced/pacemaker-fenced.c:1541
msgid ""
"Some devices need much more/less time to complete than normal.Use this to "
"specify an alternate, device-specific, timeout for 'status' actions."
msgstr ""
"一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用"
"于'status'操作的该设备特定的替代超时"
-#: daemons/fenced/pacemaker-fenced.c:1545
+#: daemons/fenced/pacemaker-fenced.c:1546
msgid ""
"Advanced use only: The maximum number of times to retry the 'status' command "
"within the timeout period"
msgstr "仅高级使用:在超时前重试'status'命令的最大次数"
-#: daemons/fenced/pacemaker-fenced.c:1546
+#: daemons/fenced/pacemaker-fenced.c:1547
msgid ""
"Some devices do not support multiple connections. Operations may 'fail' if "
"the device is busy with another task so Pacemaker will automatically retry "
"the operation, if there is time remaining. Use this option to alter the "
"number of times Pacemaker retries 'status' actions before giving up."
msgstr ""
" 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此"
"Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重"
"试'status' 操作的次数."
-#: daemons/fenced/pacemaker-fenced.c:1555
+#: daemons/fenced/pacemaker-fenced.c:1556
msgid "Instance attributes available for all \"stonith\"-class resources"
msgstr " 可用于所有stonith类资源的实例属性"
-#: daemons/fenced/pacemaker-fenced.c:1557
+#: daemons/fenced/pacemaker-fenced.c:1558
msgid ""
"Instance attributes available for all \"stonith\"-class resources and used "
"by Pacemaker's fence daemon, formerly known as stonithd"
msgstr ""
" 可用于所有stonith类资源的实例属性,并由Pacemaker的fence守护程序使用(以前称"
"为stonithd)"
-#: lib/cib/cib_utils.c:559
+#: lib/cib/cib_utils.c:589
msgid "Enable Access Control Lists (ACLs) for the CIB"
msgstr "为CIB启用访问控制列表(ACL)"
-#: lib/cib/cib_utils.c:565
+#: lib/cib/cib_utils.c:595
msgid "Maximum IPC message backlog before disconnecting a cluster daemon"
msgstr "断开集群守护程序之前的最大IPC消息积压"
-#: lib/cib/cib_utils.c:566
+#: lib/cib/cib_utils.c:596
msgid ""
"Raise this if log has \"Evicting client\" messages for cluster daemon PIDs "
"(a good value is the number of resources in the cluster multiplied by the "
"number of nodes)."
msgstr ""
"如果日志中有针对集群守护程序PID的消息“Evicting client”,(则建议将值设为集群"
"中的资源数量乘以节点数量)"
#: lib/common/options.c:401
msgid " Allowed values: "
msgstr " 允许的值: "
#: lib/common/cmdline.c:70
msgid "Display software version and exit"
msgstr "显示软件版本信息"
#: lib/common/cmdline.c:73
msgid "Increase debug output (may be specified multiple times)"
msgstr "显示更多调试信息(可多次指定)"
#: lib/common/cmdline.c:92
msgid "FORMAT"
msgstr "格式"
#: lib/common/cmdline.c:94
msgid "Specify file name for output (or \"-\" for stdout)"
msgstr "指定输出的文件名 或指定'-' 表示标准输出"
#: lib/common/cmdline.c:94
msgid "DEST"
msgstr "目标"
#: lib/common/cmdline.c:100
msgid "Output Options:"
msgstr "输出选项"
#: lib/common/cmdline.c:100
msgid "Show output help"
msgstr "显示输出帮助"
-#: lib/pengine/common.c:39
-msgid "What to do when the cluster does not have quorum"
-msgstr "当集群没有必需票数时该如何作"
-
#: lib/pengine/common.c:45
msgid "Whether resources can run on any node by default"
msgstr "资源是否默认可以在任何节点上运行"
#: lib/pengine/common.c:51
msgid ""
"Whether the cluster should refrain from monitoring, starting, and stopping "
"resources"
msgstr "集群是否应避免监视,启动和停止资源"
#: lib/pengine/common.c:58
msgid ""
"Whether a start failure should prevent a resource from being recovered on "
"the same node"
msgstr "是否避免在同一节点上重启启动失败的资源"
#: lib/pengine/common.c:60
msgid ""
"When true, the cluster will immediately ban a resource from a node if it "
"fails to start there. When false, the cluster will instead check the "
"resource's fail count against its migration-threshold."
msgstr ""
"当为true,如果资源启动失败,集群将立即禁止节点启动该资源,当为false,群集将根"
"据其迁移阈值来检查资源的失败计数。"
#: lib/pengine/common.c:67
msgid "Whether the cluster should check for active resources during start-up"
msgstr "群集是否在启动期间检查运行资源"
-#: lib/pengine/common.c:73
-msgid "Whether to lock resources to a cleanly shut down node"
-msgstr "是否锁定资源到完全关闭的节点"
-
-#: lib/pengine/common.c:74
-msgid ""
-"When true, resources active on a node when it is cleanly shut down are kept "
-"\"locked\" to that node (not allowed to run elsewhere) until they start "
-"again on that node after it rejoins (or for at most shutdown-lock-limit, if "
-"set). Stonith resources and Pacemaker Remote connections are never locked. "
-"Clone and bundle instances and the promoted role of promotable clones are "
-"currently never locked, though support could be added in a future release."
-msgstr ""
-"设置为true时,在完全关闭的节点上活动的资源将被“锁定”到该节点(不允许在其他地"
-"方运行),直到该节点重新加入后资源重新启动(或最长shutdown-lock-limit,如果已"
-"设置)。 Stonith资源和Pacemaker Remote连接永远不会被锁定。 克隆和捆绑实例以及"
-"可升级克隆的主角色目前从未锁定,尽管可以在将来的发行版中添加支持。"
-
-#: lib/pengine/common.c:85
-msgid "Do not lock resources to a cleanly shut down node longer than this"
-msgstr "资源会被锁定到完全关闭的节点的最长时间"
-
-#: lib/pengine/common.c:86
-msgid ""
-"If shutdown-lock is true and this is set to a nonzero time duration, "
-"shutdown locks will expire after this much time has passed since the "
-"shutdown was initiated, even if the node has not rejoined."
-msgstr ""
-"如果shutdown-lock为true,并且将此选项设置为非零持续时间,则自从开始shutdown以"
-"来经过了这么长的时间后,shutdown锁将过期,即使该节点尚未重新加入。"
-
-#: lib/pengine/common.c:95
+#: lib/pengine/common.c:98
msgid ""
"*** Advanced Use Only *** Whether nodes may be fenced as part of recovery"
msgstr "*** Advanced Use Only *** 节点是否可以被 fence 以作为集群恢复的一部分"
-#: lib/pengine/common.c:97
+#: lib/pengine/common.c:100
msgid ""
"If false, unresponsive nodes are immediately assumed to be harmless, and "
"resources that were active on them may be recovered elsewhere. This can "
"result in a \"split-brain\" situation, potentially leading to data loss and/"
"or service unavailability."
msgstr ""
"如果为false,则立即假定无响应的节点是无害的,并且可以在其他位置恢复在其上活动"
"的资源。 这可能会导致 \"split-brain\" 情况,可能导致数据丢失和/或服务不可用。"
-#: lib/pengine/common.c:105
+#: lib/pengine/common.c:108
msgid ""
"Action to send to fence device when a node needs to be fenced (\"poweroff\" "
"is a deprecated alias for \"off\")"
msgstr "发送到 fence 设备的操作( \"poweroff\" 是 \"off \"的别名,不建议使用)"
-#: lib/pengine/common.c:112
+#: lib/pengine/common.c:115
msgid "*** Advanced Use Only *** Unused by Pacemaker"
msgstr "*** Advanced Use Only *** pacemaker未使用"
-#: lib/pengine/common.c:113
+#: lib/pengine/common.c:116
msgid ""
"This value is not used by Pacemaker, but is kept for backward compatibility, "
"and certain legacy fence agents might use it."
msgstr ""
"Pacemaker不使用此值,但保留此值是为了向后兼容,某些传统的fence 代理可能会使用"
"它。"
-#: lib/pengine/common.c:119
+#: lib/pengine/common.c:122
msgid "Whether watchdog integration is enabled"
msgstr "是否启用watchdog集成设置"
-#: lib/pengine/common.c:120
+#: lib/pengine/common.c:123
msgid ""
"This is set automatically by the cluster according to whether SBD is "
"detected to be in use. User-configured values are ignored. The value `true` "
"is meaningful if diskless SBD is used and `stonith-watchdog-timeout` is "
"nonzero. In that case, if fencing is required, watchdog-based self-fencing "
"will be performed via SBD without requiring a fencing resource explicitly "
"configured."
msgstr ""
"这是由集群检测是否正在使用 SBD 并自动设置。用户配置的值将被忽略。如果使用无"
"盘 SBD 并且 stonith-watchdog-timeout 不为零时,此选项为 true 才有实际意义。在"
"这种情况下,无需明确配置fence资源,如果需要fence时,基于watchdog的自我fence会"
"通过SBD执行。"
-#: lib/pengine/common.c:130
+#: lib/pengine/common.c:133
msgid "Allow performing fencing operations in parallel"
msgstr "允许并行执行 fencing 操作"
-#: lib/pengine/common.c:136
+#: lib/pengine/common.c:139
msgid "*** Advanced Use Only *** Whether to fence unseen nodes at start-up"
msgstr "*** 仅高级使用 *** 是否在启动时fence不可见节点"
-#: lib/pengine/common.c:137
+#: lib/pengine/common.c:140
msgid ""
"Setting this to false may lead to a \"split-brain\" situation,potentially "
"leading to data loss and/or service unavailability."
msgstr ""
"将此设置为 false 可能会导致 \"split-brain\" 的情况,可能导致数据丢失和/或服务"
"不可用。"
-#: lib/pengine/common.c:143
+#: lib/pengine/common.c:146
msgid ""
"Apply fencing delay targeting the lost nodes with the highest total resource "
"priority"
msgstr "针对具有最高总资源优先级的丢失节点应用fencing延迟"
-#: lib/pengine/common.c:144
+#: lib/pengine/common.c:147
msgid ""
"Apply specified delay for the fencings that are targeting the lost nodes "
"with the highest total resource priority in case we don't have the majority "
"of the nodes in our cluster partition, so that the more significant nodes "
"potentially win any fencing match, which is especially meaningful under "
"split-brain of 2-node cluster. A promoted resource instance takes the base "
"priority + 1 on calculation if the base priority is not 0. Any static/random "
"delays that are introduced by `pcmk_delay_base/max` configured for the "
"corresponding fencing resources will be added to this delay. This delay "
"should be significantly greater than, safely twice, the maximum "
"`pcmk_delay_base/max`. By default, priority fencing delay is disabled."
msgstr ""
"如果我们所在的集群分区并不拥有大多数集群节点,则针对丢失节点的fence操作应用指"
"定的延迟,这样更重要的节点就能够赢得fence竞赛。这对于双节点集群在split-brain"
"状况下尤其有意义。如果基本优先级不为0,在计算时主资源实例获得基本优先级+1。任"
"何对于相应的 fence 资源由 pcmk_delay_base/max 配置所引入的静态/随机延迟会被添"
"加到此延迟。为了安全, 这个延迟应该明显大于 pcmk_delay_base/max 的最大设置值,"
"例如两倍。默认情况下,优先级fencing延迟已禁用。"
-#: lib/pengine/common.c:161
+#: lib/pengine/common.c:164
msgid "Maximum time for node-to-node communication"
msgstr "最大节点间通信时间"
-#: lib/pengine/common.c:162
+#: lib/pengine/common.c:165
msgid ""
"The node elected Designated Controller (DC) will consider an action failed "
"if it does not get a response from the node executing the action within this "
"time (after considering the action's own timeout). The \"correct\" value "
"will depend on the speed and load of your network and cluster nodes."
msgstr ""
"如果一个操作未在该时间内(并且考虑操作本身的超时时长)从执行该操作的节点获得"
"响应,则会被选为指定控制器(DC)的节点认定为失败。\"正确\" 值将取决于速度和您"
"的网络和集群节点的负载。"
-#: lib/pengine/common.c:171
+#: lib/pengine/common.c:174
#, fuzzy
msgid ""
"Maximum number of jobs that the cluster may execute in parallel across all "
"nodes"
msgstr "集群可以在所有节点上并发执行的最大作业数"
-#: lib/pengine/common.c:173
+#: lib/pengine/common.c:176
msgid ""
"The \"correct\" value will depend on the speed and load of your network and "
"cluster nodes. If set to 0, the cluster will impose a dynamically calculated "
"limit when any node has a high load."
msgstr ""
"\"正确\" 值将取决于速度和您的网络与集群节点的负载。如果设置为0,当任何节点具"
"有高负载时,集群将施加一个动态计算的限制。"
-#: lib/pengine/common.c:181
+#: lib/pengine/common.c:184
msgid ""
"The number of live migration actions that the cluster is allowed to execute "
"in parallel on a node (-1 means no limit)"
msgstr "允许集群在一个节点上并行执行的实时迁移操作的数量(-1表示没有限制)"
-#: lib/pengine/common.c:189
+#: lib/pengine/common.c:192
#, fuzzy
msgid "Whether the cluster should stop all active resources"
msgstr "群集是否在启动期间检查运行资源"
-#: lib/pengine/common.c:195
+#: lib/pengine/common.c:198
msgid "Whether to stop resources that were removed from the configuration"
msgstr "是否停止配置已被删除的资源"
-#: lib/pengine/common.c:201
+#: lib/pengine/common.c:204
msgid "Whether to cancel recurring actions removed from the configuration"
msgstr "是否取消配置已被删除的的重复操作"
-#: lib/pengine/common.c:207
+#: lib/pengine/common.c:210
msgid ""
"*** Deprecated *** Whether to remove stopped resources from the executor"
msgstr "***不推荐***是否从pacemaker-execd 守护进程中清除已停止的资源"
-#: lib/pengine/common.c:209
+#: lib/pengine/common.c:212
msgid ""
"Values other than default are poorly tested and potentially dangerous. This "
"option will be removed in a future release."
msgstr "非默认值未经过充分的测试,有潜在的风险。该选项将在未来的版本中删除。"
-#: lib/pengine/common.c:217
+#: lib/pengine/common.c:220
msgid "The number of scheduler inputs resulting in errors to save"
msgstr "保存导致错误的调度程序输入的数量"
-#: lib/pengine/common.c:218 lib/pengine/common.c:224 lib/pengine/common.c:230
+#: lib/pengine/common.c:221 lib/pengine/common.c:227 lib/pengine/common.c:233
msgid "Zero to disable, -1 to store unlimited."
msgstr "零表示禁用,-1表示存储不受限制。"
-#: lib/pengine/common.c:223
+#: lib/pengine/common.c:226
msgid "The number of scheduler inputs resulting in warnings to save"
msgstr "保存导致警告的调度程序输入的数量"
-#: lib/pengine/common.c:229
+#: lib/pengine/common.c:232
msgid "The number of scheduler inputs without errors or warnings to save"
msgstr "保存没有错误或警告的调度程序输入的数量"
-#: lib/pengine/common.c:240
+#: lib/pengine/common.c:243
#, fuzzy
msgid "How cluster should react to node health attributes"
msgstr "集群节点对节点健康属性如何反应"
-#: lib/pengine/common.c:241
+#: lib/pengine/common.c:244
msgid ""
"Requires external entities to create node attributes (named with the prefix "
"\"#health\") with values \"red\", \"yellow\", or \"green\"."
msgstr ""
"需要外部实体创建具有“red”,“yellow”或“green”值的节点属性(前缀为“#health”)"
-#: lib/pengine/common.c:248
+#: lib/pengine/common.c:251
msgid "Base health score assigned to a node"
msgstr "分配给节点的基本健康分数"
-#: lib/pengine/common.c:249
+#: lib/pengine/common.c:252
msgid "Only used when \"node-health-strategy\" is set to \"progressive\"."
msgstr "仅在“node-health-strategy”设置为“progressive”时使用。"
-#: lib/pengine/common.c:254
+#: lib/pengine/common.c:257
msgid "The score to use for a node health attribute whose value is \"green\""
msgstr "为节点健康属性值为“green”所使用的分数"
-#: lib/pengine/common.c:255 lib/pengine/common.c:261 lib/pengine/common.c:267
+#: lib/pengine/common.c:258 lib/pengine/common.c:264 lib/pengine/common.c:270
msgid ""
"Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive"
"\"."
msgstr "仅在“node-health-strategy”设置为“custom”或“progressive”时使用。"
-#: lib/pengine/common.c:260
+#: lib/pengine/common.c:263
msgid "The score to use for a node health attribute whose value is \"yellow\""
msgstr "为节点健康属性值为“yellow”所使用的分数"
-#: lib/pengine/common.c:266
+#: lib/pengine/common.c:269
msgid "The score to use for a node health attribute whose value is \"red\""
msgstr "为节点健康属性值为“red”所使用的分数"
-#: lib/pengine/common.c:275
+#: lib/pengine/common.c:278
#, fuzzy
msgid "How the cluster should allocate resources to nodes"
msgstr "群集应该如何分配资源到节点"
#: tools/crm_resource.c:258
#, c-format
msgid "Aborting because no messages received in %d seconds"
msgstr "中止,因为在%d秒内没有接收到消息"
-#: tools/crm_resource.c:909
+#: tools/crm_resource.c:915
#, c-format
msgid "Invalid check level setting: %s"
msgstr "无效的检查级别设置:%s"
-#: tools/crm_resource.c:993
+#: tools/crm_resource.c:999
#, c-format
msgid ""
"Resource '%s' not moved: active in %d locations (promoted in %d).\n"
"To prevent '%s' from running on a specific location, specify a node.To "
"prevent '%s' from being promoted at a specific location, specify a node and "
"the --promoted option."
msgstr ""
"资源'%s'未移动:在%d个位置运行(其中在%d个位置为主实例)\n"
"若要阻止'%s'在特定位置运行,请指定一个节点。若要防止'%s'在指定位置升级,指定"
"一个节点并使用--promoted选项"
-#: tools/crm_resource.c:1004
+#: tools/crm_resource.c:1010
#, c-format
msgid ""
"Resource '%s' not moved: active in %d locations.\n"
"To prevent '%s' from running on a specific location, specify a node."
msgstr ""
"资源%s未移动:在%d个位置运行\n"
"若要防止'%s'运行在特定位置,指定一个节点"
-#: tools/crm_resource.c:1079
+#: tools/crm_resource.c:1085
#, c-format
msgid "Could not get modified CIB: %s\n"
msgstr "无法获得修改的CIB:%s\n"
-#: tools/crm_resource.c:1113
+#: tools/crm_resource.c:1119
msgid "You need to specify a resource type with -t"
msgstr "需要使用-t指定资源类型"
-#: tools/crm_resource.c:1156
+#: tools/crm_resource.c:1162
#, c-format
msgid "No agents found for standard '%s'"
msgstr "没有发现指定的'%s'标准代理"
-#: tools/crm_resource.c:1159
+#: tools/crm_resource.c:1165
#, fuzzy, c-format
msgid "No agents found for standard '%s' and provider '%s'"
msgstr "没有发现指定的标准%s和提供者%S的资源代理"
-#: tools/crm_resource.c:1226
+#: tools/crm_resource.c:1232
#, c-format
msgid "No %s found for %s"
msgstr "没有发现%s符合%s"
-#: tools/crm_resource.c:1231
+#: tools/crm_resource.c:1237
#, c-format
msgid "No %s found"
msgstr "没有发现%s"
-#: tools/crm_resource.c:1291
+#: tools/crm_resource.c:1297
#, c-format
msgid "No cluster connection to Pacemaker Remote node %s detected"
msgstr "未检测到至pacemaker远程节点%s的集群连接"
-#: tools/crm_resource.c:1352
+#: tools/crm_resource.c:1358
msgid "Must specify -t with resource type"
msgstr "需要使用-t指定资源类型"
-#: tools/crm_resource.c:1358
+#: tools/crm_resource.c:1364
msgid "Must supply -v with new value"
msgstr "必须使用-v指定新值"
-#: tools/crm_resource.c:1390
+#: tools/crm_resource.c:1396
msgid "Could not create executor connection"
msgstr "无法创建到pacemaker-execd守护进程的连接"
-#: tools/crm_resource.c:1415
+#: tools/crm_resource.c:1421
#, fuzzy, c-format
msgid "Metadata query for %s failed: %s"
msgstr ",查询%s的元数据失败: %s\n"
-#: tools/crm_resource.c:1421
+#: tools/crm_resource.c:1427
#, c-format
msgid "'%s' is not a valid agent specification"
msgstr "'%s' 是一个无效的代理"
-#: tools/crm_resource.c:1434
+#: tools/crm_resource.c:1440
msgid "--resource cannot be used with --class, --agent, and --provider"
msgstr "--resource 不能与 --class, --agent, --provider一起使用"
-#: tools/crm_resource.c:1439
+#: tools/crm_resource.c:1445
msgid ""
"--class, --agent, and --provider can only be used with --validate and --"
"force-*"
msgstr "--class, --agent和--provider只能被用于--validate和--force-*"
-#: tools/crm_resource.c:1448
+#: tools/crm_resource.c:1454
msgid "stonith does not support providers"
msgstr "stonith 不支持提供者"
-#: tools/crm_resource.c:1452
+#: tools/crm_resource.c:1458
#, c-format
msgid "%s is not a known stonith agent"
msgstr "%s 不是一个已知stonith代理"
-#: tools/crm_resource.c:1457
+#: tools/crm_resource.c:1463
#, c-format
msgid "%s:%s:%s is not a known resource"
msgstr "%s:%s:%s 不是一个已知资源"
-#: tools/crm_resource.c:1571
+#: tools/crm_resource.c:1577
#, c-format
msgid "Error creating output format %s: %s"
msgstr "创建输出格式错误 %s:%s"
-#: tools/crm_resource.c:1598
+#: tools/crm_resource.c:1604
msgid "--expired requires --clear or -U"
msgstr "--expired需要和--clear或-U一起使用"
-#: tools/crm_resource.c:1615
+#: tools/crm_resource.c:1621
#, c-format
msgid "Error parsing '%s' as a name=value pair"
msgstr "'%s'解析错误,格式为name=value"
-#: tools/crm_resource.c:1712
+#: tools/crm_resource.c:1718
msgid "Must supply a resource id with -r"
msgstr "必须使用-r指定资源id"
-#: tools/crm_resource.c:1718
+#: tools/crm_resource.c:1724
msgid "Must supply a node name with -N"
msgstr "必须使用-N指定节点名称"
#: tools/crm_resource.c:1742
msgid "Could not create CIB connection"
msgstr "无法创建到CIB的连接"
#: tools/crm_resource.c:1750
#, c-format
msgid "Could not connect to the CIB: %s"
msgstr "不能连接到CIB:%s"
#: tools/crm_resource.c:1771
#, c-format
msgid "Resource '%s' not found"
msgstr "没有发现'%s'资源"
#: tools/crm_resource.c:1783
#, c-format
msgid "Cannot operate on clone resource instance '%s'"
msgstr "不能操作克隆资源实例'%s'"
#: tools/crm_resource.c:1795
#, c-format
msgid "Node '%s' not found"
msgstr "没有发现%s节点"
#: tools/crm_resource.c:1806 tools/crm_resource.c:1815
#, c-format
msgid "Error connecting to the controller: %s"
msgstr "连接到控制器错误:%s"
-#: tools/crm_resource.c:2051
+#: tools/crm_resource.c:2064
msgid "You need to supply a value with the -v option"
msgstr "需要使用-v选项提供一个值"
-#: tools/crm_resource.c:2106
+#: tools/crm_resource.c:2119
#, c-format
msgid "Unimplemented command: %d"
msgstr "无效的命令:%d"
-#: tools/crm_resource.c:2140
+#: tools/crm_resource.c:2149
#, c-format
msgid "Error performing operation: %s"
msgstr "执行操作错误:%s"
#~ msgid ""
#~ "If nonzero, along with `have-watchdog=true` automatically set by the "
#~ "cluster, when fencing is required, watchdog-based self-fencing will be "
#~ "performed via SBD without requiring a fencing resource explicitly "
#~ "configured. If `stonith-watchdog-timeout` is set to a positive value, "
#~ "unseen nodes are assumed to self-fence within this much time. +WARNING:+ "
#~ "It must be ensured that this value is larger than the "
#~ "`SBD_WATCHDOG_TIMEOUT` environment variable on all nodes. Pacemaker "
#~ "verifies the settings individually on all nodes and prevents startup or "
#~ "shuts down if configured wrongly on the fly. It's strongly recommended "
#~ "that `SBD_WATCHDOG_TIMEOUT` is set to the same value on all nodes. If "
#~ "`stonith-watchdog-timeout` is set to a negative value, and "
#~ "`SBD_WATCHDOG_TIMEOUT` is set, twice that value will be used. +WARNING:+ "
#~ "In this case, it's essential (currently not verified by Pacemaker) that "
#~ "`SBD_WATCHDOG_TIMEOUT` is set to the same value on all nodes."
#~ msgstr ""
#~ "如果值非零,且集群设置了 `have-watchdog=true` ,当需要 fence 操作时,基于 "
#~ "watchdog 的自我 fence 机制将通过SBD执行,而不需要显式配置 fence 资源。如"
#~ "果 `stonith-watchdog-timeout` 被设为正值,则假定不可见的节点在这段时间内自"
#~ "我fence。 +WARNING:+ 必须确保该值大于所有节点上的`SBD_WATCHDOG_TIMEOUT` 环"
#~ "境变量。Pacemaker将在所有节点上单独验证设置,如发现有错误的动态配置,将防"
#~ "止节点启动或关闭。强烈建议在所有节点上将 `SBD_WATCHDOG_TIMEOUT` 设置为相同"
#~ "的值。如果 `stonith-watchdog-timeout` 设置为负值。并且设置了 "
#~ "`SBD_WATCHDOG_TIMEOUT` ,则将使用该值的两倍, +WARNING:+ 在这种情况下,必"
#~ "须将所有节点上 `SBD_WATCHDOG_TIMEOUT` 设置为相同的值(目前没有通过pacemaker"
#~ "验证)。"
diff --git a/python/pacemaker/_cts/environment.py b/python/pacemaker/_cts/environment.py
index 9ee99f1a92..f81d9876e5 100644
--- a/python/pacemaker/_cts/environment.py
+++ b/python/pacemaker/_cts/environment.py
@@ -1,646 +1,650 @@
""" Test environment classes for Pacemaker's Cluster Test Suite (CTS) """
__all__ = ["EnvFactory"]
__copyright__ = "Copyright 2014-2023 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
import argparse
import os
import random
import socket
import sys
import time
from pacemaker._cts.logging import LogFactory
from pacemaker._cts.remote import RemoteFactory
from pacemaker._cts.watcher import LogKind
class Environment:
""" A class for managing the CTS environment, consisting largely of processing
and storing command line parameters
"""
# pylint doesn't understand that self._rsh is callable (it stores the
# singleton instance of RemoteExec, as returned by the getInstance method
# of RemoteFactory). It's possible we could fix this with type annotations,
# but those were introduced with python 3.5 and we only support python 3.4.
# I think we could also fix this by getting rid of the getInstance methods,
# but that's a project for another day. For now, just disable the warning.
# pylint: disable=not-callable
def __init__(self, args):
""" Create a new Environment instance. This class can be treated kind
of like a dictionary due to the presence of typical dict functions
like has_key, __getitem__, and __setitem__. However, it is not a
dictionary so do not rely on standard dictionary behavior.
Arguments:
args -- A list of command line parameters, minus the program name.
If None, sys.argv will be used.
"""
self.data = {}
self._nodes = []
# Set some defaults before processing command line arguments. These are
# either not set by any command line parameter, or they need a default
# that can't be set in add_argument.
self["DeadTime"] = 300
self["StartTime"] = 300
self["StableTime"] = 30
self["tests"] = []
self["IPagent"] = "IPaddr2"
self["DoFencing"] = True
self["ClobberCIB"] = False
self["CIBfilename"] = None
self["CIBResource"] = False
self["LogWatcher"] = LogKind.ANY
self["node-limit"] = 0
self["scenario"] = "random"
self.random_gen = random.Random()
self._logger = LogFactory()
self._rsh = RemoteFactory().getInstance()
self._target = "localhost"
self._seed_random()
self._parse_args(args)
if not self["ListTests"]:
self._validate()
self._discover()
def _seed_random(self, seed=None):
""" Initialize the random number generator with the given seed, or use
the current time if None
"""
if not seed:
seed = int(time.time())
self["RandSeed"] = seed
self.random_gen.seed(str(seed))
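# A minimal reproducibility sketch (node names hypothetical): re-seeding
# with the same --seed string replays the same random choices:
#
#   gen = random.Random()
#   gen.seed(str(42))
#   first = gen.choice(["node1", "node2", "node3"])
#   gen.seed(str(42))
#   assert first == gen.choice(["node1", "node2", "node3"])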
def dump(self):
""" Print the current environment """
for key in sorted(self.data.keys()):
    s = "Environment[%s]" % key
    self._logger.debug("{key:35}: {val}".format(key=s, val=str(self[key])))
def keys(self):
""" Return a list of all environment keys stored in this instance """
return list(self.data.keys())
def has_key(self, key):
""" Does the given environment key exist? """
if key == "nodes":
return True
return key in self.data
def __getitem__(self, key):
""" Return the given environment key, or None if it does not exist """
if str(key) == "0":
raise ValueError("Bad call to 'foo in X', should reference 'foo in X.keys()' instead")
if key == "nodes":
return self._nodes
if key == "Name":
return self._get_stack_short()
if key in self.data:
return self.data[key]
return None
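# Lookup sketch (keys and values hypothetical): besides the special keys
# handled above, lookups fall through to self.data, and a missing key
# yields None instead of the KeyError a real dict would raise:
#
#   env["nodes"]    # -> the validated node list (self._nodes)
#   env["Name"]     # -> short stack name, e.g. "crm-corosync"
#   env["no-such"]  # -> None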
def __setitem__(self, key, value):
""" Set the given environment key to the given value, overriding any
previous value
"""
if key == "Stack":
self._set_stack(value)
elif key == "node-limit":
self.data[key] = value
self._filter_nodes()
elif key == "nodes":
self._nodes = []
for node in value:
# I don't think I need the IP address, etc. but this validates
# the node name against /etc/hosts and/or DNS, so it's a
# GoodThing(tm).
try:
n = node.strip()
socket.gethostbyname_ex(n)
self._nodes.append(n)
except socket.gaierror:
self._logger.log("%s not found in DNS... aborting" % node)
raise
self._filter_nodes()
else:
self.data[key] = value
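# Assignment sketch (hostnames hypothetical): setting env["nodes"] resolves
# every name up front, so a typo fails fast at assignment time:
#
#   env["nodes"] = ["node1", "node2"]  # each checked via gethostbyname_ex()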
def random_node(self):
""" Choose a random node from the cluster """
return self.random_gen.choice(self["nodes"])
def _set_stack(self, name):
""" Normalize the given cluster stack name """
if name in ["corosync", "cs", "mcp"]:
self.data["Stack"] = "corosync 2+"
else:
raise ValueError("Unknown stack: %s" % name)
def _get_stack_short(self):
""" Return the short name for the currently set cluster stack """
if "Stack" not in self.data:
return "unknown"
if self.data["Stack"] == "corosync 2+":
return "crm-corosync"
LogFactory().log("Unknown stack: %s" % self["stack"])
raise ValueError("Unknown stack: %s" % self["stack"])
+ def _detect_systemd(self):
+ """ Detect whether systemd is in use on the target node """
+
+ if "have_systemd" not in self.data:
+ (rc, _) = self._rsh(self._target, "systemctl list-units", verbose=0)
+ self["have_systemd"] = rc == 0
+
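+ # Probe sketch (return codes illustrative): "systemctl list-units" exits 0
+ # on a systemd host and fails elsewhere (e.g. command not found), so the
+ # remote rc alone is enough to decide have_systemd above.
+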
def _detect_syslog(self):
""" Detect the syslog variant in use on the target node """
if "syslogd" not in self.data:
if self["have_systemd"]:
# Systemd
(_, lines) = self._rsh(self._target, r"systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", verbose=1)
self["syslogd"] = lines[0].strip()
else:
# SYS-V
(_, lines) = self._rsh(self._target, "chkconfig --list | grep syslog.*on | awk '{print $1}' | head -n 1", verbose=1)
self["syslogd"] = lines[0].strip()
if "syslogd" not in self.data or not self["syslogd"]:
# default
self["syslogd"] = "rsyslog"
def disable_service(self, node, service):
""" Disable the given service on the given node """
if self["have_systemd"]:
# Systemd
(rc, _) = self._rsh(node, "systemctl disable %s" % service)
return rc
# SYS-V
(rc, _) = self._rsh(node, "chkconfig %s off" % service)
return rc
def enable_service(self, node, service):
""" Enable the given service on the given node """
if self["have_systemd"]:
# Systemd
(rc, _) = self._rsh(node, "systemctl enable %s" % service)
return rc
# SYS-V
(rc, _) = self._rsh(node, "chkconfig %s on" % service)
return rc
def service_is_enabled(self, node, service):
""" Is the given service enabled on the given node? """
if self["have_systemd"]:
# Systemd
# With "systemctl is-enabled", we should check if the service is
# explicitly "enabled" instead of the return code. For example it returns
# 0 if the service is "static" or "indirect", but they don't really count
# as "enabled".
(rc, _) = self._rsh(node, "systemctl is-enabled %s | grep enabled" % service)
return rc == 0
# SYS-V
(rc, _) = self._rsh(node, "chkconfig --list | grep -e %s.*on" % service)
return rc == 0
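# Why the extra grep (outputs illustrative, not exhaustive): "systemctl
# is-enabled" can exit 0 for states that should not count as enabled:
#   systemctl is-enabled corosync  -> "enabled"  (counts)
#   systemctl is-enabled dbus      -> "static"   (must not count)
# Piping through grep makes the rc reflect only the literal "enabled" state.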
def _detect_at_boot(self):
""" Detect if the cluster starts at boot """
if "at-boot" not in self.data:
self["at-boot"] = self.service_is_enabled(self._target, "corosync") \
or self.service_is_enabled(self._target, "pacemaker")
def _detect_ip_offset(self):
""" Detect the offset for IPaddr resources """
if self["CIBResource"] and "IPBase" not in self.data:
(_, lines) = self._rsh(self._target, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", verbose=0)
network = lines[0].strip()
(_, lines) = self._rsh(self._target, "nmap -sn -n %s | grep 'scan report' | awk '{print $NF}' | sed 's:(::' | sed 's:)::' | sort -V | tail -n 1" % network, verbose=0)
try:
self["IPBase"] = lines[0].strip()
except (IndexError, TypeError):
self["IPBase"] = None
if not self["IPBase"]:
self["IPBase"] = " fe80::1234:56:7890:1000"
self._logger.log("Could not determine an offset for IPaddr resources. Perhaps nmap is not installed on the nodes.")
self._logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
return
# pylint thinks self["IPBase"] is a list, not a string, which causes it
# to error out because a list doesn't have split().
# pylint: disable=no-member
if int(self["IPBase"].split('.')[3]) >= 240:
self._logger.log("Could not determine an offset for IPaddr resources. Upper bound is too high: %s %s"
% (self["IPBase"], self["IPBase"].split('.')[3]))
self["IPBase"] = " fe80::1234:56:7890:1000"
self._logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
def _filter_nodes(self):
""" If --limit-nodes is given, keep that many nodes from the front of the
list of cluster nodes and drop the rest
"""
if self["node-limit"] > 0:
if len(self["nodes"]) > self["node-limit"]:
# pylint thinks self["node-limit"] is a list even though we initialize
# it as an int in __init__ and treat it as an int everywhere.
# pylint: disable=bad-string-format-type
self._logger.log("Limiting the number of nodes configured=%d (max=%d)"
%(len(self["nodes"]), self["node-limit"]))
while len(self["nodes"]) > self["node-limit"]:
self["nodes"].pop(len(self["nodes"])-1)
def _validate(self):
""" Were we given all the required command line parameters? """
if not self["nodes"]:
raise ValueError("No nodes specified!")
def _discover(self):
""" Probe cluster nodes to figure out how to log and manage services """
self._target = random.Random().choice(self["nodes"])
exerciser = socket.gethostname()
# Use the IP where possible to avoid name lookup failures
for ip in socket.gethostbyname_ex(exerciser)[2]:
if ip != "127.0.0.1":
exerciser = ip
break
self["cts-exerciser"] = exerciser
- if "have_systemd" not in self.data:
- (rc, _) = self._rsh(self._target, "systemctl list-units", verbose=0)
- self["have_systemd"] = rc == 0
-
+ self._detect_systemd()
self._detect_syslog()
self._detect_at_boot()
self._detect_ip_offset()
def _parse_args(self, argv):
""" Parse and validate command line parameters, setting the appropriate
values in the environment dictionary. If argv is None, use sys.argv
instead.
"""
if not argv:
argv = sys.argv[1:]
parser = argparse.ArgumentParser(epilog="%s -g virt1 -r --stonith ssh --schema pacemaker-2.0 500" % sys.argv[0])
grp1 = parser.add_argument_group("Common options")
grp1.add_argument("-g", "--dsh-group", "--group",
metavar="GROUP", dest="group",
help="Use the nodes listed in the named DSH group (~/.dsh/groups/$name)")
grp1.add_argument("-l", "--limit-nodes",
type=int, default=0,
metavar="MAX",
help="Only use the first MAX cluster nodes supplied with --nodes")
grp1.add_argument("--benchmark",
action="store_true",
help="Add timing information")
grp1.add_argument("--list", "--list-tests",
action="store_true", dest="list_tests",
help="List the valid tests")
grp1.add_argument("--nodes",
metavar="NODES",
help="List of cluster nodes separated by whitespace")
grp1.add_argument("--stack",
default="corosync",
metavar="STACK",
help="Which cluster stack is installed")
grp2 = parser.add_argument_group("Options that CTS will usually auto-detect correctly")
grp2.add_argument("-L", "--logfile",
metavar="PATH",
help="Where to look for logs from cluster nodes")
grp2.add_argument("--at-boot", "--cluster-starts-at-boot",
choices=["1", "0", "yes", "no"],
help="Does the cluster software start at boot time?")
grp2.add_argument("--facility", "--syslog-facility",
default="daemon",
metavar="NAME",
help="Which syslog facility to log to")
grp2.add_argument("--ip", "--test-ip-base",
metavar="IP",
help="Offset for generated IP address resources")
grp3 = parser.add_argument_group("Options for release testing")
grp3.add_argument("-r", "--populate-resources",
action="store_true",
help="Generate a sample configuration")
grp3.add_argument("--choose",
metavar="NAME",
help="Run only the named test")
grp3.add_argument("--fencing", "--stonith",
choices=["1", "0", "yes", "no", "lha", "openstack", "rhcs", "rhevm", "scsi", "ssh", "virt", "xvm"],
default="1",
help="What fencing agent to use")
grp3.add_argument("--once",
action="store_true",
help="Run all valid tests once")
grp4 = parser.add_argument_group("Additional (less common) options")
grp4.add_argument("-c", "--clobber-cib",
action="store_true",
help="Erase any existing configuration")
grp4.add_argument("-y", "--yes",
action="store_true", dest="always_continue",
help="Continue to run whenever prompted")
grp4.add_argument("--boot",
action="store_true",
help="")
grp4.add_argument("--bsc",
action="store_true",
help="")
grp4.add_argument("--cib-filename",
metavar="PATH",
help="Install the given CIB file to the cluster")
grp4.add_argument("--container-tests",
action="store_true",
help="Include pacemaker_remote tests that run in lxc container resources")
grp4.add_argument("--experimental-tests",
action="store_true",
help="Include experimental tests")
grp4.add_argument("--loop-minutes",
type=int, default=60,
help="")
grp4.add_argument("--no-loop-tests",
action="store_true",
help="Don't run looping/time-based tests")
grp4.add_argument("--no-unsafe-tests",
action="store_true",
help="Don't run tests that are unsafe for use with ocfs2/drbd")
grp4.add_argument("--notification-agent",
metavar="PATH",
default="/var/lib/pacemaker/notify.sh",
help="Script to configure for Pacemaker alerts")
grp4.add_argument("--notification-recipient",
metavar="R",
default="/var/lib/pacemaker/notify.log",
help="Recipient to pass to alert script")
grp4.add_argument("--oprofile",
metavar="NODES",
help="List of cluster nodes to run oprofile on")
grp4.add_argument("--outputfile",
metavar="PATH",
help="Location to write logs to")
grp4.add_argument("--qarsh",
action="store_true",
help="Use QARSH to access nodes instead of SSH")
grp4.add_argument("--schema",
metavar="SCHEMA",
default="pacemaker-3.0",
help="Create a CIB conforming to the given schema")
grp4.add_argument("--seed",
metavar="SEED",
help="Use the given string as the random number seed")
grp4.add_argument("--set",
action="append",
metavar="ARG",
default=[],
help="Set key=value pairs (can be specified multiple times)")
grp4.add_argument("--stonith-args",
metavar="ARGS",
default="hostlist=all,livedangerously=yes",
help="")
grp4.add_argument("--stonith-type",
metavar="TYPE",
default="external/ssh",
help="")
grp4.add_argument("--trunc",
action="store_true", dest="truncate",
help="Truncate log file before starting")
grp4.add_argument("--valgrind-procs",
metavar="PROCS",
default="pacemaker-attrd pacemaker-based pacemaker-controld pacemaker-execd pacemaker-fenced pacemaker-schedulerd",
help="Run valgrind against the given space-separated list of processes")
grp4.add_argument("--valgrind-tests",
action="store_true",
help="Include tests using valgrind")
grp4.add_argument("--warn-inactive",
action="store_true",
help="Warn if a resource is assigned to an inactive node")
parser.add_argument("iterations",
type=int,
help="Number of tests to run")
args = parser.parse_args(args=argv)
# Set values on this object based on what happened with command line
# processing. This has to be done in several blocks.
# These values can always be set. They get a default from the add_argument
# calls, only do one thing, and they do not have any side effects.
self["ClobberCIB"] = args.clobber_cib
self["ListTests"] = args.list_tests
self["Schema"] = args.schema
self["Stack"] = args.stack
self["SyslogFacility"] = args.facility
self["TruncateLog"] = args.truncate
self["at-boot"] = args.at_boot in ["1", "yes"]
self["benchmark"] = args.benchmark
self["continue"] = args.always_continue
self["container-tests"] = args.container_tests
self["experimental-tests"] = args.experimental_tests
self["iterations"] = args.iterations
self["loop-minutes"] = args.loop_minutes
self["loop-tests"] = not args.no_loop_tests
self["notification-agent"] = args.notification_agent
self["notification-recipient"] = args.notification_recipient
self["node-limit"] = args.limit_nodes
self["stonith-params"] = args.stonith_args
self["stonith-type"] = args.stonith_type
self["unsafe-tests"] = not args.no_unsafe_tests
self["valgrind-procs"] = args.valgrind_procs
self["valgrind-tests"] = args.valgrind_tests
self["warn-inactive"] = args.warn_inactive
# Nodes and groups are mutually exclusive, so their defaults cannot be
# set in their add_argument calls. Additionally, groups does more than
# just set a value. Here, set nodes first and then if a group is
# specified, override the previous nodes value.
if args.nodes:
self["nodes"] = args.nodes.split(" ")
else:
self["nodes"] = []
if args.group:
self["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args.dsh_group)
LogFactory().add_file(self["OutputFile"], "CTS")
dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args.dsh_group)
if os.path.isfile(dsh_file):
self["nodes"] = []
with open(dsh_file, "r", encoding="utf-8") as f:
for line in f:
l = line.strip()
if not l.startswith('#'):
self["nodes"].append(l)
else:
print("Unknown DSH group: %s" % args.dsh_group)
# Everything else either can't have a default set in an add_argument
# call (likely because we don't want to always have a value set for it)
# or it does something fancier than just set a single value. However,
# order does not matter for these as long as the user doesn't provide
# conflicting arguments on the command line. So just do Everything
# alphabetically.
if args.boot:
self["scenario"] = "boot"
if args.bsc:
self["DoBSC"] = True
self["scenario"] = "basic-sanity"
if args.cib_filename:
self["CIBfilename"] = args.cib_filename
else:
self["CIBfilename"] = None
if args.choose:
self["scenario"] = "sequence"
self["tests"].append(args.choose)
if args.fencing:
if args.fencing in ["0", "no"]:
self["DoFencing"] = False
else:
self["DoFencing"] = True
if args.fencing in ["rhcs", "virt", "xvm"]:
self["stonith-type"] = "fence_xvm"
elif args.fencing == "scsi":
self["stonith-type"] = "fence_scsi"
elif args.fencing in ["lha", "ssh"]:
self["stonith-params"] = "hostlist=all,livedangerously=yes"
self["stonith-type"] = "external/ssh"
elif args.fencing == "openstack":
self["stonith-type"] = "fence_openstack"
print("Obtaining OpenStack credentials from the current environment")
self["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % (
os.environ['OS_REGION_NAME'],
os.environ['OS_TENANT_NAME'],
os.environ['OS_AUTH_URL'],
os.environ['OS_USERNAME'],
os.environ['OS_PASSWORD']
)
elif args.fencing == "rhevm":
self["stonith-type"] = "fence_rhevm"
print("Obtaining RHEV-M credentials from the current environment")
self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
os.environ['RHEVM_USERNAME'],
os.environ['RHEVM_PASSWORD'],
os.environ['RHEVM_SERVER'],
os.environ['RHEVM_PORT'],
)
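# Parameter sketch (credentials hypothetical): for --stonith openstack the
# composed string looks like
#   "region=RegionOne,tenant=demo,auth=http://keystone:5000/v3,user=admin,password=secret"
# and is stored as self["stonith-params"], presumably consumed when the
# sample fencing resource is configured.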
if args.ip:
self["CIBResource"] = True
self["ClobberCIB"] = True
self["IPBase"] = args.ip
if args.logfile:
self["LogAuditDisabled"] = True
self["LogFileName"] = args.logfile
self["LogWatcher"] = LogKind.REMOTE_FILE
else:
# We can't set this as the default on the parser.add_argument call
# for this option because then args.logfile will be set, which means
# the above branch will be taken and those other values will also be
# set.
self["LogFileName"] = "/var/log/messages"
if args.once:
self["scenario"] = "all-once"
if args.oprofile:
self["oprofile"] = args.oprofile.split(" ")
else:
self["oprofile"] = []
if args.outputfile:
self["OutputFile"] = args.outputfile
LogFactory().add_file(self["OutputFile"])
if args.populate_resources:
self["CIBResource"] = True
self["ClobberCIB"] = True
if args.qarsh:
self._rsh.enable_qarsh()
for kv in args.set:
(name, value) = kv.split("=")
self[name] = value
print("Setting %s = %s" % (name, value))
class EnvFactory:
""" A class for constructing a singleton instance of an Environment object """
instance = None
# pylint: disable=invalid-name
def getInstance(self, args=None):
""" Returns the previously created instance of Environment, or creates a
new instance if one does not already exist.
"""
if not EnvFactory.instance:
EnvFactory.instance = Environment(args)
return EnvFactory.instance
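# Usage sketch (arguments hypothetical): repeated getInstance() calls return
# the same Environment, so every CTS module shares one configuration:
#
#   env = EnvFactory().getInstance(["--nodes", "node1 node2", "1"])
#   assert env is EnvFactory().getInstance()
#   env.dump()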