diff --git a/cts/lab/CTSaudits.py b/cts/lab/CTSaudits.py index 433ef35389..51a04f8c19 100755 --- a/cts/lab/CTSaudits.py +++ b/cts/lab/CTSaudits.py @@ -1,878 +1,879 @@ """ Auditing classes for Pacemaker's Cluster Test Suite (CTS) """ __copyright__ = "Copyright 2000-2023 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import time, re, uuid from pacemaker.buildoptions import BuildOptions from pacemaker._cts.watcher import LogKind, LogWatcher class ClusterAudit(object): def __init__(self, cm): self.CM = cm def __call__(self): raise ValueError("Abstract Class member (__call__)") def is_applicable(self): '''Return TRUE if we are applicable in the current test configuration''' raise ValueError("Abstract Class member (is_applicable)") return 1 def log(self, args): self.CM.log("audit: %s" % args) def debug(self, args): self.CM.debug("audit: %s" % args) def name(self): raise ValueError("Abstract Class member (name)") AllAuditClasses = [ ] class LogAudit(ClusterAudit): def name(self): return "LogAudit" def __init__(self, cm): self.CM = cm def RestartClusterLogging(self, nodes=None): if not nodes: nodes = self.CM.Env["nodes"] self.CM.debug("Restarting logging on: %s" % repr(nodes)) for node in nodes: if self.CM.Env["have_systemd"]: (rc, _) = self.CM.rsh(node, "systemctl stop systemd-journald.socket") if rc != 0: self.CM.log ("ERROR: Cannot stop 'systemd-journald' on %s" % node) (rc, _) = self.CM.rsh(node, "systemctl start systemd-journald.service") if rc != 0: self.CM.log ("ERROR: Cannot start 'systemd-journald' on %s" % node) (rc, _) = self.CM.rsh(node, "service %s restart" % self.CM.Env["syslogd"]) if rc != 0: self.CM.log ("ERROR: Cannot restart '%s' on %s" % (self.CM.Env["syslogd"], node)) + def _create_watcher(self, patterns, kind): + watch = LogWatcher(self.CM.Env["LogFileName"], patterns, + self.CM.Env["nodes"], kind, "LogAudit", 5, + silent=True) + watch.set_watch() + return watch + def TestLogging(self): patterns = [] prefix = "Test message from" suffix = str(uuid.uuid4()) watch = {} for node in self.CM.Env["nodes"]: # Look for the node name in two places to make sure # that syslog is logging with the correct hostname m = re.search("^([^.]+).*", node) if m: simple = m.group(1) else: simple = node patterns.append("%s.*%s %s %s" % (simple, prefix, node, suffix)) watch_pref = self.CM.Env["LogWatcher"] if watch_pref == LogKind.ANY: - for k in LogKind: - watch[k] = LogWatcher(self.CM.Env["LogFileName"], patterns, self.CM.Env["nodes"], k, "LogAudit", 5, silent=True) - watch[k].set_watch() + kinds = [ LogKind.FILE ] + if self.CM.Env["have_systemd"]: + kinds += [ LogKind.JOURNAL ] + kinds += [ LogKind.REMOTE_FILE ] + for k in kinds: + watch[k] = self._create_watcher(patterns, k) + self.CM.log("Logging test message with identifier %s" % (suffix)) else: - k = watch_pref - watch[k] = LogWatcher(self.CM.Env["LogFileName"], patterns, self.CM.Env["nodes"], k, "LogAudit", 5, silent=True) - watch[k].set_watch() - - if watch_pref == LogKind.ANY: - self.CM.log("Writing log with key: %s" % (suffix)) + watch[watch_pref] = self._create_watcher(patterns, watch_pref) for node in self.CM.Env["nodes"]: cmd = "logger -p %s.info %s %s %s" % (self.CM.Env["SyslogFacility"], prefix, node, suffix) (rc, _) = self.CM.rsh(node, cmd, synchronous=False, verbose=0) if rc != 0: self.CM.log ("ERROR: Cannot execute remote command [%s] on %s" % (cmd, node)) - for k in LogKind: - if k in watch: - w = watch[k] - if watch_pref == LogKind.ANY: - 
self.CM.log("Testing for %s logs" % (k)) - - w.look_for_all(silent=True) - if not w.unmatched: - if watch_pref == LogKind.ANY: - self.CM.log ("Continuing with %s-based log reader" % (w.kind)) - self.CM.Env["LogWatcher"] = w.kind - return 1 - for k in list(watch.keys()): w = watch[k] + if watch_pref == LogKind.ANY: + self.CM.log("Checking for test message in %s logs" % (k)) + w.look_for_all(silent=True) if w.unmatched: for regex in w.unmatched: - self.CM.log ("Test message [%s] not found in %s logs." % (regex, w.kind)) + self.CM.log("Test message [%s] not found in %s logs" % (regex, w.kind)) + else: + if watch_pref == LogKind.ANY: + self.CM.log("Found test message in %s logs" % (k)) + self.CM.Env["LogWatcher"] = k + return 1 return 0 def __call__(self): max = 3 attempt = 0 self.CM.ns.wait_for_all_nodes(self.CM.Env["nodes"]) while attempt <= max and self.TestLogging() == 0: attempt = attempt + 1 self.RestartClusterLogging() time.sleep(60*attempt) if attempt > max: self.CM.log("ERROR: Cluster logging unrecoverable.") return 0 return 1 def is_applicable(self): if self.CM.Env["DoBSC"]: return 0 if self.CM.Env["LogAuditDisabled"]: return 0 return 1 class DiskAudit(ClusterAudit): def name(self): return "DiskspaceAudit" def __init__(self, cm): self.CM = cm def __call__(self): result = 1 # @TODO Use directory of PCMK_logfile if set on host dfcmd = "df -BM " + BuildOptions.LOG_DIR + " | tail -1 | awk '{print $(NF-1)\" \"$(NF-2)}' | tr -d 'M%'" self.CM.ns.wait_for_all_nodes(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: (_, dfout) = self.CM.rsh(node, dfcmd, verbose=1) if not dfout: self.CM.log ("ERROR: Cannot execute remote df command [%s] on %s" % (dfcmd, node)) else: dfout = dfout[0].strip() try: (used, remain) = dfout.split() used_percent = int(used) remaining_mb = int(remain) except (ValueError, TypeError): self.CM.log("Warning: df output '%s' from %s was invalid [%s, %s]" % (dfout, node, used, remain)) else: if remaining_mb < 10 or used_percent > 95: self.CM.log("CRIT: Out of log disk space on %s (%d%% / %dMB)" % (node, used_percent, remaining_mb)) result = None if self.CM.Env["continue"]: answer = "Y" else: try: answer = input('Continue? 
[nY]') except EOFError as e: answer = "n" if answer and answer == "n": raise ValueError("Disk full on %s" % (node)) elif remaining_mb < 100 or used_percent > 90: self.CM.log("WARN: Low on log disk space (%dMB) on %s" % (remaining_mb, node)) return result def is_applicable(self): if self.CM.Env["DoBSC"]: return 0 return 1 class FileAudit(ClusterAudit): def name(self): return "FileAudit" def __init__(self, cm): self.CM = cm self.known = [] def __call__(self): result = 1 self.CM.ns.wait_for_all_nodes(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: (_, lsout) = self.CM.rsh(node, "ls -al /var/lib/pacemaker/cores/* | grep core.[0-9]", verbose=1) for line in lsout: line = line.strip() if line not in self.known: result = 0 self.known.append(line) self.CM.log("Warning: Pacemaker core file on %s: %s" % (node, line)) (_, lsout) = self.CM.rsh(node, "ls -al /var/lib/corosync | grep core.[0-9]", verbose=1) for line in lsout: line = line.strip() if line not in self.known: result = 0 self.known.append(line) self.CM.log("Warning: Corosync core file on %s: %s" % (node, line)) if node in self.CM.ShouldBeStatus and self.CM.ShouldBeStatus[node] == "down": clean = 0 (_, lsout) = self.CM.rsh(node, "ls -al /dev/shm | grep qb-", verbose=1) for line in lsout: result = 0 clean = 1 self.CM.log("Warning: Stale IPC file on %s: %s" % (node, line)) if clean: (_, lsout) = self.CM.rsh(node, "ps axf | grep -e pacemaker -e corosync", verbose=1) for line in lsout: self.CM.debug("ps[%s]: %s" % (node, line)) self.CM.rsh(node, "rm -rf /dev/shm/qb-*") else: self.CM.debug("Skipping %s" % node) return result def is_applicable(self): return 1 class AuditResource(object): def __init__(self, cm, line): fields = line.split() self.CM = cm self.line = line self.type = fields[1] self.id = fields[2] self.clone_id = fields[3] self.parent = fields[4] self.rprovider = fields[5] self.rclass = fields[6] self.rtype = fields[7] self.host = fields[8] self.needs_quorum = fields[9] self.flags = int(fields[10]) self.flags_s = fields[11] if self.parent == "NA": self.parent = None def unique(self): if self.flags & int("0x00000020", 16): return 1 return 0 def orphan(self): if self.flags & int("0x00000001", 16): return 1 return 0 def managed(self): if self.flags & int("0x00000002", 16): return 1 return 0 class AuditConstraint(object): def __init__(self, cm, line): fields = line.split() self.CM = cm self.line = line self.type = fields[1] self.id = fields[2] self.rsc = fields[3] self.target = fields[4] self.score = fields[5] self.rsc_role = fields[6] self.target_role = fields[7] if self.rsc_role == "NA": self.rsc_role = None if self.target_role == "NA": self.target_role = None class PrimitiveAudit(ClusterAudit): def name(self): return "PrimitiveAudit" def __init__(self, cm): self.CM = cm def doResourceAudit(self, resource, quorum): rc = 1 active = self.CM.ResourceLocation(resource.id) if len(active) == 1: if quorum: self.debug("Resource %s active on %s" % (resource.id, repr(active))) elif resource.needs_quorum == 1: self.CM.log("Resource %s active without quorum: %s" % (resource.id, repr(active))) rc = 0 elif not resource.managed(): self.CM.log("Resource %s not managed. 
Active on %s" % (resource.id, repr(active))) elif not resource.unique(): # TODO: Figure out a clever way to actually audit these resource types if len(active) > 1: self.debug("Non-unique resource %s is active on: %s" % (resource.id, repr(active))) else: self.debug("Non-unique resource %s is not active" % resource.id) elif len(active) > 1: self.CM.log("Resource %s is active multiple times: %s" % (resource.id, repr(active))) rc = 0 elif resource.orphan(): self.debug("Resource %s is an inactive orphan" % resource.id) elif len(self.inactive_nodes) == 0: self.CM.log("WARN: Resource %s not served anywhere" % resource.id) rc = 0 elif self.CM.Env["warn-inactive"]: if quorum or not resource.needs_quorum: self.CM.log("WARN: Resource %s not served anywhere (Inactive nodes: %s)" % (resource.id, repr(self.inactive_nodes))) else: self.debug("Resource %s not served anywhere (Inactive nodes: %s)" % (resource.id, repr(self.inactive_nodes))) elif quorum or not resource.needs_quorum: self.debug("Resource %s not served anywhere (Inactive nodes: %s)" % (resource.id, repr(self.inactive_nodes))) return rc def setup(self): self.target = None self.resources = [] self.constraints = [] self.active_nodes = [] self.inactive_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.active_nodes.append(node) else: self.inactive_nodes.append(node) for node in self.CM.Env["nodes"]: if self.target == None and self.CM.ShouldBeStatus[node] == "up": self.target = node if not self.target: # TODO: In Pacemaker 1.0 clusters we'll be able to run crm_resource # with CIB_file=/path/to/cib.xml even when the cluster isn't running self.debug("No nodes active - skipping %s" % self.name()) return 0 (_, lines) = self.CM.rsh(self.target, "crm_resource -c", verbose=1) for line in lines: if re.search("^Resource", line): self.resources.append(AuditResource(self.CM, line)) elif re.search("^Constraint", line): self.constraints.append(AuditConstraint(self.CM, line)) else: self.CM.log("Unknown entry: %s" % line); return 1 def __call__(self): rc = 1 if not self.setup(): return 1 quorum = self.CM.HasQuorum(None) for resource in self.resources: if resource.type == "primitive": if self.doResourceAudit(resource, quorum) == 0: rc = 0 return rc def is_applicable(self): # @TODO Due to long-ago refactoring, this name test would never match, # so this audit (and those derived from it) would never run. # Uncommenting the next lines fixes the name test, but that then # exposes pre-existing bugs that need to be fixed. 
#if self.CM["Name"] == "crm-corosync": # return 1 return 0 class GroupAudit(PrimitiveAudit): def name(self): return "GroupAudit" def __call__(self): rc = 1 if not self.setup(): return 1 for group in self.resources: if group.type == "group": first_match = 1 group_location = None for child in self.resources: if child.parent == group.id: nodes = self.CM.ResourceLocation(child.id) if first_match and len(nodes) > 0: group_location = nodes[0] first_match = 0 if len(nodes) > 1: rc = 0 self.CM.log("Child %s of %s is active more than once: %s" % (child.id, group.id, repr(nodes))) elif len(nodes) == 0: # Groups are allowed to be partially active # However we do need to make sure later children aren't running group_location = None self.debug("Child %s of %s is stopped" % (child.id, group.id)) elif nodes[0] != group_location: rc = 0 self.CM.log("Child %s of %s is active on the wrong node (%s) expected %s" % (child.id, group.id, nodes[0], group_location)) else: self.debug("Child %s of %s is active on %s" % (child.id, group.id, nodes[0])) return rc class CloneAudit(PrimitiveAudit): def name(self): return "CloneAudit" def __call__(self): rc = 1 if not self.setup(): return 1 for clone in self.resources: if clone.type == "clone": for child in self.resources: if child.parent == clone.id and child.type == "primitive": self.debug("Checking child %s of %s..." % (child.id, clone.id)) # Check max and node_max # Obtain with: # crm_resource -g clone_max --meta -r child.id # crm_resource -g clone_node_max --meta -r child.id return rc class ColocationAudit(PrimitiveAudit): def name(self): return "ColocationAudit" def crm_location(self, resource): (rc, lines) = self.CM.rsh(self.target, "crm_resource -W -r %s -Q"%resource, verbose=1) hosts = [] if rc == 0: for line in lines: fields = line.split() hosts.append(fields[0]) return hosts def __call__(self): rc = 1 if not self.setup(): return 1 for coloc in self.constraints: if coloc.type == "rsc_colocation": source = self.crm_location(coloc.rsc) target = self.crm_location(coloc.target) if len(source) == 0: self.debug("Colocation audit (%s): %s not running" % (coloc.id, coloc.rsc)) else: for node in source: if not node in target: rc = 0 self.CM.log("Colocation audit (%s): %s running on %s (not in %s)" % (coloc.id, coloc.rsc, node, repr(target))) else: self.debug("Colocation audit (%s): %s running on %s (in %s)" % (coloc.id, coloc.rsc, node, repr(target))) return rc class ControllerStateAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} def has_key(self, key): return key in self.Stats def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not name in self.Stats: self.Stats[name] = 0 self.Stats[name] = self.Stats[name]+1 def __call__(self): passed = 1 up_are_down = 0 down_are_up = 0 unstable_list = [] for node in self.CM.Env["nodes"]: should_be = self.CM.ShouldBeStatus[node] rc = self.CM.test_node_CM(node) if rc > 0: if should_be == "down": down_are_up = down_are_up + 1 if rc == 1: unstable_list.append(node) elif should_be == "up": up_are_down = up_are_down + 1 if len(unstable_list) > 0: passed = 0 self.CM.log("Cluster is not stable: %d (of %d): %s" % (len(unstable_list), self.CM.upcount(), repr(unstable_list))) if up_are_down > 0: passed = 0 self.CM.log("%d (of %d) nodes expected to be up were down." 
% (up_are_down, len(self.CM.Env["nodes"]))) if down_are_up > 0: passed = 0 self.CM.log("%d (of %d) nodes expected to be down were up." % (down_are_up, len(self.CM.Env["nodes"]))) return passed def name(self): return "ControllerStateAudit" def is_applicable(self): # @TODO Due to long-ago refactoring, this name test would never match, # so this audit (and those derived from it) would never run. # Uncommenting the next lines fixes the name test, but that then # exposes pre-existing bugs that need to be fixed. #if self.CM["Name"] == "crm-corosync": # return 1 return 0 class CIBAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} def has_key(self, key): return key in self.Stats def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not name in self.Stats: self.Stats[name] = 0 self.Stats[name] = self.Stats[name]+1 def __call__(self): passed = 1 ccm_partitions = self.CM.find_partitions() if len(ccm_partitions) == 0: self.debug("\tNo partitions to audit") return 1 for partition in ccm_partitions: self.debug("\tAuditing CIB consistency for: %s" % partition) partition_passed = 0 if self.audit_cib_contents(partition) == 0: passed = 0 return passed def audit_cib_contents(self, hostlist): passed = 1 node0 = None node0_xml = None partition_hosts = hostlist.split() for node in partition_hosts: node_xml = self.store_remote_cib(node, node0) if node_xml == None: self.CM.log("Could not perform audit: No configuration from %s" % node) passed = 0 elif node0 == None: node0 = node node0_xml = node_xml elif node0_xml == None: self.CM.log("Could not perform audit: No configuration from %s" % node0) passed = 0 else: (rc, result) = self.CM.rsh( node0, "crm_diff -VV -cf --new %s --original %s" % (node_xml, node0_xml), verbose=1) if rc != 0: self.CM.log("Diff between %s and %s failed: %d" % (node0_xml, node_xml, rc)) passed = 0 for line in result: if not re.search("<diff/>", line): passed = 0 self.debug("CibDiff[%s-%s]: %s" % (node0, node, line)) else: self.debug("CibDiff[%s-%s] Ignoring: %s" % (node0, node, line)) # self.CM.rsh(node0, "rm -f %s" % node_xml) # self.CM.rsh(node0, "rm -f %s" % node0_xml) return passed def store_remote_cib(self, node, target): combined = "" filename = "/tmp/ctsaudit.%s.xml" % node if not target: target = node (rc, lines) = self.CM.rsh(node, self.CM["CibQuery"], verbose=1) if rc != 0: self.CM.log("Could not retrieve configuration") return None self.CM.rsh("localhost", "rm -f %s" % filename) for line in lines: self.CM.rsh("localhost", "echo \'%s\' >> %s" % (line[:-1], filename), verbose=0) if self.CM.rsh.copy(filename, "root@%s:%s" % (target, filename), silent=True) != 0: self.CM.log("Could not store configuration") return None return filename def name(self): return "CibAudit" def is_applicable(self): # @TODO Due to long-ago refactoring, this name test would never match, # so this audit (and those derived from it) would never run. # Uncommenting the next lines fixes the name test, but that then # exposes pre-existing bugs that need to be fixed. 
#if self.CM["Name"] == "crm-corosync": # return 1 return 0 class PartitionAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} self.NodeEpoch = {} self.NodeState = {} self.NodeQuorum = {} def has_key(self, key): return key in self.Stats def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not name in self.Stats: self.Stats[name] = 0 self.Stats[name] = self.Stats[name]+1 def __call__(self): passed = 1 ccm_partitions = self.CM.find_partitions() if ccm_partitions == None or len(ccm_partitions) == 0: return 1 self.CM.cluster_stable(double_check=True) if len(ccm_partitions) != self.CM.partitions_expected: self.CM.log("ERROR: %d cluster partitions detected:" % len(ccm_partitions)) passed = 0 for partition in ccm_partitions: self.CM.log("\t %s" % partition) for partition in ccm_partitions: partition_passed = 0 if self.audit_partition(partition) == 0: passed = 0 return passed def trim_string(self, avalue): if not avalue: return None if len(avalue) > 1: return avalue[:-1] def trim2int(self, avalue): if not avalue: return None if len(avalue) > 1: return int(avalue[:-1]) def audit_partition(self, partition): passed = 1 dc_found = [] dc_allowed_list = [] lowest_epoch = None node_list = partition.split() self.debug("Auditing partition: %s" % (partition)) for node in node_list: if self.CM.ShouldBeStatus[node] != "up": self.CM.log("Warn: Node %s appeared out of nowhere" % (node)) self.CM.ShouldBeStatus[node] = "up" # not in itself a reason to fail the audit (not what we're # checking for in this audit) (_, out) = self.CM.rsh(node, self.CM["StatusCmd"] % node, verbose=1) self.NodeState[node] = out[0].strip() (_, out) = self.CM.rsh(node, self.CM["EpochCmd"], verbose=1) self.NodeEpoch[node] = out[0].strip() (_, out) = self.CM.rsh(node, self.CM["QuorumCmd"], verbose=1) self.NodeQuorum[node] = out[0].strip() self.debug("Node %s: %s - %s - %s." % (node, self.NodeState[node], self.NodeEpoch[node], self.NodeQuorum[node])) self.NodeState[node] = self.trim_string(self.NodeState[node]) self.NodeEpoch[node] = self.trim2int(self.NodeEpoch[node]) self.NodeQuorum[node] = self.trim_string(self.NodeQuorum[node]) if not self.NodeEpoch[node]: self.CM.log("Warn: Node %s disappeared: can't determine epoch" % (node)) self.CM.ShouldBeStatus[node] = "down" # not in itself a reason to fail the audit (not what we're # checking for in this audit) elif lowest_epoch == None or self.NodeEpoch[node] < lowest_epoch: lowest_epoch = self.NodeEpoch[node] if not lowest_epoch: self.CM.log("Lowest epoch not determined in %s" % (partition)) passed = 0 for node in node_list: if self.CM.ShouldBeStatus[node] == "up": if self.CM.is_node_dc(node, self.NodeState[node]): dc_found.append(node) if self.NodeEpoch[node] == lowest_epoch: self.debug("%s: OK" % node) elif not self.NodeEpoch[node]: self.debug("Check on %s ignored: no node epoch" % node) elif not lowest_epoch: self.debug("Check on %s ignored: no lowest epoch" % node) else: self.CM.log("DC %s is not the oldest node (%d vs. 
%d)" % (node, self.NodeEpoch[node], lowest_epoch)) passed = 0 if len(dc_found) == 0: self.CM.log("DC not found on any of the %d allowed nodes: %s (of %s)" % (len(dc_allowed_list), str(dc_allowed_list), str(node_list))) elif len(dc_found) > 1: self.CM.log("%d DCs (%s) found in cluster partition: %s" % (len(dc_found), str(dc_found), str(node_list))) passed = 0 if passed == 0: for node in node_list: if self.CM.ShouldBeStatus[node] == "up": self.CM.log("epoch %s : %s" % (self.NodeEpoch[node], self.NodeState[node])) return passed def name(self): return "PartitionAudit" def is_applicable(self): # @TODO Due to long-ago refactoring, this name test would never match, # so this audit (and those derived from it) would never run. # Uncommenting the next lines fixes the name test, but that then # exposes pre-existing bugs that need to be fixed. #if self.CM["Name"] == "crm-corosync": # return 1 return 0 AllAuditClasses.append(DiskAudit) AllAuditClasses.append(FileAudit) AllAuditClasses.append(LogAudit) AllAuditClasses.append(ControllerStateAudit) AllAuditClasses.append(PartitionAudit) AllAuditClasses.append(PrimitiveAudit) AllAuditClasses.append(GroupAudit) AllAuditClasses.append(CloneAudit) AllAuditClasses.append(ColocationAudit) AllAuditClasses.append(CIBAudit) def AuditList(cm): result = [] for auditclass in AllAuditClasses: a = auditclass(cm) if a.is_applicable(): result.append(a) return result diff --git a/cts/scheduler/summary/primitive-with-group-with-promoted.summary b/cts/scheduler/summary/primitive-with-group-with-promoted.summary index ce100915b4..b92ce1e50a 100644 --- a/cts/scheduler/summary/primitive-with-group-with-promoted.summary +++ b/cts/scheduler/summary/primitive-with-group-with-promoted.summary @@ -1,75 +1,75 @@ Current cluster status: * Node List: * Online: [ node1 node2 node3 node4 node5 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started node1 * Clone Set: rsc2-clone [rsc2] (promotable): * Stopped: [ node1 node2 node3 node4 node5 ] * rsc1 (ocf:pacemaker:Dummy): Stopped * Resource Group: group1: * group1rsc1 (ocf:pacemaker:Dummy): Stopped * group1rsc2 (ocf:pacemaker:Dummy): Stopped Transition Summary: * Promote rsc2:0 ( Stopped -> Promoted node5 ) - * Start rsc2:1 ( node2 ) - * Start rsc2:2 ( node3 ) - * Start rsc1 ( node5 ) - * Start group1rsc1 ( node5 ) - * Start group1rsc2 ( node5 ) + * Start rsc2:1 ( node2 ) + * Start rsc2:2 ( node3 ) + * Start rsc1 ( node5 ) + * Start group1rsc1 ( node5 ) + * Start group1rsc2 ( node5 ) Executing Cluster Transition: * Resource action: rsc2:0 monitor on node5 * Resource action: rsc2:0 monitor on node4 * Resource action: rsc2:0 monitor on node1 * Resource action: rsc2:1 monitor on node2 * Resource action: rsc2:2 monitor on node3 * Pseudo action: rsc2-clone_start_0 * Resource action: rsc1 monitor on node5 * Resource action: rsc1 monitor on node4 * Resource action: rsc1 monitor on node3 * Resource action: rsc1 monitor on node2 * Resource action: rsc1 monitor on node1 * Pseudo action: group1_start_0 * Resource action: group1rsc1 monitor on node5 * Resource action: group1rsc1 monitor on node4 * Resource action: group1rsc1 monitor on node3 * Resource action: group1rsc1 monitor on node2 * Resource action: group1rsc1 monitor on node1 * Resource action: group1rsc2 monitor on node5 * Resource action: group1rsc2 monitor on node4 * Resource action: group1rsc2 monitor on node3 * Resource action: group1rsc2 monitor on node2 * Resource action: group1rsc2 monitor on node1 * Resource action: rsc2:0 start on node5 * Resource action: rsc2:1 start 
on node2 * Resource action: rsc2:2 start on node3 * Pseudo action: rsc2-clone_running_0 * Resource action: rsc1 start on node5 * Resource action: group1rsc1 start on node5 * Resource action: group1rsc2 start on node5 * Resource action: rsc2:1 monitor=11000 on node2 * Resource action: rsc2:2 monitor=11000 on node3 * Pseudo action: rsc2-clone_promote_0 * Resource action: rsc1 monitor=10000 on node5 * Pseudo action: group1_running_0 * Resource action: group1rsc1 monitor=10000 on node5 * Resource action: group1rsc2 monitor=10000 on node5 * Resource action: rsc2:0 promote on node5 * Pseudo action: rsc2-clone_promoted_0 * Resource action: rsc2:0 monitor=10000 on node5 Revised Cluster Status: * Node List: * Online: [ node1 node2 node3 node4 node5 ] * Full List of Resources: * Fencing (stonith:fence_xvm): Started node1 * Clone Set: rsc2-clone [rsc2] (promotable): * Promoted: [ node5 ] * Unpromoted: [ node2 node3 ] * rsc1 (ocf:pacemaker:Dummy): Started node5 * Resource Group: group1: * group1rsc1 (ocf:pacemaker:Dummy): Started node5 * group1rsc2 (ocf:pacemaker:Dummy): Started node5 diff --git a/cts/scheduler/summary/promoted-partially-demoted-group.summary b/cts/scheduler/summary/promoted-partially-demoted-group.summary index 91e1ee7013..b85c805711 100644 --- a/cts/scheduler/summary/promoted-partially-demoted-group.summary +++ b/cts/scheduler/summary/promoted-partially-demoted-group.summary @@ -1,118 +1,118 @@ Current cluster status: * Node List: * Online: [ sd01-0 sd01-1 ] * Full List of Resources: * stonith-xvm-sd01-0 (stonith:fence_xvm): Started sd01-1 * stonith-xvm-sd01-1 (stonith:fence_xvm): Started sd01-0 * Resource Group: cdev-pool-0-iscsi-export: * cdev-pool-0-iscsi-target (ocf:vds-ok:iSCSITarget): Started sd01-1 * cdev-pool-0-iscsi-lun-1 (ocf:vds-ok:iSCSILogicalUnit): Started sd01-1 * Clone Set: ms-cdev-pool-0-drbd [cdev-pool-0-drbd] (promotable): * Promoted: [ sd01-1 ] * Unpromoted: [ sd01-0 ] * Clone Set: cl-ietd [ietd]: * Started: [ sd01-0 sd01-1 ] * Clone Set: cl-vlan1-net [vlan1-net]: * Started: [ sd01-0 sd01-1 ] * Resource Group: cdev-pool-0-iscsi-vips: * vip-164 (ocf:heartbeat:IPaddr2): Started sd01-1 * vip-165 (ocf:heartbeat:IPaddr2): Started sd01-1 * Clone Set: ms-cdev-pool-0-iscsi-vips-fw [cdev-pool-0-iscsi-vips-fw] (promotable): * Promoted: [ sd01-1 ] * Unpromoted: [ sd01-0 ] Transition Summary: * Move vip-164 ( sd01-1 -> sd01-0 ) * Move vip-165 ( sd01-1 -> sd01-0 ) - * Move cdev-pool-0-iscsi-target ( sd01-1 -> sd01-0 ) - * Move cdev-pool-0-iscsi-lun-1 ( sd01-1 -> sd01-0 ) + * Move cdev-pool-0-iscsi-target ( sd01-1 -> sd01-0 ) + * Move cdev-pool-0-iscsi-lun-1 ( sd01-1 -> sd01-0 ) * Demote vip-164-fw:0 ( Promoted -> Unpromoted sd01-1 ) * Promote vip-164-fw:1 ( Unpromoted -> Promoted sd01-0 ) * Promote vip-165-fw:1 ( Unpromoted -> Promoted sd01-0 ) * Demote cdev-pool-0-drbd:0 ( Promoted -> Unpromoted sd01-1 ) * Promote cdev-pool-0-drbd:1 ( Unpromoted -> Promoted sd01-0 ) Executing Cluster Transition: * Resource action: vip-165-fw monitor=10000 on sd01-1 * Pseudo action: ms-cdev-pool-0-iscsi-vips-fw_demote_0 * Pseudo action: ms-cdev-pool-0-drbd_pre_notify_demote_0 * Pseudo action: cdev-pool-0-iscsi-vips-fw:0_demote_0 * Resource action: vip-164-fw demote on sd01-1 * Resource action: cdev-pool-0-drbd notify on sd01-1 * Resource action: cdev-pool-0-drbd notify on sd01-0 * Pseudo action: ms-cdev-pool-0-drbd_confirmed-pre_notify_demote_0 * Pseudo action: cdev-pool-0-iscsi-vips-fw:0_demoted_0 * Resource action: vip-164-fw monitor=10000 on sd01-1 * Pseudo action: 
ms-cdev-pool-0-iscsi-vips-fw_demoted_0 * Pseudo action: cdev-pool-0-iscsi-vips_stop_0 * Resource action: vip-165 stop on sd01-1 * Resource action: vip-164 stop on sd01-1 * Pseudo action: cdev-pool-0-iscsi-vips_stopped_0 * Pseudo action: cdev-pool-0-iscsi-export_stop_0 * Resource action: cdev-pool-0-iscsi-lun-1 stop on sd01-1 * Resource action: cdev-pool-0-iscsi-target stop on sd01-1 * Pseudo action: cdev-pool-0-iscsi-export_stopped_0 * Pseudo action: ms-cdev-pool-0-drbd_demote_0 * Resource action: cdev-pool-0-drbd demote on sd01-1 * Pseudo action: ms-cdev-pool-0-drbd_demoted_0 * Pseudo action: ms-cdev-pool-0-drbd_post_notify_demoted_0 * Resource action: cdev-pool-0-drbd notify on sd01-1 * Resource action: cdev-pool-0-drbd notify on sd01-0 * Pseudo action: ms-cdev-pool-0-drbd_confirmed-post_notify_demoted_0 * Pseudo action: ms-cdev-pool-0-drbd_pre_notify_promote_0 * Resource action: cdev-pool-0-drbd notify on sd01-1 * Resource action: cdev-pool-0-drbd notify on sd01-0 * Pseudo action: ms-cdev-pool-0-drbd_confirmed-pre_notify_promote_0 * Pseudo action: ms-cdev-pool-0-drbd_promote_0 * Resource action: cdev-pool-0-drbd promote on sd01-0 * Pseudo action: ms-cdev-pool-0-drbd_promoted_0 * Pseudo action: ms-cdev-pool-0-drbd_post_notify_promoted_0 * Resource action: cdev-pool-0-drbd notify on sd01-1 * Resource action: cdev-pool-0-drbd notify on sd01-0 * Pseudo action: ms-cdev-pool-0-drbd_confirmed-post_notify_promoted_0 * Pseudo action: cdev-pool-0-iscsi-export_start_0 * Resource action: cdev-pool-0-iscsi-target start on sd01-0 * Resource action: cdev-pool-0-iscsi-lun-1 start on sd01-0 * Resource action: cdev-pool-0-drbd monitor=20000 on sd01-1 * Resource action: cdev-pool-0-drbd monitor=10000 on sd01-0 * Pseudo action: cdev-pool-0-iscsi-export_running_0 * Resource action: cdev-pool-0-iscsi-target monitor=10000 on sd01-0 * Resource action: cdev-pool-0-iscsi-lun-1 monitor=10000 on sd01-0 * Pseudo action: cdev-pool-0-iscsi-vips_start_0 * Resource action: vip-164 start on sd01-0 * Resource action: vip-165 start on sd01-0 * Pseudo action: cdev-pool-0-iscsi-vips_running_0 * Resource action: vip-164 monitor=30000 on sd01-0 * Resource action: vip-165 monitor=30000 on sd01-0 * Pseudo action: ms-cdev-pool-0-iscsi-vips-fw_promote_0 * Pseudo action: cdev-pool-0-iscsi-vips-fw:0_promote_0 * Pseudo action: cdev-pool-0-iscsi-vips-fw:1_promote_0 * Resource action: vip-164-fw promote on sd01-0 * Resource action: vip-165-fw promote on sd01-0 * Pseudo action: cdev-pool-0-iscsi-vips-fw:1_promoted_0 * Pseudo action: ms-cdev-pool-0-iscsi-vips-fw_promoted_0 Revised Cluster Status: * Node List: * Online: [ sd01-0 sd01-1 ] * Full List of Resources: * stonith-xvm-sd01-0 (stonith:fence_xvm): Started sd01-1 * stonith-xvm-sd01-1 (stonith:fence_xvm): Started sd01-0 * Resource Group: cdev-pool-0-iscsi-export: * cdev-pool-0-iscsi-target (ocf:vds-ok:iSCSITarget): Started sd01-0 * cdev-pool-0-iscsi-lun-1 (ocf:vds-ok:iSCSILogicalUnit): Started sd01-0 * Clone Set: ms-cdev-pool-0-drbd [cdev-pool-0-drbd] (promotable): * Promoted: [ sd01-0 ] * Unpromoted: [ sd01-1 ] * Clone Set: cl-ietd [ietd]: * Started: [ sd01-0 sd01-1 ] * Clone Set: cl-vlan1-net [vlan1-net]: * Started: [ sd01-0 sd01-1 ] * Resource Group: cdev-pool-0-iscsi-vips: * vip-164 (ocf:heartbeat:IPaddr2): Started sd01-0 * vip-165 (ocf:heartbeat:IPaddr2): Started sd01-0 * Clone Set: ms-cdev-pool-0-iscsi-vips-fw [cdev-pool-0-iscsi-vips-fw] (promotable): * Promoted: [ sd01-0 ] * Unpromoted: [ sd01-1 ] diff --git a/po/zh_CN.po b/po/zh_CN.po index 212c81ef85..a107f0b4eb 100644 --- 
a/po/zh_CN.po +++ b/po/zh_CN.po @@ -1,1105 +1,1105 @@ # # Copyright 2003-2022 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU Lesser General Public License # version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. # #, fuzzy msgid "" msgstr "" "Project-Id-Version: Pacemaker 2\n" "Report-Msgid-Bugs-To: developers@clusterlabs.org\n" -"POT-Creation-Date: 2023-01-28 10:28+0800\n" +"POT-Creation-Date: 2023-04-05 16:20-0500\n" "PO-Revision-Date: 2021-11-08 11:04+0800\n" "Last-Translator: Vivi \n" "Language-Team: CHINESE \n" "Language: zh_CN\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -#: daemons/controld/controld_control.c:531 +#: daemons/controld/controld_control.c:533 msgid "Pacemaker version on cluster node elected Designated Controller (DC)" msgstr "集群选定的控制器节点(DC)的 Pacemaker 版本" -#: daemons/controld/controld_control.c:532 +#: daemons/controld/controld_control.c:534 msgid "" "Includes a hash which identifies the exact changeset the code was built " "from. Used for diagnostic purposes." msgstr "它包含一个标识所构建代码变更版本的哈希值,其可用于诊断。" -#: daemons/controld/controld_control.c:537 +#: daemons/controld/controld_control.c:539 msgid "The messaging stack on which Pacemaker is currently running" msgstr "Pacemaker 正在使用的消息传输引擎" -#: daemons/controld/controld_control.c:538 +#: daemons/controld/controld_control.c:540 msgid "Used for informational and diagnostic purposes." msgstr "用于提供信息和诊断。" -#: daemons/controld/controld_control.c:542 +#: daemons/controld/controld_control.c:544 msgid "An arbitrary name for the cluster" msgstr "任意的集群名称" -#: daemons/controld/controld_control.c:543 +#: daemons/controld/controld_control.c:545 msgid "" "This optional value is mostly for users' convenience as desired in " "administration, but may also be used in Pacemaker configuration rules via " "the #cluster-name node attribute, and by higher-level tools and resource " "agents." msgstr "" "该可选值主要是为了方便用户管理使用,也可以在pacemaker 配置规则中通过 " "#cluster-name 节点属性配置使用,也可以通过高级工具和资源代理使用。" -#: daemons/controld/controld_control.c:551 +#: daemons/controld/controld_control.c:553 msgid "How long to wait for a response from other nodes during start-up" msgstr "启动过程中等待其他节点响应的时间" -#: daemons/controld/controld_control.c:552 +#: daemons/controld/controld_control.c:554 msgid "" "The optimal value will depend on the speed and load of your network and the " "type of switches used." msgstr "其最佳值将取决于你的网络速度和负载以及所用交换机的类型。" -#: daemons/controld/controld_control.c:557 +#: daemons/controld/controld_control.c:559 msgid "" "Zero disables polling, while positive values are an interval in " "seconds(unless other units are specified, for example \"5min\")" msgstr "" "设置为0将禁用轮询,设置为正数将是以秒为单位的时间间隔(除非使用了其他单位,比" "如\"5min\"表示5分钟)" -#: daemons/controld/controld_control.c:560 +#: daemons/controld/controld_control.c:562 msgid "" "Polling interval to recheck cluster state and evaluate rules with date " "specifications" msgstr "重新检查集群状态并且评估具有日期规格的配置规则的轮询间隔" -#: daemons/controld/controld_control.c:562 +#: daemons/controld/controld_control.c:564 msgid "" "Pacemaker is primarily event-driven, and looks ahead to know when to recheck " "cluster state for failure timeouts and most time-based rules. However, it " "will also recheck the cluster after this amount of inactivity, to evaluate " "rules with date specifications and serve as a fail-safe for certain types of " "scheduler bugs." 
msgstr "" "Pacemaker 主要是通过事件驱动的,并能预期重新检查集群状态以评估大多数基于时间" "的规则以及过期的错误。然而无论如何,在集群经过该时间间隔的不活动状态后,它还" "将重新检查集群,以评估具有日期规格的规则,并为某些类型的调度程序缺陷提供故障" "保护。" -#: daemons/controld/controld_control.c:571 +#: daemons/controld/controld_control.c:573 msgid "Maximum amount of system load that should be used by cluster nodes" msgstr "集群节点应该使用的最大系统负载量" -#: daemons/controld/controld_control.c:572 +#: daemons/controld/controld_control.c:574 msgid "" "The cluster will slow down its recovery process when the amount of system " "resources used (currently CPU) approaches this limit" msgstr "当使用的系统资源量(当前为CPU)接近此限制时,集群将减慢其恢复过程" -#: daemons/controld/controld_control.c:578 +#: daemons/controld/controld_control.c:580 msgid "" "Maximum number of jobs that can be scheduled per node (defaults to 2x cores)" msgstr "每个节点可以调度的最大作业数(默认为2x内核数)" -#: daemons/controld/controld_control.c:582 +#: daemons/controld/controld_control.c:584 msgid "How a cluster node should react if notified of its own fencing" msgstr "集群节点在收到针对自己的 fence 操作结果通知时应如何反应" -#: daemons/controld/controld_control.c:583 +#: daemons/controld/controld_control.c:585 msgid "" "A cluster node may receive notification of its own fencing if fencing is " "misconfigured, or if fabric fencing is in use that doesn't cut cluster " "communication. Allowed values are \"stop\" to attempt to immediately stop " "Pacemaker and stay stopped, or \"panic\" to attempt to immediately reboot " "the local node, falling back to stop on failure." msgstr "" "如果有错误的 fence 配置,或者在使用 fabric fence 机制 (并不会切断集群通信)," "则集群节点可能会收到针对自己的 fence 结果通知。允许的值为 \"stop\" 尝试立即停" "止 pacemaker 并保持停用状态,或者 \"panic\" 尝试立即重新启动本地节点,并在失败" "时返回执行stop。" -#: daemons/controld/controld_control.c:593 +#: daemons/controld/controld_control.c:595 msgid "" "Declare an election failed if it is not decided within this much time. If " "you need to adjust this value, it probably indicates the presence of a bug." msgstr "" "如果集群在本项设置时间内没有作出决定则宣布选举失败。如果您需要调整该值,这可" "能代表存在某些缺陷。" -#: daemons/controld/controld_control.c:601 +#: daemons/controld/controld_control.c:603 msgid "" "Exit immediately if shutdown does not complete within this much time. If you " "need to adjust this value, it probably indicates the presence of a bug." msgstr "" "如果在这段时间内关机仍未完成,则立即退出。如果您需要调整该值,这可能代表存在" "某些缺陷。" -#: daemons/controld/controld_control.c:609 -#: daemons/controld/controld_control.c:616 +#: daemons/controld/controld_control.c:611 +#: daemons/controld/controld_control.c:618 msgid "" "If you need to adjust this value, it probably indicates the presence of a " "bug." msgstr "如果您需要调整该值,这可能代表存在某些缺陷。" -#: daemons/controld/controld_control.c:622 +#: daemons/controld/controld_control.c:624 msgid "" "*** Advanced Use Only *** Enabling this option will slow down cluster " "recovery under all conditions" msgstr "*** Advanced Use Only *** 启用此选项将在所有情况下减慢集群恢复的速度" -#: daemons/controld/controld_control.c:624 +#: daemons/controld/controld_control.c:626 msgid "" "Delay cluster recovery for this much time to allow for additional events to " "occur. Useful if your configuration is sensitive to the order in which ping " "updates arrive." 
msgstr "" "集群恢复将被推迟指定的时间间隔,以等待更多事件发生。如果您的配置对 ping 更新" "到达的顺序很敏感,这就很有用" -#: daemons/controld/controld_control.c:631 +#: daemons/controld/controld_control.c:633 #, fuzzy msgid "" "How long before nodes can be assumed to be safely down when watchdog-based " "self-fencing via SBD is in use" msgstr "" "当基于 watchdog 的自我 fence 机制通过SBD 被执行时,我们可以假设节点安全关闭之" "前需要等待多长时间" -#: daemons/controld/controld_control.c:633 +#: daemons/controld/controld_control.c:635 msgid "" "If this is set to a positive value, lost nodes are assumed to self-fence " "using watchdog-based SBD within this much time. This does not require a " "fencing resource to be explicitly configured, though a fence_watchdog " "resource can be configured, to limit use to specific nodes. If this is set " "to 0 (the default), the cluster will never assume watchdog-based self-" "fencing. If this is set to a negative value, the cluster will use twice the " "local value of the `SBD_WATCHDOG_TIMEOUT` environment variable if that is " "positive, or otherwise treat this as 0. WARNING: When used, this timeout " "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use watchdog-" "based SBD, and Pacemaker will refuse to start on any of those nodes where " "this is not true for the local value or SBD is not active. When this is set " "to a negative value, `SBD_WATCHDOG_TIMEOUT` must be set to the same value on " "all nodes that use SBD, otherwise data corruption or loss could occur." msgstr "" "如果设置为正值,则假定丢失的节点在这段时间内使用基于watchdog的SBD进行自我防" "护。这不需要明确配置fence资源,但可以配置一个fence_watchdog资源,以限制特定节" "点的使用。如果设置为0(默认值),集群将永远不会假定基于watchdog的自我防护。如" "果设置为负值,且如果`SBD_WATCHDOG_TIMEOUT`环境变量的本地值为正值,则集群将使" "用该值的两倍,否则将其视为0。警告:在使用基于watchdog的SBD的所有节点上,此超" "时必须大于`SBD_WATCGDOG_TIMEOUT`,如果本地值不是这样,或者SBD未运行,则" "Pacemaker将拒绝在任何节点上启动。如果设置为负值,则在使用SBD的所有节点上," "`SBD_WATCHDOG_TIMEOUT`必须设置为相同的值,否则可能会发生数据损坏或丢失。" -#: daemons/controld/controld_control.c:652 +#: daemons/controld/controld_control.c:654 msgid "" "How many times fencing can fail before it will no longer be immediately re-" "attempted on a target" msgstr "fence操作失败多少次会停止立即尝试" -#: daemons/fenced/pacemaker-fenced.c:1378 +#: daemons/controld/controld_control.c:662 lib/pengine/common.c:39 +msgid "What to do when the cluster does not have quorum" +msgstr "当集群没有必需票数时该如何作" + +#: daemons/controld/controld_control.c:667 lib/pengine/common.c:73 +msgid "Whether to lock resources to a cleanly shut down node" +msgstr "是否锁定资源到完全关闭的节点" + +#: daemons/controld/controld_control.c:668 lib/pengine/common.c:74 +msgid "" +"When true, resources active on a node when it is cleanly shut down are kept " +"\"locked\" to that node (not allowed to run elsewhere) until they start " +"again on that node after it rejoins (or for at most shutdown-lock-limit, if " +"set). Stonith resources and Pacemaker Remote connections are never locked. " +"Clone and bundle instances and the promoted role of promotable clones are " +"currently never locked, though support could be added in a future release." 
+msgstr "" +"设置为true时,在完全关闭的节点上活动的资源将被“锁定”到该节点(不允许在其他地" +"方运行),直到该节点重新加入后资源重新启动(或最长shutdown-lock-limit,如果已" +"设置)。 Stonith资源和Pacemaker Remote连接永远不会被锁定。 克隆和捆绑实例以及" +"可升级克隆的主角色目前从未锁定,尽管可以在将来的发行版中添加支持。" + +#: daemons/controld/controld_control.c:680 lib/pengine/common.c:86 +msgid "Do not lock resources to a cleanly shut down node longer than this" +msgstr "资源会被锁定到完全关闭的节点的最长时间" + +#: daemons/controld/controld_control.c:682 lib/pengine/common.c:88 +msgid "" +"If shutdown-lock is true and this is set to a nonzero time duration, " +"shutdown locks will expire after this much time has passed since the " +"shutdown was initiated, even if the node has not rejoined." +msgstr "" +"如果shutdown-lock为true,并且将此选项设置为非零持续时间,则自从开始shutdown以" +"来经过了这么长的时间后,shutdown锁将过期,即使该节点尚未重新加入。" + +#: daemons/fenced/pacemaker-fenced.c:1379 msgid "Advanced use only: An alternate parameter to supply instead of 'port'" msgstr "仅高级使用:使用替代的参数名,而不是'port'" -#: daemons/fenced/pacemaker-fenced.c:1379 +#: daemons/fenced/pacemaker-fenced.c:1380 msgid "" "some devices do not support the standard 'port' parameter or may provide " "additional ones. Use this to specify an alternate, device-specific, " "parameter that should indicate the machine to be fenced. A value of none can " "be used to tell the cluster not to supply any additional parameters." msgstr "" "一些设备不支持标准的'port'参数,或者可能提供其他参数。使用此选项可指定一个该" "设备专用的参数名,该参数用于标识需要fence的机器。值none可以用于告诉集群不要提" "供任何其他的参数。" -#: daemons/fenced/pacemaker-fenced.c:1388 +#: daemons/fenced/pacemaker-fenced.c:1389 msgid "" "A mapping of host names to ports numbers for devices that do not support " "host names." msgstr "为不支持主机名的设备提供主机名到端口号的映射。" -#: daemons/fenced/pacemaker-fenced.c:1389 +#: daemons/fenced/pacemaker-fenced.c:1390 msgid "" "Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and " "ports 2 and 3 for node2" msgstr "" "例如 node1:1;node2:2,3,将会告诉集群对node1使用端口1,对node2使用端口2和3 " -#: daemons/fenced/pacemaker-fenced.c:1393 +#: daemons/fenced/pacemaker-fenced.c:1394 msgid "Eg. node1,node2,node3" msgstr "例如 node1,node2,node3" -#: daemons/fenced/pacemaker-fenced.c:1394 +#: daemons/fenced/pacemaker-fenced.c:1395 msgid "" "A list of machines controlled by this device (Optional unless " "pcmk_host_list=static-list)" msgstr "该设备控制的机器列表(可选参数,除非 pcmk_host_list 设置为 static-list)" -#: daemons/fenced/pacemaker-fenced.c:1399 +#: daemons/fenced/pacemaker-fenced.c:1400 msgid "How to determine which machines are controlled by the device." msgstr "如何确定设备控制哪些机器。" -#: daemons/fenced/pacemaker-fenced.c:1400 +#: daemons/fenced/pacemaker-fenced.c:1401 msgid "" "Allowed values: dynamic-list (query the device via the 'list' command), " "static-list (check the pcmk_host_list attribute), status (query the device " "via the 'status' command), none (assume every device can fence every machine)" msgstr "" "允许的值:dynamic-list(通过'list'命令查询设备),static-list(检查" "pcmk_host_list属性),status(通过'status'命令查询设备),none(假设每个设备" "都可fence 每台机器 )" -#: daemons/fenced/pacemaker-fenced.c:1409 -#: daemons/fenced/pacemaker-fenced.c:1418 +#: daemons/fenced/pacemaker-fenced.c:1410 +#: daemons/fenced/pacemaker-fenced.c:1419 msgid "Enable a base delay for fencing actions and specify base delay value." msgstr "在执行 fencing 操作前启用不超过指定时间的延迟。" -#: daemons/fenced/pacemaker-fenced.c:1410 +#: daemons/fenced/pacemaker-fenced.c:1411 msgid "" "Enable a delay of no more than the time specified before executing fencing " "actions. 
Pacemaker derives the overall delay by taking the value of " "pcmk_delay_base and adding a random delay value such that the sum is kept " "below this maximum." msgstr "" "在执行 fencing 操作前启用不超过指定时间的延迟。 Pacemaker通过获取" "pcmk_delay_base的值并添加随机延迟值来得出总体延迟,从而使总和保持在此最大值以" "下。" -#: daemons/fenced/pacemaker-fenced.c:1420 +#: daemons/fenced/pacemaker-fenced.c:1421 msgid "" "This enables a static delay for fencing actions, which can help avoid " "\"death matches\" where two nodes try to fence each other at the same time. " "If pcmk_delay_max is also used, a random delay will be added such that the " "total delay is kept below that value.This can be set to a single time value " "to apply to any node targeted by this device (useful if a separate device is " "configured for each target), or to a node map (for example, \"node1:1s;" "node2:5\") to set a different value per target." msgstr "" "这使fencing 操作启用静态延迟,这可以帮助避免\"death matches\"即两个节点试图同" "时互相fence.如果还使用了pcmk_delay_max,则将添加随机延迟,以使总延迟保持在该" "值以下。可以将其设置为单个时间值,以应用于该设备针对的任何节点(适用于为每个" "目标分别配置了各自的设备的情况), 或着设置为一个节点映射 (例如,\"node1:1s;" "node2:5\")从而为每个目标设置不同值。" -#: daemons/fenced/pacemaker-fenced.c:1432 +#: daemons/fenced/pacemaker-fenced.c:1433 msgid "" "The maximum number of actions can be performed in parallel on this device" msgstr "可以在该设备上并发执行的最多操作数量" -#: daemons/fenced/pacemaker-fenced.c:1433 +#: daemons/fenced/pacemaker-fenced.c:1434 msgid "" "Cluster property concurrent-fencing=true needs to be configured first.Then " "use this to specify the maximum number of actions can be performed in " "parallel on this device. -1 is unlimited." msgstr "" "需要首先配置集群属性 concurrent-fencing=true 。然后使用此参数指定可以在该设备" "上并发执行的最多操作数量。 -1 代表没有限制" -#: daemons/fenced/pacemaker-fenced.c:1438 +#: daemons/fenced/pacemaker-fenced.c:1439 msgid "Advanced use only: An alternate command to run instead of 'reboot'" msgstr "仅高级使用:运行替代命令,而不是'reboot'" -#: daemons/fenced/pacemaker-fenced.c:1439 +#: daemons/fenced/pacemaker-fenced.c:1440 msgid "" "Some devices do not support the standard commands or may provide additional " "ones.\n" "Use this to specify an alternate, device-specific, command that implements " "the 'reboot' action." msgstr "" "一些设备不支持标准命令或可能提供其他命令,使用此选项可以指定一个该设备特定的" "替代命令,用来实现'reboot'操作。" -#: daemons/fenced/pacemaker-fenced.c:1444 +#: daemons/fenced/pacemaker-fenced.c:1445 msgid "" "Advanced use only: Specify an alternate timeout to use for reboot actions " "instead of stonith-timeout" msgstr "仅高级使用:指定用于'reboot' 操作的替代超时,而不是stonith-timeout" -#: daemons/fenced/pacemaker-fenced.c:1445 +#: daemons/fenced/pacemaker-fenced.c:1446 msgid "" "Some devices need much more/less time to complete than normal.Use this to " "specify an alternate, device-specific, timeout for 'reboot' actions." msgstr "" "一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用" "于'reboot'操作的该设备特定的替代超时。" -#: daemons/fenced/pacemaker-fenced.c:1450 +#: daemons/fenced/pacemaker-fenced.c:1451 msgid "" "Advanced use only: The maximum number of times to retry the 'reboot' command " "within the timeout period" msgstr "仅高级使用:在超时前重试'reboot'命令的最大次数" -#: daemons/fenced/pacemaker-fenced.c:1451 +#: daemons/fenced/pacemaker-fenced.c:1452 msgid "" "Some devices do not support multiple connections. Operations may 'fail' if " "the device is busy with another task so Pacemaker will automatically retry " "the operation, if there is time remaining. Use this option to alter the " "number of times Pacemaker retries 'reboot' actions before giving up." 
msgstr "" "一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' ,因此" "Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重" "试'reboot' 操作的次数." -#: daemons/fenced/pacemaker-fenced.c:1457 +#: daemons/fenced/pacemaker-fenced.c:1458 msgid "Advanced use only: An alternate command to run instead of 'off'" msgstr "仅高级使用:运行替代命令,而不是'off'" -#: daemons/fenced/pacemaker-fenced.c:1458 +#: daemons/fenced/pacemaker-fenced.c:1459 msgid "" "Some devices do not support the standard commands or may provide additional " "ones.Use this to specify an alternate, device-specific, command that " "implements the 'off' action." msgstr "" "一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备专用的替代" "命令,用来实现'off'操作。" -#: daemons/fenced/pacemaker-fenced.c:1463 +#: daemons/fenced/pacemaker-fenced.c:1464 msgid "" "Advanced use only: Specify an alternate timeout to use for off actions " "instead of stonith-timeout" msgstr "仅高级使用:指定用于off 操作的替代超时,而不是stonith-timeout" -#: daemons/fenced/pacemaker-fenced.c:1464 +#: daemons/fenced/pacemaker-fenced.c:1465 msgid "" "Some devices need much more/less time to complete than normal.Use this to " "specify an alternate, device-specific, timeout for 'off' actions." msgstr "" "一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用" "于'off'操作的该设备特定的替代超时。" -#: daemons/fenced/pacemaker-fenced.c:1469 +#: daemons/fenced/pacemaker-fenced.c:1470 msgid "" "Advanced use only: The maximum number of times to retry the 'off' command " "within the timeout period" msgstr "仅高级使用:在超时前重试'off'命令的最大次数" -#: daemons/fenced/pacemaker-fenced.c:1470 +#: daemons/fenced/pacemaker-fenced.c:1471 msgid "" "Some devices do not support multiple connections. Operations may 'fail' if " "the device is busy with another task so Pacemaker will automatically retry " "the operation, if there is time remaining. Use this option to alter the " "number of times Pacemaker retries 'off' actions before giving up." msgstr "" " 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此" "Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重" "试'off' 操作的次数." -#: daemons/fenced/pacemaker-fenced.c:1476 +#: daemons/fenced/pacemaker-fenced.c:1477 msgid "Advanced use only: An alternate command to run instead of 'on'" msgstr "仅高级使用:运行替代命令,而不是'on'" -#: daemons/fenced/pacemaker-fenced.c:1477 +#: daemons/fenced/pacemaker-fenced.c:1478 msgid "" "Some devices do not support the standard commands or may provide additional " "ones.Use this to specify an alternate, device-specific, command that " "implements the 'on' action." msgstr "" "一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备特定的替" "代命令,用来实现'on'操作。" -#: daemons/fenced/pacemaker-fenced.c:1482 +#: daemons/fenced/pacemaker-fenced.c:1483 msgid "" "Advanced use only: Specify an alternate timeout to use for on actions " "instead of stonith-timeout" msgstr "仅高级使用:指定用于on 操作的替代超时,而不是stonith-timeout" -#: daemons/fenced/pacemaker-fenced.c:1483 +#: daemons/fenced/pacemaker-fenced.c:1484 msgid "" "Some devices need much more/less time to complete than normal.Use this to " "specify an alternate, device-specific, timeout for 'on' actions." msgstr "" "一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用" "于'on'操作的该设备特定的替代超时。" -#: daemons/fenced/pacemaker-fenced.c:1488 +#: daemons/fenced/pacemaker-fenced.c:1489 msgid "" "Advanced use only: The maximum number of times to retry the 'on' command " "within the timeout period" msgstr "仅高级使用:在超时前重试'on'命令的最大次数" -#: daemons/fenced/pacemaker-fenced.c:1489 +#: daemons/fenced/pacemaker-fenced.c:1490 msgid "" "Some devices do not support multiple connections. 
Operations may 'fail' if " "the device is busy with another task so Pacemaker will automatically retry " "the operation, if there is time remaining. Use this option to alter the " "number of times Pacemaker retries 'on' actions before giving up." msgstr "" " 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此" "Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重" "试'on' 操作的次数." -#: daemons/fenced/pacemaker-fenced.c:1495 +#: daemons/fenced/pacemaker-fenced.c:1496 msgid "Advanced use only: An alternate command to run instead of 'list'" msgstr "仅高级使用:运行替代命令,而不是'list'" -#: daemons/fenced/pacemaker-fenced.c:1496 +#: daemons/fenced/pacemaker-fenced.c:1497 msgid "" "Some devices do not support the standard commands or may provide additional " "ones.Use this to specify an alternate, device-specific, command that " "implements the 'list' action." msgstr "" "一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备特定的替" "代命令,用来实现'list'操作。" -#: daemons/fenced/pacemaker-fenced.c:1501 +#: daemons/fenced/pacemaker-fenced.c:1502 msgid "" "Advanced use only: Specify an alternate timeout to use for list actions " "instead of stonith-timeout" msgstr "仅高级使用:指定用于list 操作的替代超时,而不是stonith-timeout" -#: daemons/fenced/pacemaker-fenced.c:1502 +#: daemons/fenced/pacemaker-fenced.c:1503 msgid "" "Some devices need much more/less time to complete than normal.Use this to " "specify an alternate, device-specific, timeout for 'list' actions." msgstr "" "一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用" "于'list'操作的该设备特定的替代超时。" -#: daemons/fenced/pacemaker-fenced.c:1507 +#: daemons/fenced/pacemaker-fenced.c:1508 msgid "" "Advanced use only: The maximum number of times to retry the 'list' command " "within the timeout period" msgstr "仅高级使用:在超时前重试'list'命令的最大次数" -#: daemons/fenced/pacemaker-fenced.c:1508 +#: daemons/fenced/pacemaker-fenced.c:1509 msgid "" "Some devices do not support multiple connections. Operations may 'fail' if " "the device is busy with another task so Pacemaker will automatically retry " "the operation, if there is time remaining. Use this option to alter the " "number of times Pacemaker retries 'list' actions before giving up." msgstr "" " 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此" "Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重" "试'list' 操作的次数." -#: daemons/fenced/pacemaker-fenced.c:1514 +#: daemons/fenced/pacemaker-fenced.c:1515 msgid "Advanced use only: An alternate command to run instead of 'monitor'" msgstr "仅高级使用:运行替代命令,而不是'monitor'" -#: daemons/fenced/pacemaker-fenced.c:1515 +#: daemons/fenced/pacemaker-fenced.c:1516 msgid "" "Some devices do not support the standard commands or may provide additional " "ones.Use this to specify an alternate, device-specific, command that " "implements the 'monitor' action." msgstr "" "一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备特定的替" "代命令,用来实现'monitor'操作。" -#: daemons/fenced/pacemaker-fenced.c:1520 +#: daemons/fenced/pacemaker-fenced.c:1521 msgid "" "Advanced use only: Specify an alternate timeout to use for monitor actions " "instead of stonith-timeout" msgstr "仅高级使用:指定用于monitor 操作的替代超时,而不是stonith-timeout" -#: daemons/fenced/pacemaker-fenced.c:1521 +#: daemons/fenced/pacemaker-fenced.c:1522 msgid "" "Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for 'monitor' " "actions." 
msgstr "" "一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用" "于'monitor'操作的该设备特定的替代超时。" -#: daemons/fenced/pacemaker-fenced.c:1526 +#: daemons/fenced/pacemaker-fenced.c:1527 msgid "" "Advanced use only: The maximum number of times to retry the 'monitor' " "command within the timeout period" msgstr "仅高级使用:在超时前重试'monitor'命令的最大次数" -#: daemons/fenced/pacemaker-fenced.c:1527 +#: daemons/fenced/pacemaker-fenced.c:1528 msgid "" "Some devices do not support multiple connections. Operations may 'fail' if " "the device is busy with another task so Pacemaker will automatically retry " "the operation, if there is time remaining. Use this option to alter the " "number of times Pacemaker retries 'monitor' actions before giving up." msgstr "" " 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此" "Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重" "试'monitor' 操作的次数." -#: daemons/fenced/pacemaker-fenced.c:1533 +#: daemons/fenced/pacemaker-fenced.c:1534 msgid "Advanced use only: An alternate command to run instead of 'status'" msgstr "仅高级使用:运行替代命令,而不是'status'" -#: daemons/fenced/pacemaker-fenced.c:1534 +#: daemons/fenced/pacemaker-fenced.c:1535 msgid "" "Some devices do not support the standard commands or may provide additional " "ones.Use this to specify an alternate, device-specific, command that " "implements the 'status' action." msgstr "" "一些设备不支持标准命令或可能提供其他命令,使用此选项可指定一个该设备特定的替" "代命令,用来实现'status'操作。" -#: daemons/fenced/pacemaker-fenced.c:1539 +#: daemons/fenced/pacemaker-fenced.c:1540 msgid "" "Advanced use only: Specify an alternate timeout to use for status actions " "instead of stonith-timeout" msgstr "仅高级使用:指定用于status 操作的替代超时,而不是stonith-timeout" -#: daemons/fenced/pacemaker-fenced.c:1540 +#: daemons/fenced/pacemaker-fenced.c:1541 msgid "" "Some devices need much more/less time to complete than normal.Use this to " "specify an alternate, device-specific, timeout for 'status' actions." msgstr "" "一些设备需要比正常情况下更多或更少的时间来完成操作,使用此选项指定一个用" "于'status'操作的该设备特定的替代超时" -#: daemons/fenced/pacemaker-fenced.c:1545 +#: daemons/fenced/pacemaker-fenced.c:1546 msgid "" "Advanced use only: The maximum number of times to retry the 'status' command " "within the timeout period" msgstr "仅高级使用:在超时前重试'status'命令的最大次数" -#: daemons/fenced/pacemaker-fenced.c:1546 +#: daemons/fenced/pacemaker-fenced.c:1547 msgid "" "Some devices do not support multiple connections. Operations may 'fail' if " "the device is busy with another task so Pacemaker will automatically retry " "the operation, if there is time remaining. Use this option to alter the " "number of times Pacemaker retries 'status' actions before giving up." msgstr "" " 一些设备不支持多个连接。 如果设备忙于另一个任务,则操作可能会'失败' , 因此" "Pacemaker将自动重试(如果时间允许)。 使用此选项更改Pacemaker在放弃之前重" "试'status' 操作的次数." 
-#: daemons/fenced/pacemaker-fenced.c:1555 +#: daemons/fenced/pacemaker-fenced.c:1556 msgid "Instance attributes available for all \"stonith\"-class resources" msgstr " 可用于所有stonith类资源的实例属性" -#: daemons/fenced/pacemaker-fenced.c:1557 +#: daemons/fenced/pacemaker-fenced.c:1558 msgid "" "Instance attributes available for all \"stonith\"-class resources and used " "by Pacemaker's fence daemon, formerly known as stonithd" msgstr "" " 可用于所有stonith类资源的实例属性,并由Pacemaker的fence守护程序使用(以前称" "为stonithd)" -#: lib/cib/cib_utils.c:559 +#: lib/cib/cib_utils.c:589 msgid "Enable Access Control Lists (ACLs) for the CIB" msgstr "为CIB启用访问控制列表(ACL)" -#: lib/cib/cib_utils.c:565 +#: lib/cib/cib_utils.c:595 msgid "Maximum IPC message backlog before disconnecting a cluster daemon" msgstr "断开集群守护程序之前的最大IPC消息积压" -#: lib/cib/cib_utils.c:566 +#: lib/cib/cib_utils.c:596 msgid "" "Raise this if log has \"Evicting client\" messages for cluster daemon PIDs " "(a good value is the number of resources in the cluster multiplied by the " "number of nodes)." msgstr "" "如果日志中有针对集群守护程序PID的消息“Evicting client”,(则建议将值设为集群" "中的资源数量乘以节点数量)" #: lib/common/options.c:401 msgid " Allowed values: " msgstr " 允许的值: " #: lib/common/cmdline.c:70 msgid "Display software version and exit" msgstr "显示软件版本信息" #: lib/common/cmdline.c:73 msgid "Increase debug output (may be specified multiple times)" msgstr "显示更多调试信息(可多次指定)" #: lib/common/cmdline.c:92 msgid "FORMAT" msgstr "格式" #: lib/common/cmdline.c:94 msgid "Specify file name for output (or \"-\" for stdout)" msgstr "指定输出的文件名 或指定'-' 表示标准输出" #: lib/common/cmdline.c:94 msgid "DEST" msgstr "目标" #: lib/common/cmdline.c:100 msgid "Output Options:" msgstr "输出选项" #: lib/common/cmdline.c:100 msgid "Show output help" msgstr "显示输出帮助" -#: lib/pengine/common.c:39 -msgid "What to do when the cluster does not have quorum" -msgstr "当集群没有必需票数时该如何作" - #: lib/pengine/common.c:45 msgid "Whether resources can run on any node by default" msgstr "资源是否默认可以在任何节点上运行" #: lib/pengine/common.c:51 msgid "" "Whether the cluster should refrain from monitoring, starting, and stopping " "resources" msgstr "集群是否应避免监视,启动和停止资源" #: lib/pengine/common.c:58 msgid "" "Whether a start failure should prevent a resource from being recovered on " "the same node" msgstr "是否避免在同一节点上重启启动失败的资源" #: lib/pengine/common.c:60 msgid "" "When true, the cluster will immediately ban a resource from a node if it " "fails to start there. When false, the cluster will instead check the " "resource's fail count against its migration-threshold." msgstr "" "当为true,如果资源启动失败,集群将立即禁止节点启动该资源,当为false,群集将根" "据其迁移阈值来检查资源的失败计数。" #: lib/pengine/common.c:67 msgid "Whether the cluster should check for active resources during start-up" msgstr "群集是否在启动期间检查运行资源" -#: lib/pengine/common.c:73 -msgid "Whether to lock resources to a cleanly shut down node" -msgstr "是否锁定资源到完全关闭的节点" - -#: lib/pengine/common.c:74 -msgid "" -"When true, resources active on a node when it is cleanly shut down are kept " -"\"locked\" to that node (not allowed to run elsewhere) until they start " -"again on that node after it rejoins (or for at most shutdown-lock-limit, if " -"set). Stonith resources and Pacemaker Remote connections are never locked. " -"Clone and bundle instances and the promoted role of promotable clones are " -"currently never locked, though support could be added in a future release." 
-msgstr "" -"设置为true时,在完全关闭的节点上活动的资源将被“锁定”到该节点(不允许在其他地" -"方运行),直到该节点重新加入后资源重新启动(或最长shutdown-lock-limit,如果已" -"设置)。 Stonith资源和Pacemaker Remote连接永远不会被锁定。 克隆和捆绑实例以及" -"可升级克隆的主角色目前从未锁定,尽管可以在将来的发行版中添加支持。" - -#: lib/pengine/common.c:85 -msgid "Do not lock resources to a cleanly shut down node longer than this" -msgstr "资源会被锁定到完全关闭的节点的最长时间" - -#: lib/pengine/common.c:86 -msgid "" -"If shutdown-lock is true and this is set to a nonzero time duration, " -"shutdown locks will expire after this much time has passed since the " -"shutdown was initiated, even if the node has not rejoined." -msgstr "" -"如果shutdown-lock为true,并且将此选项设置为非零持续时间,则自从开始shutdown以" -"来经过了这么长的时间后,shutdown锁将过期,即使该节点尚未重新加入。" - -#: lib/pengine/common.c:95 +#: lib/pengine/common.c:98 msgid "" "*** Advanced Use Only *** Whether nodes may be fenced as part of recovery" msgstr "*** Advanced Use Only *** 节点是否可以被 fence 以作为集群恢复的一部分" -#: lib/pengine/common.c:97 +#: lib/pengine/common.c:100 msgid "" "If false, unresponsive nodes are immediately assumed to be harmless, and " "resources that were active on them may be recovered elsewhere. This can " "result in a \"split-brain\" situation, potentially leading to data loss and/" "or service unavailability." msgstr "" "如果为false,则立即假定无响应的节点是无害的,并且可以在其他位置恢复在其上活动" "的资源。 这可能会导致 \"split-brain\" 情况,可能导致数据丢失和/或服务不可用。" -#: lib/pengine/common.c:105 +#: lib/pengine/common.c:108 msgid "" "Action to send to fence device when a node needs to be fenced (\"poweroff\" " "is a deprecated alias for \"off\")" msgstr "发送到 fence 设备的操作( \"poweroff\" 是 \"off \"的别名,不建议使用)" -#: lib/pengine/common.c:112 +#: lib/pengine/common.c:115 msgid "*** Advanced Use Only *** Unused by Pacemaker" msgstr "*** Advanced Use Only *** pacemaker未使用" -#: lib/pengine/common.c:113 +#: lib/pengine/common.c:116 msgid "" "This value is not used by Pacemaker, but is kept for backward compatibility, " "and certain legacy fence agents might use it." msgstr "" "Pacemaker不使用此值,但保留此值是为了向后兼容,某些传统的fence 代理可能会使用" "它。" -#: lib/pengine/common.c:119 +#: lib/pengine/common.c:122 msgid "Whether watchdog integration is enabled" msgstr "是否启用watchdog集成设置" -#: lib/pengine/common.c:120 +#: lib/pengine/common.c:123 msgid "" "This is set automatically by the cluster according to whether SBD is " "detected to be in use. User-configured values are ignored. The value `true` " "is meaningful if diskless SBD is used and `stonith-watchdog-timeout` is " "nonzero. In that case, if fencing is required, watchdog-based self-fencing " "will be performed via SBD without requiring a fencing resource explicitly " "configured." msgstr "" "这是由集群检测是否正在使用 SBD 并自动设置。用户配置的值将被忽略。如果使用无" "盘 SBD 并且 stonith-watchdog-timeout 不为零时,此选项为 true 才有实际意义。在" "这种情况下,无需明确配置fence资源,如果需要fence时,基于watchdog的自我fence会" "通过SBD执行。" -#: lib/pengine/common.c:130 +#: lib/pengine/common.c:133 msgid "Allow performing fencing operations in parallel" msgstr "允许并行执行 fencing 操作" -#: lib/pengine/common.c:136 +#: lib/pengine/common.c:139 msgid "*** Advanced Use Only *** Whether to fence unseen nodes at start-up" msgstr "*** 仅高级使用 *** 是否在启动时fence不可见节点" -#: lib/pengine/common.c:137 +#: lib/pengine/common.c:140 msgid "" "Setting this to false may lead to a \"split-brain\" situation,potentially " "leading to data loss and/or service unavailability." 
msgstr "" "将此设置为 false 可能会导致 \"split-brain\" 的情况,可能导致数据丢失和/或服务" "不可用。" -#: lib/pengine/common.c:143 +#: lib/pengine/common.c:146 msgid "" "Apply fencing delay targeting the lost nodes with the highest total resource " "priority" msgstr "针对具有最高总资源优先级的丢失节点应用fencing延迟" -#: lib/pengine/common.c:144 +#: lib/pengine/common.c:147 msgid "" "Apply specified delay for the fencings that are targeting the lost nodes " "with the highest total resource priority in case we don't have the majority " "of the nodes in our cluster partition, so that the more significant nodes " "potentially win any fencing match, which is especially meaningful under " "split-brain of 2-node cluster. A promoted resource instance takes the base " "priority + 1 on calculation if the base priority is not 0. Any static/random " "delays that are introduced by `pcmk_delay_base/max` configured for the " "corresponding fencing resources will be added to this delay. This delay " "should be significantly greater than, safely twice, the maximum " "`pcmk_delay_base/max`. By default, priority fencing delay is disabled." msgstr "" "如果我们所在的集群分区并不拥有大多数集群节点,则针对丢失节点的fence操作应用指" "定的延迟,这样更重要的节点就能够赢得fence竞赛。这对于双节点集群在split-brain" "状况下尤其有意义。如果基本优先级不为0,在计算时主资源实例获得基本优先级+1。任" "何对于相应的 fence 资源由 pcmk_delay_base/max 配置所引入的静态/随机延迟会被添" "加到此延迟。为了安全, 这个延迟应该明显大于 pcmk_delay_base/max 的最大设置值," "例如两倍。默认情况下,优先级fencing延迟已禁用。" -#: lib/pengine/common.c:161 +#: lib/pengine/common.c:164 msgid "Maximum time for node-to-node communication" msgstr "最大节点间通信时间" -#: lib/pengine/common.c:162 +#: lib/pengine/common.c:165 msgid "" "The node elected Designated Controller (DC) will consider an action failed " "if it does not get a response from the node executing the action within this " "time (after considering the action's own timeout). The \"correct\" value " "will depend on the speed and load of your network and cluster nodes." msgstr "" "如果一个操作未在该时间内(并且考虑操作本身的超时时长)从执行该操作的节点获得" "响应,则会被选为指定控制器(DC)的节点认定为失败。\"正确\" 值将取决于速度和您" "的网络和集群节点的负载。" -#: lib/pengine/common.c:171 +#: lib/pengine/common.c:174 #, fuzzy msgid "" "Maximum number of jobs that the cluster may execute in parallel across all " "nodes" msgstr "集群可以在所有节点上并发执行的最大作业数" -#: lib/pengine/common.c:173 +#: lib/pengine/common.c:176 msgid "" "The \"correct\" value will depend on the speed and load of your network and " "cluster nodes. If set to 0, the cluster will impose a dynamically calculated " "limit when any node has a high load." msgstr "" "\"正确\" 值将取决于速度和您的网络与集群节点的负载。如果设置为0,当任何节点具" "有高负载时,集群将施加一个动态计算的限制。" -#: lib/pengine/common.c:181 +#: lib/pengine/common.c:184 msgid "" "The number of live migration actions that the cluster is allowed to execute " "in parallel on a node (-1 means no limit)" msgstr "允许集群在一个节点上并行执行的实时迁移操作的数量(-1表示没有限制)" -#: lib/pengine/common.c:189 +#: lib/pengine/common.c:192 #, fuzzy msgid "Whether the cluster should stop all active resources" msgstr "群集是否在启动期间检查运行资源" -#: lib/pengine/common.c:195 +#: lib/pengine/common.c:198 msgid "Whether to stop resources that were removed from the configuration" msgstr "是否停止配置已被删除的资源" -#: lib/pengine/common.c:201 +#: lib/pengine/common.c:204 msgid "Whether to cancel recurring actions removed from the configuration" msgstr "是否取消配置已被删除的的重复操作" -#: lib/pengine/common.c:207 +#: lib/pengine/common.c:210 msgid "" "*** Deprecated *** Whether to remove stopped resources from the executor" msgstr "***不推荐***是否从pacemaker-execd 守护进程中清除已停止的资源" -#: lib/pengine/common.c:209 +#: lib/pengine/common.c:212 msgid "" "Values other than default are poorly tested and potentially dangerous. 
This " "option will be removed in a future release." msgstr "非默认值未经过充分的测试,有潜在的风险。该选项将在未来的版本中删除。" -#: lib/pengine/common.c:217 +#: lib/pengine/common.c:220 msgid "The number of scheduler inputs resulting in errors to save" msgstr "保存导致错误的调度程序输入的数量" -#: lib/pengine/common.c:218 lib/pengine/common.c:224 lib/pengine/common.c:230 +#: lib/pengine/common.c:221 lib/pengine/common.c:227 lib/pengine/common.c:233 msgid "Zero to disable, -1 to store unlimited." msgstr "零表示禁用,-1表示存储不受限制。" -#: lib/pengine/common.c:223 +#: lib/pengine/common.c:226 msgid "The number of scheduler inputs resulting in warnings to save" msgstr "保存导致警告的调度程序输入的数量" -#: lib/pengine/common.c:229 +#: lib/pengine/common.c:232 msgid "The number of scheduler inputs without errors or warnings to save" msgstr "保存没有错误或警告的调度程序输入的数量" -#: lib/pengine/common.c:240 +#: lib/pengine/common.c:243 #, fuzzy msgid "How cluster should react to node health attributes" msgstr "集群节点对节点健康属性如何反应" -#: lib/pengine/common.c:241 +#: lib/pengine/common.c:244 msgid "" "Requires external entities to create node attributes (named with the prefix " "\"#health\") with values \"red\", \"yellow\", or \"green\"." msgstr "" "需要外部实体创建具有“red”,“yellow”或“green”值的节点属性(前缀为“#health”)" -#: lib/pengine/common.c:248 +#: lib/pengine/common.c:251 msgid "Base health score assigned to a node" msgstr "分配给节点的基本健康分数" -#: lib/pengine/common.c:249 +#: lib/pengine/common.c:252 msgid "Only used when \"node-health-strategy\" is set to \"progressive\"." msgstr "仅在“node-health-strategy”设置为“progressive”时使用。" -#: lib/pengine/common.c:254 +#: lib/pengine/common.c:257 msgid "The score to use for a node health attribute whose value is \"green\"" msgstr "为节点健康属性值为“green”所使用的分数" -#: lib/pengine/common.c:255 lib/pengine/common.c:261 lib/pengine/common.c:267 +#: lib/pengine/common.c:258 lib/pengine/common.c:264 lib/pengine/common.c:270 msgid "" "Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive" "\"." msgstr "仅在“node-health-strategy”设置为“custom”或“progressive”时使用。" -#: lib/pengine/common.c:260 +#: lib/pengine/common.c:263 msgid "The score to use for a node health attribute whose value is \"yellow\"" msgstr "为节点健康属性值为“yellow”所使用的分数" -#: lib/pengine/common.c:266 +#: lib/pengine/common.c:269 msgid "The score to use for a node health attribute whose value is \"red\"" msgstr "为节点健康属性值为“red”所使用的分数" -#: lib/pengine/common.c:275 +#: lib/pengine/common.c:278 #, fuzzy msgid "How the cluster should allocate resources to nodes" msgstr "群集应该如何分配资源到节点" #: tools/crm_resource.c:258 #, c-format msgid "Aborting because no messages received in %d seconds" msgstr "中止,因为在%d秒内没有接收到消息" -#: tools/crm_resource.c:909 +#: tools/crm_resource.c:915 #, c-format msgid "Invalid check level setting: %s" msgstr "无效的检查级别设置:%s" -#: tools/crm_resource.c:993 +#: tools/crm_resource.c:999 #, c-format msgid "" "Resource '%s' not moved: active in %d locations (promoted in %d).\n" "To prevent '%s' from running on a specific location, specify a node.To " "prevent '%s' from being promoted at a specific location, specify a node and " "the --promoted option." msgstr "" "资源'%s'未移动:在%d个位置运行(其中在%d个位置为主实例)\n" "若要阻止'%s'在特定位置运行,请指定一个节点。若要防止'%s'在指定位置升级,指定" "一个节点并使用--promoted选项" -#: tools/crm_resource.c:1004 +#: tools/crm_resource.c:1010 #, c-format msgid "" "Resource '%s' not moved: active in %d locations.\n" "To prevent '%s' from running on a specific location, specify a node." 
msgstr "" "资源%s未移动:在%d个位置运行\n" "若要防止'%s'运行在特定位置,指定一个节点" -#: tools/crm_resource.c:1079 +#: tools/crm_resource.c:1085 #, c-format msgid "Could not get modified CIB: %s\n" msgstr "无法获得修改的CIB:%s\n" -#: tools/crm_resource.c:1113 +#: tools/crm_resource.c:1119 msgid "You need to specify a resource type with -t" msgstr "需要使用-t指定资源类型" -#: tools/crm_resource.c:1156 +#: tools/crm_resource.c:1162 #, c-format msgid "No agents found for standard '%s'" msgstr "没有发现指定的'%s'标准代理" -#: tools/crm_resource.c:1159 +#: tools/crm_resource.c:1165 #, fuzzy, c-format msgid "No agents found for standard '%s' and provider '%s'" msgstr "没有发现指定的标准%s和提供者%S的资源代理" -#: tools/crm_resource.c:1226 +#: tools/crm_resource.c:1232 #, c-format msgid "No %s found for %s" msgstr "没有发现%s符合%s" -#: tools/crm_resource.c:1231 +#: tools/crm_resource.c:1237 #, c-format msgid "No %s found" msgstr "没有发现%s" -#: tools/crm_resource.c:1291 +#: tools/crm_resource.c:1297 #, c-format msgid "No cluster connection to Pacemaker Remote node %s detected" msgstr "未检测到至pacemaker远程节点%s的集群连接" -#: tools/crm_resource.c:1352 +#: tools/crm_resource.c:1358 msgid "Must specify -t with resource type" msgstr "需要使用-t指定资源类型" -#: tools/crm_resource.c:1358 +#: tools/crm_resource.c:1364 msgid "Must supply -v with new value" msgstr "必须使用-v指定新值" -#: tools/crm_resource.c:1390 +#: tools/crm_resource.c:1396 msgid "Could not create executor connection" msgstr "无法创建到pacemaker-execd守护进程的连接" -#: tools/crm_resource.c:1415 +#: tools/crm_resource.c:1421 #, fuzzy, c-format msgid "Metadata query for %s failed: %s" msgstr ",查询%s的元数据失败: %s\n" -#: tools/crm_resource.c:1421 +#: tools/crm_resource.c:1427 #, c-format msgid "'%s' is not a valid agent specification" msgstr "'%s' 是一个无效的代理" -#: tools/crm_resource.c:1434 +#: tools/crm_resource.c:1440 msgid "--resource cannot be used with --class, --agent, and --provider" msgstr "--resource 不能与 --class, --agent, --provider一起使用" -#: tools/crm_resource.c:1439 +#: tools/crm_resource.c:1445 msgid "" "--class, --agent, and --provider can only be used with --validate and --" "force-*" msgstr "--class, --agent和--provider只能被用于--validate和--force-*" -#: tools/crm_resource.c:1448 +#: tools/crm_resource.c:1454 msgid "stonith does not support providers" msgstr "stonith 不支持提供者" -#: tools/crm_resource.c:1452 +#: tools/crm_resource.c:1458 #, c-format msgid "%s is not a known stonith agent" msgstr "%s 不是一个已知stonith代理" -#: tools/crm_resource.c:1457 +#: tools/crm_resource.c:1463 #, c-format msgid "%s:%s:%s is not a known resource" msgstr "%s:%s:%s 不是一个已知资源" -#: tools/crm_resource.c:1571 +#: tools/crm_resource.c:1577 #, c-format msgid "Error creating output format %s: %s" msgstr "创建输出格式错误 %s:%s" -#: tools/crm_resource.c:1598 +#: tools/crm_resource.c:1604 msgid "--expired requires --clear or -U" msgstr "--expired需要和--clear或-U一起使用" -#: tools/crm_resource.c:1615 +#: tools/crm_resource.c:1621 #, c-format msgid "Error parsing '%s' as a name=value pair" msgstr "'%s'解析错误,格式为name=value" -#: tools/crm_resource.c:1712 +#: tools/crm_resource.c:1718 msgid "Must supply a resource id with -r" msgstr "必须使用-r指定资源id" -#: tools/crm_resource.c:1718 +#: tools/crm_resource.c:1724 msgid "Must supply a node name with -N" msgstr "必须使用-N指定节点名称" #: tools/crm_resource.c:1742 msgid "Could not create CIB connection" msgstr "无法创建到CIB的连接" #: tools/crm_resource.c:1750 #, c-format msgid "Could not connect to the CIB: %s" msgstr "不能连接到CIB:%s" #: tools/crm_resource.c:1771 #, c-format msgid "Resource '%s' not found" msgstr "没有发现'%s'资源" #: tools/crm_resource.c:1783 #, c-format msgid "Cannot operate on 
clone resource instance '%s'" msgstr "不能操作克隆资源实例'%s'" #: tools/crm_resource.c:1795 #, c-format msgid "Node '%s' not found" msgstr "没有发现%s节点" #: tools/crm_resource.c:1806 tools/crm_resource.c:1815 #, c-format msgid "Error connecting to the controller: %s" msgstr "连接到控制器错误:%s" -#: tools/crm_resource.c:2051 +#: tools/crm_resource.c:2064 msgid "You need to supply a value with the -v option" msgstr "需要使用-v选项提供一个值" -#: tools/crm_resource.c:2106 +#: tools/crm_resource.c:2119 #, c-format msgid "Unimplemented command: %d" msgstr "无效的命令:%d" -#: tools/crm_resource.c:2140 +#: tools/crm_resource.c:2149 #, c-format msgid "Error performing operation: %s" msgstr "执行操作错误:%s" #~ msgid "" #~ "If nonzero, along with `have-watchdog=true` automatically set by the " #~ "cluster, when fencing is required, watchdog-based self-fencing will be " #~ "performed via SBD without requiring a fencing resource explicitly " #~ "configured. If `stonith-watchdog-timeout` is set to a positive value, " #~ "unseen nodes are assumed to self-fence within this much time. +WARNING:+ " #~ "It must be ensured that this value is larger than the " #~ "`SBD_WATCHDOG_TIMEOUT` environment variable on all nodes. Pacemaker " #~ "verifies the settings individually on all nodes and prevents startup or " #~ "shuts down if configured wrongly on the fly. It's strongly recommended " #~ "that `SBD_WATCHDOG_TIMEOUT` is set to the same value on all nodes. If " #~ "`stonith-watchdog-timeout` is set to a negative value, and " #~ "`SBD_WATCHDOG_TIMEOUT` is set, twice that value will be used. +WARNING:+ " #~ "In this case, it's essential (currently not verified by Pacemaker) that " #~ "`SBD_WATCHDOG_TIMEOUT` is set to the same value on all nodes." #~ msgstr "" #~ "如果值非零,且集群设置了 `have-watchdog=true` ,当需要 fence 操作时,基于 " #~ "watchdog 的自我 fence 机制将通过SBD执行,而不需要显式配置 fence 资源。如" #~ "果 `stonith-watchdog-timeout` 被设为正值,则假定不可见的节点在这段时间内自" #~ "我fence。 +WARNING:+ 必须确保该值大于所有节点上的`SBD_WATCHDOG_TIMEOUT` 环" #~ "境变量。Pacemaker将在所有节点上单独验证设置,如发现有错误的动态配置,将防" #~ "止节点启动或关闭。强烈建议在所有节点上将 `SBD_WATCHDOG_TIMEOUT` 设置为相同" #~ "的值。如果 `stonith-watchdog-timeout` 设置为负值。并且设置了 " #~ "`SBD_WATCHDOG_TIMEOUT` ,则将使用该值的两倍, +WARNING:+ 在这种情况下,必" #~ "须将所有节点上 `SBD_WATCHDOG_TIMEOUT` 设置为相同的值(目前没有通过pacemaker" #~ "验证)。" diff --git a/python/pacemaker/_cts/environment.py b/python/pacemaker/_cts/environment.py index 9ee99f1a92..f81d9876e5 100644 --- a/python/pacemaker/_cts/environment.py +++ b/python/pacemaker/_cts/environment.py @@ -1,646 +1,650 @@ """ Test environment classes for Pacemaker's Cluster Test Suite (CTS) """ __all__ = ["EnvFactory"] __copyright__ = "Copyright 2014-2023 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import argparse import os import random import socket import sys import time from pacemaker._cts.logging import LogFactory from pacemaker._cts.remote import RemoteFactory from pacemaker._cts.watcher import LogKind class Environment: """ A class for managing the CTS environment, consisting largely of processing and storing command line parameters """ # pylint doesn't understand that self._rsh is callable (it stores the # singleton instance of RemoteExec, as returned by the getInstance method # of RemoteFactory). It's possible we could fix this with type annotations, # but those were introduced with python 3.5 and we only support python 3.4. # I think we could also fix this by getting rid of the getInstance methods, # but that's a project for another day. For now, just disable the warning. 
# pylint: disable=not-callable def __init__(self, args): """ Create a new Environment instance. This class can be treated kind of like a dictionary due to the presence of typical dict functions like has_key, __getitem__, and __setitem__. However, it is not a dictionary so do not rely on standard dictionary behavior. Arguments: args -- A list of command line parameters, minus the program name. If None, sys.argv will be used. """ self.data = {} self._nodes = [] # Set some defaults before processing command line arguments. These are # either not set by any command line parameter, or they need a default # that can't be set in add_argument. self["DeadTime"] = 300 self["StartTime"] = 300 self["StableTime"] = 30 self["tests"] = [] self["IPagent"] = "IPaddr2" self["DoFencing"] = True self["ClobberCIB"] = False self["CIBfilename"] = None self["CIBResource"] = False self["LogWatcher"] = LogKind.ANY self["node-limit"] = 0 self["scenario"] = "random" self.random_gen = random.Random() self._logger = LogFactory() self._rsh = RemoteFactory().getInstance() self._target = "localhost" self._seed_random() self._parse_args(args) if not self["ListTests"]: self._validate() self._discover() def _seed_random(self, seed=None): """ Initialize the random number generator with the given seed, or use the current time if None """ if not seed: seed = int(time.time()) self["RandSeed"] = seed self.random_gen.seed(str(seed)) def dump(self): """ Print the current environment """ keys = [] for key in list(self.data.keys()): keys.append(key) keys.sort() for key in keys: s = "Environment[%s]" % key self._logger.debug("{key:35}: {val}".format(key=s, val=str(self[key]))) def keys(self): """ Return a list of all environment keys stored in this instance """ return list(self.data.keys()) def has_key(self, key): """ Does the given environment key exist? """ if key == "nodes": return True return key in self.data def __getitem__(self, key): """ Return the given environment key, or None if it does not exist """ if str(key) == "0": raise ValueError("Bad call to 'foo in X', should reference 'foo in X.keys()' instead") if key == "nodes": return self._nodes if key == "Name": return self._get_stack_short() if key in self.data: return self.data[key] return None def __setitem__(self, key, value): """ Set the given environment key to the given value, overriding any previous value """ if key == "Stack": self._set_stack(value) elif key == "node-limit": self.data[key] = value self._filter_nodes() elif key == "nodes": self._nodes = [] for node in value: # I don't think I need the IP address, etc. but this validates # the node name against /etc/hosts and/or DNS, so it's a # GoodThing(tm). try: n = node.strip() socket.gethostbyname_ex(n) self._nodes.append(n) except: self._logger.log("%s not found in DNS... 
aborting" % node) raise self._filter_nodes() else: self.data[key] = value def random_node(self): """ Choose a random node from the cluster """ return self.random_gen.choice(self["nodes"]) def _set_stack(self, name): """ Normalize the given cluster stack name """ if name in ["corosync", "cs", "mcp"]: self.data["Stack"] = "corosync 2+" else: raise ValueError("Unknown stack: %s" % name) def _get_stack_short(self): """ Return the short name for the currently set cluster stack """ if "Stack" not in self.data: return "unknown" if self.data["Stack"] == "corosync 2+": return "crm-corosync" LogFactory().log("Unknown stack: %s" % self["stack"]) raise ValueError("Unknown stack: %s" % self["stack"]) + def _detect_systemd(self): + """ Detect whether systemd is in use on the target node """ + + if "have_systemd" not in self.data: + (rc, _) = self._rsh(self._target, "systemctl list-units", verbose=0) + self["have_systemd"] = rc == 0 + def _detect_syslog(self): """ Detect the syslog variant in use on the target node """ if "syslogd" not in self.data: if self["have_systemd"]: # Systemd (_, lines) = self._rsh(self._target, r"systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", verbose=1) self["syslogd"] = lines[0].strip() else: # SYS-V (_, lines) = self._rsh(self._target, "chkconfig --list | grep syslog.*on | awk '{print $1}' | head -n 1", verbose=1) self["syslogd"] = lines[0].strip() if "syslogd" not in self.data or not self["syslogd"]: # default self["syslogd"] = "rsyslog" def disable_service(self, node, service): """ Disable the given service on the given node """ if self["have_systemd"]: # Systemd (rc, _) = self._rsh(node, "systemctl disable %s" % service) return rc # SYS-V (rc, _) = self._rsh(node, "chkconfig %s off" % service) return rc def enable_service(self, node, service): """ Enable the given service on the given node """ if self["have_systemd"]: # Systemd (rc, _) = self._rsh(node, "systemctl enable %s" % service) return rc # SYS-V (rc, _) = self._rsh(node, "chkconfig %s on" % service) return rc def service_is_enabled(self, node, service): """ Is the given service enabled on the given node? """ if self["have_systemd"]: # Systemd # With "systemctl is-enabled", we should check if the service is # explicitly "enabled" instead of the return code. For example it returns # 0 if the service is "static" or "indirect", but they don't really count # as "enabled". (rc, _) = self._rsh(node, "systemctl is-enabled %s | grep enabled" % service) return rc == 0 # SYS-V (rc, _) = self._rsh(node, "chkconfig --list | grep -e %s.*on" % service) return rc == 0 def _detect_at_boot(self): """ Detect if the cluster starts at boot """ if "at-boot" not in self.data: self["at-boot"] = self.service_is_enabled(self._target, "corosync") \ or self.service_is_enabled(self._target, "pacemaker") def _detect_ip_offset(self): """ Detect the offset for IPaddr resources """ if self["CIBResource"] and "IPBase" not in self.data: (_, lines) = self._rsh(self._target, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", verbose=0) network = lines[0].strip() (_, lines) = self._rsh(self._target, "nmap -sn -n %s | grep 'scan report' | awk '{print $NF}' | sed 's:(::' | sed 's:)::' | sort -V | tail -n 1" % network, verbose=0) try: self["IPBase"] = lines[0].strip() except (IndexError, TypeError): self["IPBase"] = None if not self["IPBase"]: self["IPBase"] = " fe80::1234:56:7890:1000" self._logger.log("Could not determine an offset for IPaddr resources. 
Perhaps nmap is not installed on the nodes.") self._logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"]) return # pylint thinks self["IPBase"] is a list, not a string, which causes it # to error out because a list doesn't have split(). # pylint: disable=no-member if int(self["IPBase"].split('.')[3]) >= 240: self._logger.log("Could not determine an offset for IPaddr resources. Upper bound is too high: %s %s" % (self["IPBase"], self["IPBase"].split('.')[3])) self["IPBase"] = " fe80::1234:56:7890:1000" self._logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"]) def _filter_nodes(self): """ If --limit-nodes is given, keep that many nodes from the front of the list of cluster nodes and drop the rest """ if self["node-limit"] > 0: if len(self["nodes"]) > self["node-limit"]: # pylint thinks self["node-limit"] is a list even though we initialize # it as an int in __init__ and treat it as an int everywhere. # pylint: disable=bad-string-format-type self._logger.log("Limiting the number of nodes configured=%d (max=%d)" %(len(self["nodes"]), self["node-limit"])) while len(self["nodes"]) > self["node-limit"]: self["nodes"].pop(len(self["nodes"])-1) def _validate(self): """ Were we given all the required command line parameters? """ if not self["nodes"]: raise ValueError("No nodes specified!") def _discover(self): """ Probe cluster nodes to figure out how to log and manage services """ self._target = random.Random().choice(self["nodes"]) exerciser = socket.gethostname() # Use the IP where possible to avoid name lookup failures for ip in socket.gethostbyname_ex(exerciser)[2]: if ip != "127.0.0.1": exerciser = ip break self["cts-exerciser"] = exerciser - if "have_systemd" not in self.data: - (rc, _) = self._rsh(self._target, "systemctl list-units", verbose=0) - self["have_systemd"] = rc == 0 - + self._detect_systemd() self._detect_syslog() self._detect_at_boot() self._detect_ip_offset() def _parse_args(self, argv): """ Parse and validate command line parameters, setting the appropriate values in the environment dictionary. If argv is None, use sys.argv instead. 
""" if not argv: argv = sys.argv[1:] parser = argparse.ArgumentParser(epilog="%s -g virt1 -r --stonith ssh --schema pacemaker-2.0 500" % sys.argv[0]) grp1 = parser.add_argument_group("Common options") grp1.add_argument("-g", "--dsh-group", "--group", metavar="GROUP", dest="group", help="Use the nodes listed in the named DSH group (~/.dsh/groups/$name)") grp1.add_argument("-l", "--limit-nodes", type=int, default=0, metavar="MAX", help="Only use the first MAX cluster nodes supplied with --nodes") grp1.add_argument("--benchmark", action="store_true", help="Add timing information") grp1.add_argument("--list", "--list-tests", action="store_true", dest="list_tests", help="List the valid tests") grp1.add_argument("--nodes", metavar="NODES", help="List of cluster nodes separated by whitespace") grp1.add_argument("--stack", default="corosync", metavar="STACK", help="Which cluster stack is installed") grp2 = parser.add_argument_group("Options that CTS will usually auto-detect correctly") grp2.add_argument("-L", "--logfile", metavar="PATH", help="Where to look for logs from cluster nodes") grp2.add_argument("--at-boot", "--cluster-starts-at-boot", choices=["1", "0", "yes", "no"], help="Does the cluster software start at boot time?") grp2.add_argument("--facility", "--syslog-facility", default="daemon", metavar="NAME", help="Which syslog facility to log to") grp2.add_argument("--ip", "--test-ip-base", metavar="IP", help="Offset for generated IP address resources") grp3 = parser.add_argument_group("Options for release testing") grp3.add_argument("-r", "--populate-resources", action="store_true", help="Generate a sample configuration") grp3.add_argument("--choose", metavar="NAME", help="Run only the named test") grp3.add_argument("--fencing", "--stonith", choices=["1", "0", "yes", "no", "lha", "openstack", "rhcs", "rhevm", "scsi", "ssh", "virt", "xvm"], default="1", help="What fencing agent to use") grp3.add_argument("--once", action="store_true", help="Run all valid tests once") grp4 = parser.add_argument_group("Additional (less common) options") grp4.add_argument("-c", "--clobber-cib", action="store_true", help="Erase any existing configuration") grp4.add_argument("-y", "--yes", action="store_true", dest="always_continue", help="Continue to run whenever prompted") grp4.add_argument("--boot", action="store_true", help="") grp4.add_argument("--bsc", action="store_true", help="") grp4.add_argument("--cib-filename", metavar="PATH", help="Install the given CIB file to the cluster") grp4.add_argument("--container-tests", action="store_true", help="Include pacemaker_remote tests that run in lxc container resources") grp4.add_argument("--experimental-tests", action="store_true", help="Include experimental tests") grp4.add_argument("--loop-minutes", type=int, default=60, help="") grp4.add_argument("--no-loop-tests", action="store_true", help="Don't run looping/time-based tests") grp4.add_argument("--no-unsafe-tests", action="store_true", help="Don't run tests that are unsafe for use with ocfs2/drbd") grp4.add_argument("--notification-agent", metavar="PATH", default="/var/lib/pacemaker/notify.sh", help="Script to configure for Pacemaker alerts") grp4.add_argument("--notification-recipient", metavar="R", default="/var/lib/pacemaker/notify.log", help="Recipient to pass to alert script") grp4.add_argument("--oprofile", metavar="NODES", help="List of cluster nodes to run oprofile on") grp4.add_argument("--outputfile", metavar="PATH", help="Location to write logs to") grp4.add_argument("--qarsh", 
action="store_true", help="Use QARSH to access nodes instead of SSH") grp4.add_argument("--schema", metavar="SCHEMA", default="pacemaker-3.0", help="Create a CIB conforming to the given schema") grp4.add_argument("--seed", metavar="SEED", help="Use the given string as the random number seed") grp4.add_argument("--set", action="append", metavar="ARG", default=[], help="Set key=value pairs (can be specified multiple times)") grp4.add_argument("--stonith-args", metavar="ARGS", default="hostlist=all,livedangerously=yes", help="") grp4.add_argument("--stonith-type", metavar="TYPE", default="external/ssh", help="") grp4.add_argument("--trunc", action="store_true", dest="truncate", help="Truncate log file before starting") grp4.add_argument("--valgrind-procs", metavar="PROCS", default="pacemaker-attrd pacemaker-based pacemaker-controld pacemaker-execd pacemaker-fenced pacemaker-schedulerd", help="Run valgrind against the given space-separated list of processes") grp4.add_argument("--valgrind-tests", action="store_true", help="Include tests using valgrind") grp4.add_argument("--warn-inactive", action="store_true", help="Warn if a resource is assigned to an inactive node") parser.add_argument("iterations", type=int, help="Number of tests to run") args = parser.parse_args(args=argv) # Set values on this object based on what happened with command line # processing. This has to be done in several blocks. # These values can always be set. They get a default from the add_argument # calls, only do one thing, and they do not have any side effects. self["ClobberCIB"] = args.clobber_cib self["ListTests"] = args.list_tests self["Schema"] = args.schema self["Stack"] = args.stack self["SyslogFacility"] = args.facility self["TruncateLog"] = args.truncate self["at-boot"] = args.at_boot in ["1", "yes"] self["benchmark"] = args.benchmark self["continue"] = args.always_continue self["container-tests"] = args.container_tests self["experimental-tests"] = args.experimental_tests self["iterations"] = args.iterations self["loop-minutes"] = args.loop_minutes self["loop-tests"] = not args.no_loop_tests self["notification-agent"] = args.notification_agent self["notification-recipient"] = args.notification_recipient self["node-limit"] = args.limit_nodes self["stonith-params"] = args.stonith_args self["stonith-type"] = args.stonith_type self["unsafe-tests"] = not args.no_unsafe_tests self["valgrind-procs"] = args.valgrind_procs self["valgrind-tests"] = args.valgrind_tests self["warn-inactive"] = args.warn_inactive # Nodes and groups are mutually exclusive, so their defaults cannot be # set in their add_argument calls. Additionally, groups does more than # just set a value. Here, set nodes first and then if a group is # specified, override the previous nodes value. if args.nodes: self["nodes"] = args.nodes.split(" ") else: self["nodes"] = [] if args.group: self["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args.dsh_group) LogFactory().add_file(self["OutputFile"], "CTS") dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args.dsh_group) if os.path.isfile(dsh_file): self["nodes"] = [] with open(dsh_file, "r", encoding="utf-8") as f: for line in f: l = line.strip() if not l.startswith('#'): self["nodes"].append(l) else: print("Unknown DSH group: %s" % args.dsh_group) # Everything else either can't have a default set in an add_argument # call (likely because we don't want to always have a value set for it) # or it does something fancier than just set a single value. 
However, # order does not matter for these as long as the user doesn't provide # conflicting arguments on the command line. So just do Everything # alphabetically. if args.boot: self["scenario"] = "boot" if args.bsc: self["DoBSC"] = True self["scenario"] = "basic-sanity" if args.cib_filename: self["CIBfilename"] = args.cib_filename else: self["CIBfilename"] = None if args.choose: self["scenario"] = "sequence" self["tests"].append(args.choose) if args.fencing: if args.fencing in ["0", "no"]: self["DoFencing"] = False else: self["DoFencing"] = True if args.fencing in ["rhcs", "virt", "xvm"]: self["stonith-type"] = "fence_xvm" elif args.fencing == "scsi": self["stonith-type"] = "fence_scsi" elif args.fencing in ["lha", "ssh"]: self["stonith-params"] = "hostlist=all,livedangerously=yes" self["stonith-type"] = "external/ssh" elif args.fencing == "openstack": self["stonith-type"] = "fence_openstack" print("Obtaining OpenStack credentials from the current environment") self["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % ( os.environ['OS_REGION_NAME'], os.environ['OS_TENANT_NAME'], os.environ['OS_AUTH_URL'], os.environ['OS_USERNAME'], os.environ['OS_PASSWORD'] ) elif args.fencing == "rhevm": self["stonith-type"] = "fence_rhevm" print("Obtaining RHEV-M credentials from the current environment") self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % ( os.environ['RHEVM_USERNAME'], os.environ['RHEVM_PASSWORD'], os.environ['RHEVM_SERVER'], os.environ['RHEVM_PORT'], ) if args.ip: self["CIBResource"] = True self["ClobberCIB"] = True self["IPBase"] = args.ip if args.logfile: self["LogAuditDisabled"] = True self["LogFileName"] = args.logfile self["LogWatcher"] = LogKind.REMOTE_FILE else: # We can't set this as the default on the parser.add_argument call # for this option because then args.logfile will be set, which means # the above branch will be taken and those other values will also be # set. self["LogFileName"] = "/var/log/messages" if args.once: self["scenario"] = "all-once" if args.oprofile: self["oprofile"] = args.oprofile.split(" ") else: self["oprofile"] = [] if args.outputfile: self["OutputFile"] = args.outputfile LogFactory().add_file(self["OutputFile"]) if args.populate_resources: self["CIBResource"] = True self["ClobberCIB"] = True if args.qarsh: self._rsh.enable_qarsh() for kv in args.set: (name, value) = kv.split("=") self[name] = value print("Setting %s = %s" % (name, value)) class EnvFactory: """ A class for constructing a singleton instance of an Environment object """ instance = None # pylint: disable=invalid-name def getInstance(self, args=None): """ Returns the previously created instance of Environment, or creates a new instance if one does not already exist. """ if not EnvFactory.instance: EnvFactory.instance = Environment(args) return EnvFactory.instance
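
[Editor's note: for context on how the refactored Environment is consumed, here is a minimal usage sketch. The node names and argument list are illustrative, and constructing the object requires reachable cluster nodes, because __init__() probes them over ssh via _discover():

from pacemaker._cts.environment import EnvFactory

# "10" satisfies the required positional "iterations" argument.
args = ["--nodes", "node1 node2 node3", "--stonith", "ssh", "10"]

env = EnvFactory().getInstance(args)        # later calls return the same instance
print(env["Name"])                          # "crm-corosync", via _get_stack_short()
print(env["have_systemd"], env["syslogd"])  # auto-detected during _discover()
node = env.random_node()                    # reproducible with --seed / RandSeed

Because EnvFactory caches the first Environment it builds, the command line is parsed exactly once; subsequent getInstance() calls anywhere in CTS hand back the same probed, validated singleton.]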