diff --git a/cts/CTSaudits.py.in b/cts/CTSaudits.py.in
index aeac9e9abb..4ea1a70a84 100755
--- a/cts/CTSaudits.py.in
+++ b/cts/CTSaudits.py.in
@@ -1,777 +1,795 @@
 #!@PYTHON@
 '''CTS: Cluster Testing System: Audit module
 '''
 __copyright__='''
 Copyright (C) 2000, 2001,2005 Alan Robertson
 Licensed under the GNU GPL.
 '''
 
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
 import time, os, popen2, string, re
 import CTS
 import os
 import popen2
 
 class ClusterAudit:
 
     def __init__(self, cm):
         self.CM = cm
 
     def __call__(self):
         raise ValueError("Abstract Class member (__call__)")
 
     def is_applicable(self):
         '''Return TRUE if we are applicable in the current test configuration'''
         raise ValueError("Abstract Class member (is_applicable)")
         return 1
 
     def name(self):
         raise ValueError("Abstract Class member (name)")
 
 AllAuditClasses = [ ]
 
-class ResourceAudit(ClusterAudit):
+class LogAudit(ClusterAudit):
 
     def name(self):
         return "LogAudit"
 
     def __init__(self, cm):
         self.CM = cm
 
     def RestartClusterLogging(self):
         self.CM.log("WARN: Restarting logging on cluster nodes")
         for node in self.CM.Env["nodes"]:
             cmd=self.CM.Env["logrestartcmd"]
             if self.CM.rsh.noBlock(node, cmd) != 0:
                 self.CM.log ("ERROR: Cannot restart logging on %s [%s failed]" % (node, cmd))
 
     def TestLogging(self):
         patterns= []
         prefix="Test message from "
         for node in self.CM.Env["nodes"]:
             patterns.append(prefix + node)
 
-        watch = LogWatcher(self["LogFileName"], patterns, 30 + len(self.CM.Env["nodes"]))
+        watch = CTS.LogWatcher(self.CM.Env["LogFileName"], patterns, 30 + len(self.CM.Env["nodes"]))
         watch.setwatch()
 
         for node in self.CM.Env["nodes"]:
             cmd="logger -p %s.info %s%s" % (self.CM.Env["logfacility"], prefix, node)
             if self.CM.rsh.noBlock(node, cmd) != 0:
                 self.CM.log ("ERROR: Cannot execute remote command [%s] on %s" % (cmd, node))
 
         watch_result = watch.lookforall()
         if watch.unmatched:
             self.CM.log("ERROR: Remote logging is not working.")
             for regex in watch.unmatched:
                 self.CM.log ("ERROR: Test message [%s] not found in logs." % (regex))
             return 0
 
         return 1
 
     def __call__(self):
         max=3
         attempt=0
         while attempt <= max and self.TestLogging() == 0:
             attempt = attempt + 1
             self.RestartClusterLogging()
             time.sleep(60*attempt)
 
         if attempt > max:
             self.CM.log("Cluster logging unrecoverable.")
             return 0
 
         if attempt > 0:
             self.CM.log("NOTE: Cluster logging now working.")
 
         return 1
 
     def is_applicable(self):
         if self.CM.Env["DoBSC"]:
             return 0
         return 1
 
 class DiskAudit(ClusterAudit):
 
     def name(self):
         return "DiskspaceAudit"
 
     def __init__(self, cm):
         self.CM = cm
 
     def __call__(self):
         result=1
         dfcmd="df -k /var/log | tail -1 | tr -s ' ' | cut -d' ' -f2"
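+        # Illustrative note (assumed "df -k" output): the pipeline squeezes
+        # the last line, e.g. "/dev/sda3 10475520 5119872 5355648 49% /var",
+        # and keeps field 2 of it, which is then parsed with int() below.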
 
         for node in self.CM.Env["nodes"]:
             dfout=self.CM.rsh.readaline(node, dfcmd)
             if not dfout:
                 self.CM.log ("ERROR: Cannot execute remote df command [%s] on %s" % (dfcmd, node))
             else:
                 try:
                     idfout = int(dfout)
                 except (ValueError, TypeError):
                     self.CM.log("Warning: df output from %s was invalid [%s]" % (node, dfout))
                 else:
                     if idfout == 0:
                         self.CM.log("CRIT: Completely out of log disk space on %s" % node)
                         result=None
                     elif idfout <= 1000:
                         self.CM.log("WARN: Low on log disk space (%d Mbytes) on %s" % (idfout, node))
         return result
 
     def is_applicable(self):
         if self.CM.Env["DoBSC"]:
             return 0
         return 1
 
+class AuditResource:
+    def __init__(self, cm, line):
+        fields = line.split()
+        self.CM = cm
+        self.type = fields[0]
+        self.id = fields[1]
+        if fields[2] == "NA":
+            self.parent = None
+        else:
+            self.parent = fields[2]
+        self.managed = fields[3]
+        self.needs_quorum = fields[4]
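+# Illustrative note: each line handed to AuditResource is assumed to be one
+# whitespace-separated row of "crm_resource -c" output, in the order
+#   <type> <id> <parent|NA> <managed> <needs_quorum>
+# e.g. "primitive rsc_example grp_example 1 1" (hypothetical names).
+# All fields stay strings, so note that "0" is still truthy in Python.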
"crm-ais" and self.CM.Env["ResCanStop"] == 0: + return 1 + return 0 + +class GroupAudit(PrimitiveAudit): + def name(self): + return "GroupAudit" + def __call__(self): + rc = 1 + if not self.setup(): + return 1 + + for group in self.resources: + if group.type == "group": + first_match = 1 + group_location = None + for child in self.resources: + if child.parent == group.id: + nodes = self.CM.ResourceLocation(child.id) + + if first_match and len(nodes) > 0: + group_location = nodes[0] + + first_match = 0 + + if len(nodes) > 1: + rc = 0 + self.CM.log("Child %s of %s is active more than once: %s" + % (child.id, group.id, repr(nodes))) + + elif len(nodes) == 0: + # Groups are allowed to be partially active + # However we do need to make sure later children aren't running + group_location = None + self.CM.debug("Child %s of %s is stopped" % (child.id, group.id)) + + elif nodes[0] != group_location: + rc = 0 + self.CM.log("Child %s of %s is active on the wrong node (%s) expected %s" + % (child.id, group.id, nodes[0], group_location)) + else: + self.CM.debug("Child %s of %s is active on %s" % (child.id, group.id, nodes[0])) + + return rc + +class CloneAudit(PrimitiveAudit): + def name(self): + return "CloneAudit" + + def __call__(self): + rc = 1 + if not self.setup(): + return 1 + + for clone in self.resources: + if clone.type == "clone": + for child in self.resources: + if child.parent == clone.id and child.type == "primitive": + self.CM.debug("Checking child %s of %s..." % (child.id, clone.id)) + # Check max and node_max + # Obtain with: + # crm_resource -g clone_max --meta -r child.id + # crm_resource -g clone_node_max --meta -r child.id + + return rc + class ResourceAudit(ClusterAudit): def name(self): return "ResourceAudit" def _doauditRsc(self, resource): ResourceNodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if resource.IsRunningOn(node): ResourceNodes.append(node) return ResourceNodes def _doaudit(self): '''Check to see if all resources are running in exactly one place in the cluster. We also verify that the members of a resource group are all running on the same node in the cluster, and we monitor that they are all running "properly". ''' Fatal = 0 result = [] # Thought: use self.CM.find_partitions() and make this audit # aware of partitions. Since in a split cluster one # partition may have quorum (and permission to run resources) # and the other not. Groups = self.CM.ResourceGroups() for group in Groups: GrpServedBy = None lastResource = None for resource in group: # # _doauditRsc returns the set of nodes serving # the given resource. This is normally a single node. # ResourceNodes = self._doauditRsc(resource) # Is the resource served without quorum present? if not self.CM.HasQuorum(None) and len(ResourceNodes) != 0 and resource.needs_quorum: result.append("Resource " + repr(resource) + " active without Quorum: " + repr(ResourceNodes)) # Is the resource served at all? elif len(ResourceNodes) == 0 and self.CM.HasQuorum(None): result.append("Resource " + repr(resource) + " not served anywhere.") # Is the resource served too many times? elif len(ResourceNodes) > 1: result.append("Resource " + repr(resource) + " served too many times: " + repr(ResourceNodes)) self.CM.log("Resource " + repr(resource) + " served too many times: " + repr(ResourceNodes)) Fatal = 1 elif GrpServedBy == None: GrpServedBy = ResourceNodes # Are all the members of the Rsc Grp served by the same node? 
+
+        return rc
+
 class ResourceAudit(ClusterAudit):
 
     def name(self):
         return "ResourceAudit"
 
     def _doauditRsc(self, resource):
         ResourceNodes = []
         for node in self.CM.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == self.CM["up"]:
                 if resource.IsRunningOn(node):
                     ResourceNodes.append(node)
         return ResourceNodes
 
     def _doaudit(self):
         '''Check to see if all resources are running in exactly one place
         in the cluster.
         We also verify that the members of a resource group are all
         running on the same node in the cluster,
         and we monitor that they are all running "properly".
         '''
         Fatal = 0
         result = []
 
         # Thought: use self.CM.find_partitions() and make this audit
         # aware of partitions.  Since in a split cluster one
         # partition may have quorum (and permission to run resources)
         # and the other not.
 
         Groups = self.CM.ResourceGroups()
         for group in Groups:
             GrpServedBy = None
             lastResource = None
             for resource in group:
 
                 #
                 # _doauditRsc returns the set of nodes serving
                 # the given resource.  This is normally a single node.
                 #
                 ResourceNodes = self._doauditRsc(resource)
 
                 # Is the resource served without quorum present?
                 if not self.CM.HasQuorum(None) and len(ResourceNodes) != 0 and resource.needs_quorum:
                     result.append("Resource " + repr(resource)
                                   + " active without Quorum: "
                                   + repr(ResourceNodes))
 
                 # Is the resource served at all?
                 elif len(ResourceNodes) == 0 and self.CM.HasQuorum(None):
                     result.append("Resource " + repr(resource)
                                   + " not served anywhere.")
 
                 # Is the resource served too many times?
                 elif len(ResourceNodes) > 1:
                     result.append("Resource " + repr(resource)
                                   + " served too many times: "
                                   + repr(ResourceNodes))
                     self.CM.log("Resource " + repr(resource)
                                 + " served too many times: "
                                 + repr(ResourceNodes))
                     Fatal = 1
                 elif GrpServedBy == None:
                     GrpServedBy = ResourceNodes
 
                 # Are all the members of the Rsc Grp served by the same node?
                 elif GrpServedBy != ResourceNodes:
                     result.append("Resource group resources" + repr(resource)
                                   + " running on different nodes: "
                                   + repr(ResourceNodes) + " vs " + repr(GrpServedBy)
                                   + "(otherRsc = " + repr(lastResource) + ")")
                     self.CM.log("Resource group resources" + repr(resource)
                                 + " running on different nodes: "
                                 + repr(ResourceNodes) + " vs " + repr(GrpServedBy)
                                 + "(otherRsc = " + repr(lastResource) + ")")
                     Fatal = 1
 
                 if self.CM.Env.has_key("SuppressMonitoring") and \
                        self.CM.Env["SuppressMonitoring"]:
                     continue
 
                 # Is the resource working correctly ?
                 if not Fatal and len(ResourceNodes) == 1:
                     beforearpchild = popen2.Popen3("date;/sbin/arp -n|cut -c1-15,26-50,75-", None)
                     beforearpchild.tochild.close()  # /dev/null
 
                     if not resource.IsWorkingCorrectly(ResourceNodes[0]):
                         afterarpchild = popen2.Popen3("/sbin/arp -n|cut -c1-15,26-50,75-", None)
                         afterarpchild.tochild.close()  # /dev/null
 
                         result.append("Resource " + repr(resource)
                                       + " not operating properly."
                                       + " Resource is running on " + ResourceNodes[0]);
                         Fatal = 1
                         self.CM.log("ARP table before failure ========");
                         for line in beforearpchild.fromchild.readlines():
                             self.CM.log(line)
                         self.CM.log("ARP table after failure ========");
                         for line in afterarpchild.fromchild.readlines():
                             self.CM.log(line)
                         self.CM.log("End of ARP tables ========");
                         try:
                             beforearpchild.wait()
                             afterarpchild.wait()
                         except OSError:
                             pass
                         afterarpchild.fromchild.close()
                     beforearpchild.fromchild.close()
 
                 lastResource = resource
 
         if (Fatal):
             result.insert(0, "FATAL")  # Kludgy.
 
         return result
 
     def __call__(self):
         #
         # Audit the resources.  Since heartbeat doesn't really
         # know when resource acquisition is complete, we will
         # poll until things get stable.
         #
         # Having a resource duplicately implemented is a Fatal Error
         # with no tolerance granted.
         #
         audresult = self._doaudit()
         #
         # Probably the constant below should be a CM parameter.
         # Then it could be 0 for FailSafe.
         # Of course, it really depends on what resources
         # you have in the test suite, and how long it takes
         # for them to settle.
         # Recently, we've changed heartbeat so we know better when
         # resource acquisition is done.
         #
         audcount=5;
         while(audcount > 0):
             audresult = self._doaudit()
             if (len(audresult) <= 0 or audresult[0] == "FATAL"):
                 audcount=0
             else:
                 audcount = audcount - 1
             if (audcount > 0):
                 time.sleep(1)
         if (len(audresult) > 0):
             self.CM.log("Fatal Audit error: " + repr(audresult))
 
         return (len(audresult) == 0)
 
     def is_applicable(self):
         if self.CM["Name"] == "heartbeat":
             return 1
         return 0
 
-class HAResourceAudit(ClusterAudit):
-    def __init__(self, cm):
-        self.CM = cm
-
-    def _RscRunningNodes(self, resource):
-        ResourceNodes = []
-        for node in self.CM.Env["nodes"]:
-            if self.CM.ShouldBeStatus[node] == self.CM["up"]:
-                if resource.IsRunningOn(node):
-                    ResourceNodes.append(node)
-        return ResourceNodes
-
-    def __call__(self):
-        passed = 1
-        NodeofRsc = {}
-        NumofInc = {}
-        MaxofInc = {}
-        self.CM.debug("Do Audit HAResourceAudit")
-
-        #Calculate the count of active nodes
-        up_count = 0;
-        for node in self.CM.Env["nodes"]:
-            if self.CM.ShouldBeStatus[node] == self.CM["up"]:
-                up_count += 1
-
-        #Make sure the resouces are running on one and only one node
-        Resources = self.CM.Resources()
-        for resource in Resources :
-            RunningNodes = self._RscRunningNodes(resource)
-            NodeofRsc[resource.rid]=RunningNodes
-            if resource.inc_name == None:
-                #Is the resource served without quorum present?
-                if not self.CM.HasQuorum(None) and len(RunningNodes) != 0 and resource.needs_quorum:
-                    self.CM.log("Resource " + repr(resource)
-                                + " active without Quorum: "
-                                + repr(RunningNodes))
-                    passed = 0
-                #Is the resource served at all?
-                elif len(RunningNodes) == 0 and self.CM.HasQuorum(None):
-                    self.CM.log("Resource " + repr(resource)
-                                + " not served anywhere.")
-                    passed = 0
-                # Is the resource served too many times?
-                elif len(RunningNodes) > 1:
-                    self.CM.log("Resource " + repr(resource)
-                                + " served too many times: "
-                                + repr(RunningNodes))
-                    passed = 0
-            else:
-                if not NumofInc.has_key(resource.inc_name):
-                    NumofInc[resource.inc_name]=0
-                    MaxofInc[resource.inc_name]=resource.inc_max
-                running = 1
-                #Is the resource served without quorum present?
-                if not self.CM.HasQuorum(None) and len(RunningNodes) != 0 and resource.needs_quorum == 1:
-                    self.CM.log("Resource " + repr(resource)
-                                + " active without Quorum: "
-                                + repr(RunningNodes))
-                    passed = 0
-                #Is the resource served at all?
-                elif len(RunningNodes) == 0 :
-                    running = 0
-                # Is the resource served too many times?
-                elif len(RunningNodes) > 1:
-                    self.CM.log("Resource " + repr(resource)
-                                + " served too many times: "
-                                + repr(RunningNodes))
-                    passed = 0
-
-                if running:
-                    NumofInc[resource.inc_name] += 1
-
-        if self.CM.HasQuorum(None):
-            for inc_name in NumofInc.keys():
-                if NumofInc[inc_name] != min(up_count, MaxofInc[inc_name]):
-                    passed = 0
-                    self.CM.log("Cloned resource "+ str(inc_name)
-                                +" has "+ str(NumofInc[inc_name])
-                                +" active instances (max: "
-                                + str(MaxofInc[inc_name])
-                                +", active nodes: "+ str(up_count) + ")")
-
-        Groups = self.CM.ResourceGroups()
-        for group in Groups :
-            group_printed = 0
-            first_rsc = group[0].rid
-            RunningNodes = NodeofRsc[first_rsc]
-            for rsc in group :
-                if RunningNodes != NodeofRsc[rsc.rid]:
-                    passed = 0
-
-                    if group_printed == 0:
-                        group_printed = 1
-                        self.CM.log("Group audit failed for: %s" % repr(group))
-                        if not NodeofRsc[first_rsc] or len(NodeofRsc[first_rsc]) == 0:
-                            self.CM.log("* %s not running" % first_rsc)
-                        else:
-                            self.CM.log("* %s running on %s"
-                                        %(first_rsc, repr(NodeofRsc[first_rsc])))
-
-                    if not NodeofRsc[rsc.rid] or len(NodeofRsc[rsc.rid]) == 0:
-                        self.CM.log("* %s not running" % rsc.rid)
-                    else:
-                        self.CM.log("* %s running on %s"
-                                    %(rsc.rid, repr(NodeofRsc[rsc.rid])))
-
-        # Make sure the resouces with "must","placement" constraint
-        # are running on the same node
-        Dependancies = self.CM.Dependencies()
-        for dependency in Dependancies:
-            if dependency["type"] == "placement" and dependency["strength"] == "must":
-                if NodeofRsc[dependency["from"]] != NodeofRsc[dependency["to"]]:
-                    print dependency["from"] + " and " + dependency["to"] + " should be run on same node"
-                    passed = 0
-
-        return passed
-
-    def is_applicable(self):
-        if self.CM["Name"] == "linux-ha-v2" and self.CM.Env["ResCanStop"] == 0:
-            return 1
-        if self.CM["Name"] == "crm-ais" and self.CM.Env["ResCanStop"] == 0:
-            return 1
-        return 0
-
-    def name(self):
-        return "HAResourceAudit"
-
-
 class CrmdStateAudit(ClusterAudit):
     def __init__(self, cm):
         self.CM = cm
         self.Stats = {"calls":0, "success":0, "failure":0, "skipped":0, "auditfail":0}
 
     def has_key(self, key):
         return self.Stats.has_key(key)
 
     def __setitem__(self, key, value):
         self.Stats[key] = value
 
     def __getitem__(self, key):
         return self.Stats[key]
 
     def incr(self, name):
         '''Increment (or initialize) the value associated with the given name'''
         if not self.Stats.has_key(name):
             self.Stats[name]=0
         self.Stats[name] = self.Stats[name]+1
 
     def __call__(self):
         passed = 1
         up_are_down = 0
         down_are_up = 0
         unstable_list = []
 
         self.CM.debug("Do Audit %s"%self.name())
 
         for node in self.CM.Env["nodes"]:
             should_be = self.CM.ShouldBeStatus[node]
             rc = self.CM.test_node_CM(node)
             if rc > 0:
                 if should_be == self.CM["down"]:
                     down_are_up = down_are_up + 1
                 if rc == 1:
                     unstable_list.append(node)
             elif should_be == self.CM["up"]:
                 up_are_down = up_are_down + 1
 
         if len(unstable_list) > 0:
             passed = 0
             self.CM.log("Cluster is not stable: %d (of %d): %s"
                         %(len(unstable_list), self.CM.upcount(), repr(unstable_list)))
 
         if up_are_down > 0:
             passed = 0
             self.CM.log("%d (of %d) nodes expected to be up were down."
                         %(up_are_down, len(self.CM.Env["nodes"])))
 
         if down_are_up > 0:
             passed = 0
             self.CM.log("%d (of %d) nodes expected to be down were up."
                         %(down_are_up, len(self.CM.Env["nodes"])))
 
         return passed
 
     def name(self):
         return "CrmdStateAudit"
 
     def is_applicable(self):
         if self.CM["Name"] == "linux-ha-v2":
             return 1
         if self.CM["Name"] == "crm-ais":
             return 1
         return 0
 
 class CIBAudit(ClusterAudit):
     def __init__(self, cm):
         self.CM = cm
         self.Stats = {"calls":0, "success":0, "failure":0, "skipped":0, "auditfail":0}
 
     def has_key(self, key):
         return self.Stats.has_key(key)
 
     def __setitem__(self, key, value):
         self.Stats[key] = value
 
     def __getitem__(self, key):
         return self.Stats[key]
 
     def incr(self, name):
         '''Increment (or initialize) the value associated with the given name'''
         if not self.Stats.has_key(name):
             self.Stats[name]=0
         self.Stats[name] = self.Stats[name]+1
 
     def __call__(self):
         self.CM.debug("Do Audit %s"%self.name())
         passed = 1
         ccm_partitions = self.CM.find_partitions()
 
         if len(ccm_partitions) == 0:
             self.CM.debug("\tNo partitions to audit")
             return 1
 
         for partition in ccm_partitions:
             self.CM.debug("\tAuditing CIB consistency for: %s" %partition)
             partition_passed = 0
             if self.audit_cib_contents(partition) == 0:
                 passed = 0
 
         return passed
 
     def audit_cib_contents(self, hostlist):
         passed = 1
         first_host = None
         first_host_xml = ""
         partition_hosts = hostlist.split()
         for a_host in partition_hosts:
             if first_host == None:
                 first_host = a_host
                 first_host_xml = self.store_remote_cib(a_host)
                 #self.CM.debug("Retrieved CIB: %s" % first_host_xml)
             else:
                 a_host_xml = self.store_remote_cib(a_host)
                 diff_cmd="@sbindir@/crm_diff -c -VV -f -N \'%s\' -O '%s'" % (a_host_xml, first_host_xml)
 
                 infile, outfile, errfile = os.popen3(diff_cmd)
                 diff_lines = outfile.readlines()
                 for line in diff_lines:
                     if not re.search("<diff", line):
                         passed = 0
                         self.CM.log("CibDiff[%s-%s]: %s" % (first_host, a_host, line))
                     else:
                         self.CM.debug("CibDiff[%s-%s] Ignoring: %s" % (first_host, a_host, line))
 
                 diff_lines = errfile.readlines()
                 for line in diff_lines:
                     passed = 0
                     self.CM.log("CibDiff[%s-%s] ERROR: %s" % (first_host, a_host, line))
 
         return passed
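+    # Illustrative note (assumptions): "hostlist" is one partition from
+    # find_partitions(), a space-separated string of node names such as
+    # "node1 node2". crm_diff stdout lines containing "<diff" are taken to be
+    # wrapper markup and ignored; any other stdout line, or anything at all
+    # on stderr, is treated as a real CIB difference and fails the audit.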
 
     def store_remote_cib(self, node):
         combined = ""
         first_line = 1
         extra_debug = 0
         #self.CM.debug("\tRetrieving CIB from: %s" % node)
         lines = self.CM.rsh.readlines(node, self.CM["CibQuery"])
 
         if extra_debug:
             self.CM.debug("Start Cib[%s]" % node)
         for line in lines:
             combined = combined + line[:-1]
             if first_line:
                 self.CM.debug("[Cib]" + line)
                 first_line = 0
             elif extra_debug:
                 self.CM.debug("[Cib]" + line)
 
         if extra_debug:
             self.CM.debug("End Cib[%s]" % node)
         #self.CM.debug("Complete CIB: %s" % combined)
         return combined
 
     def name(self):
         return "CibAudit"
 
     def is_applicable(self):
         if self.CM["Name"] == "linux-ha-v2":
             return 1
         if self.CM["Name"] == "crm-ais":
             return 1
         return 0
 
 class PartitionAudit(ClusterAudit):
     def __init__(self, cm):
         self.CM = cm
         self.Stats = {"calls":0, "success":0, "failure":0, "skipped":0, "auditfail":0}
         self.NodeEpoche={}
         self.NodeState={}
         self.NodeQuorum={}
 
     def has_key(self, key):
         return self.Stats.has_key(key)
 
     def __setitem__(self, key, value):
         self.Stats[key] = value
 
     def __getitem__(self, key):
         return self.Stats[key]
 
     def incr(self, name):
         '''Increment (or initialize) the value associated with the given name'''
         if not self.Stats.has_key(name):
             self.Stats[name]=0
         self.Stats[name] = self.Stats[name]+1
 
     def __call__(self):
         self.CM.debug("Do Audit %s"%self.name())
         passed = 1
         ccm_partitions = self.CM.find_partitions()
 
         if ccm_partitions == None or len(ccm_partitions) == 0:
             return 1
 
         if len(ccm_partitions) > 1:
             self.CM.log("ERROR: %d cluster partitions detected:" %len(ccm_partitions))
             passed = 0
             for partition in ccm_partitions:
                 self.CM.log("\t %s" %partition)
 
         for partition in ccm_partitions:
             partition_passed = 0
             if self.audit_partition(partition) == 0:
                 passed = 0
 
         return passed
 
     def trim_string(self, avalue):
         if not avalue:
             return None
         if len(avalue) > 1:
             return avalue[:-1]
 
     def trim2int(self, avalue):
         if not avalue:
             return None
         if len(avalue) > 1:
             return int(avalue[:-1])
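+    # Illustrative note: rsh.readaline() output is assumed to keep its
+    # trailing newline, which is why the helpers above strip the last
+    # character, e.g. trim2int("3\n") -> 3 and trim_string("1\n") -> "1".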
%d)" %(node, self.NodeEpoche[node], lowest_epoche)) passed = 0 if len(dc_found) == 0: self.CM.log("DC not found on any of the %d allowed nodes: %s (of %s)" %(len(dc_allowed_list), str(dc_allowed_list), str(node_list))) elif len(dc_found) > 1: self.CM.log("%d DCs (%s) found in cluster partition: %s" %(len(dc_found), str(dc_found), str(node_list))) passed = 0 - elif self.CM.Env["CIBResource"] == 1 and self.NodeQuorum[dc_found[0]] == "1": - self.CM.debug("%s: %s" % (dc_found[0], self.NodeQuorum[dc_found[0]])) - Resources = self.CM.Resources() - for node in node_list: - if self.CM.ShouldBeStatus[node] == self.CM["up"]: - for resource in Resources: - if resource.rid == "rsc_"+node: - if resource.IsRunningOn(node) == 0: - self.CM.log("Node %s is not running its own resource" %(node)) - passed = 0 - - elif self.CM.Env["CIBResource"] == 1: - # no quorum means no resource management - self.CM.debug("Not auditing resources - no quorum") - if passed == 0: for node in node_list: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.CM.log("epoche %s : %s" %(self.NodeEpoche[node], self.NodeState[node])) return passed def name(self): return "PartitionAudit" def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 if self.CM["Name"] == "crm-ais": return 1 return 0 AllAuditClasses.append(DiskAudit) -AllAuditClasses.append(LoggingAudit) +AllAuditClasses.append(LogAudit) AllAuditClasses.append(CrmdStateAudit) AllAuditClasses.append(PartitionAudit) AllAuditClasses.append(ResourceAudit) -AllAuditClasses.append(HAResourceAudit) +#AllAuditClasses.append(HAResourceAudit) +AllAuditClasses.append(PrimitiveAudit) +AllAuditClasses.append(GroupAudit) +AllAuditClasses.append(CloneAudit) AllAuditClasses.append(CIBAudit) def AuditList(cm): result = [] for auditclass in AllAuditClasses: result.append(auditclass(cm)) return result