diff --git a/cts/CM_LinuxHAv2.py.in b/cts/CM_LinuxHAv2.py.in
index 5786668df5..ea2f42ebe1 100755
--- a/cts/CM_LinuxHAv2.py.in
+++ b/cts/CM_LinuxHAv2.py.in
@@ -1,349 +1,349 @@
 #!@PYTHON@
 '''CTS: Cluster Testing System: LinuxHA v2 dependent modules...
 '''
 __copyright__='''
 Author: Huang Zhen
 Copyright (C) 2004 International Business Machines
 '''
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 import CTS
 from CTS import *
 from CM_hb import HeartbeatCM
 from xml.dom.minidom import *
 import CTSaudits
 from CTSaudits import ClusterAudit
 import CTStests
 from CTStests import *

 #######################################################################
 #
 #  LinuxHA v2 dependent modules
 #
 #######################################################################

 class LinuxHAv2(HeartbeatCM):
     '''
     The linux-ha version 2 cluster manager class.
     It implements the things we need to talk to and manipulate
     linux-ha version 2 clusters
     '''
     def __init__(self, Environment, randseed=None):
         HeartbeatCM.__init__(self, Environment, randseed=randseed)

         self.update({
             "Name"           : "linux-ha-v2",
             "DeadTime"       : 90,
             "StartCmd"       : "@libdir@/heartbeat/heartbeat >/dev/null 2>&1",
             "StopCmd"        : "@libdir@/heartbeat/heartbeat -k",
             "StatusCmd"      : "@libdir@/heartbeat/crmadmin -S %s 2>/dev/null",
             "IsRscRunning"   : "@libdir@/heartbeat/lrmadmin -E %s status 0 0 EVERYTIME 2>/dev/null|grep return",
             "ExecuteRscOp"   : "@libdir@/heartbeat/lrmadmin -E %s %s 0 0 EVERYTIME 2>/dev/null",
             "CIBfile"        : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml",

             # Patterns to look for in the log files for various occasions...
             "Pat:We_started"   : " %s crmd: .* State transition .*-> (S_NOT_DC|S_IDLE)",
             "Pat:They_started" : " %s crmd: .* State transition .*-> (S_NOT_DC|S_IDLE)",

             # Bad news Regexes. Should never occur.
             "BadRegexes"   : (
                 r"Shutting down\.",
                 r"Forcing shutdown\.",
                 r"Timer I_TERMINATE just popped",
                 r"Both machines own .* resources!",
                 r"No one owns .* resources!",
                 r", exiting\.",
                 r"ERROR:",
                 r"CRIT.*:",
             ),
         })

 #        self.rsh.cp(self.Env["cib_config"], self["CIBfile"]%self.Env["nodes"][0])
-        # KLUDGE! Expedient, but a Kludge (FIXME)
+        # KLUDGE! Expedient, but a Kludge (FIXME)
 #        CTStests.AllTestClasses = [FlipTest,RestartTest,StartOnebyOne,SimulStart,SimulStop,Split_brainTest,BandwidthTest]
         CTStests.AllTestClasses = [FlipTest, RestartTest, StartOnebyOne, SimulStart, SimulStop]
         CTSaudits.AllAuditClasses = [CrmdStateAudit, HAResourceAudit]

     def StataCM(self, node):
         '''Report the status of the cluster manager on a given node'''

         out=self.rsh.readaline(node, self["StatusCmd"]%node)
         ret= (string.find(out, 'ok') != -1)
         try:
             if ret:
                 if self.ShouldBeStatus[node] != self["up"]:
                     self.log(
                     "Node status for %s is %s but we think it should be %s"
-                    % (node, self["up"], self.ShouldBeStatus[node]))
+                    % (node, self["up"], self.ShouldBeStatus[node]))
             else:
                 if self.ShouldBeStatus[node] != self["down"]:
                     self.log(
                     "Node status for %s is %s but we think it should be %s"
-                    % (node, self["down"], self.ShouldBeStatus[node]))
-        except KeyError: pass
+                    % (node, self["down"], self.ShouldBeStatus[node]))
+        except KeyError: pass

-        if ret: self.ShouldBeStatus[node]=self["up"]
-        else: self.ShouldBeStatus[node]=self["down"]
+        if ret: self.ShouldBeStatus[node]=self["up"]
+        else: self.ShouldBeStatus[node]=self["down"]
         return ret

     def StartaCM(self, node):
         '''Start up the cluster manager on a given node'''

         watch = CTS.LogWatcher(self["LogFileName"]
         ,   [self["Pat:We_started"]%node]
         ,   60)
         watch.setwatch()
         self.log ("CM_LinuxHAv2.py: Starting %s on node %s"
         %(self["Name"], node))
         self.rsh(node, self["StartCmd"])
         if watch.look():
             self.ShouldBeStatus[node]=self["up"]
             return 1

         self.ShouldBeStatus[node]=self["down"]
         self.log ("Could not start %s on node %s"
-        % (self["Name"], node))
+        % (self["Name"], node))
         return None

     def Configuration(self):
         if not self.rsh.cp(self["CIBfile"]%self.Env["nodes"][0],self.Env["HAdir"]):
             raise ValueError("Can not copy file to %s, maybe permission denied"%self.Env["HAdir"])
         cib=parse("%s/cib.xml"%self.Env["HAdir"])
         return cib.getElementsByTagName('configuration')[0]

     def Resources(self):
         ResourceList = []
         #read resources in cib
         configuration=self.Configuration()
         resources=configuration.getElementsByTagName('resources')[0]
         rscs=configuration.getElementsByTagName('resource')
         for rsc in rscs:
             ResourceList.append(HAResource(self,rsc))
         return ResourceList

     def Dependancies(self):
         DependancyList = []
         #read dependancy in cib
         configuration=self.Configuration()
         constraints=configuration.getElementsByTagName('constraints')[0]
         rsc_to_rscs=configuration.getElementsByTagName('rsc_to_rsc')
         for node in rsc_to_rscs:
             dependancy = {}
             dependancy["id"]=node.getAttribute('id')
             dependancy["from"]=node.getAttribute('from')
             dependancy["to"]=node.getAttribute('to')
             dependancy["type"]=node.getAttribute('type')
             dependancy["strength"]=node.getAttribute('strength')
             DependancyList.append(dependancy)
         return DependancyList

 class HAResourceAudit(ClusterAudit):
     def __init__(self, cm):
         self.CM = cm

     def _RscRunningNodes(self, resource):
         ResourceNodes = []
         for node in self.CM.Env["nodes"]:
             if self.CM.ShouldBeStatus[node] == self.CM["up"]:
                 if resource.IsRunningOn(node):
                     ResourceNodes.append(node)
         return ResourceNodes

     def __call__(self):
         self.CM.log ("Do Audit %s"%self.name())
         passed = 1
         NodeofRsc = {}

         #Make sure the resouces are running on one and only one node
         Resources = self.CM.Resources()
         for resource in Resources :
             RunningNodes = self._RscRunningNodes(resource)
             NodeofRsc[resource.rid]=RunningNodes
             if len(RunningNodes) == 0 :
                 print resource.rid + " isn't running anywhere"
                 passed = 0
             if len(RunningNodes) > 1:
                 print resource.rid + " is running more than once: " \
                     + str(RunningNodes)
                 passed = 0

         #Make sure the resouces with "must","placement" constraint are running on the same node
         Dependancies = self.CM.Dependancies()
         for dependancy in Dependancies:
             if dependancy["type"] == "placement" and dependancy["strength"] == "must":
                 if NodeofRsc[dependancy["from"]] != NodeofRsc[dependancy["to"]]:
                     print dependancy["from"] + " and " + dependancy["to"] + " should be run on same node"
                     passed = 0

         return passed

     def name(self):
         return "HAResourceAudit"

 class HAResource(Resource):
     def __init__(self, cm, node):
         '''
         Get information from xml node
         '''
         self.rid         = node.getAttribute('id')
         self.rclass      = node.getAttribute('class')
         self.rtype       = node.getAttribute('type')
         self.rparameters = {}

         attributes = node.getElementsByTagName('instance_attributes')[0]
         parameters = node.getElementsByTagName('rsc_parameters')[0]
         nvpairs = node.getElementsByTagName('nvpair')
         for nvpair in nvpairs:
             name=nvpair.getAttribute('name')
             value=nvpair.getAttribute('value')
             self.rparameters[name]=value

         Resource.__init__(self, cm, self.rtype, self.rid)

     def IsRunningOn(self, nodename):
         '''
         This member function returns true if our resource is running
         on the given node in the cluster.
         We call the status operation for the resource script.
         '''
         out=self.CM.rsh.readaline(nodename, self.CM["IsRscRunning"]%self.rid)
         return re.search("0",out)

     def _ResourceOperation(self, operation, nodename):
         '''
         Execute an operation on the resource
         '''
         self.CM.rsh.readaline(nodename, self.CM["ExecuteRscOp"]%(self.rid,operation))
         return self.CM.rsh.lastrc == 0

     def Start(self, nodename):
         '''
         This member function starts or activates the resource.
         '''
         return self._ResourceOperation("start", nodename)

     def Stop(self, nodename):
         '''
         This member function stops or deactivates the resource.
         '''
         return self._ResourceOperation("stop", nodename)

     def IsWorkingCorrectly(self, nodename):
         return self._ResourceOperation("monitor", nodename)

 class CrmdStateAudit(ClusterAudit):
     def __init__(self, cm):
         self.CM = cm
         self.Stats = {"calls":0
-        ,       "success":0
-        ,       "failure":0
-        ,       "skipped":0
-        ,       "auditfail":0}
+        ,       "success":0
+        ,       "failure":0
+        ,       "skipped":0
+        ,       "auditfail":0}

     def has_key(self, key):
         return self.Stats.has_key(key)

     def __setitem__(self, key, value):
         self.Stats[key] = value

     def __getitem__(self, key):
         return self.Stats[key]

     def incr(self, name):
         '''Increment (or initialize) the value associated with the given name'''
         if not self.Stats.has_key(name):
             self.Stats[name]=0
         self.Stats[name] = self.Stats[name]+1

     def __call__(self):
         self.CM.log ("Do Audit %s"%self.name())
         passed = 1
-        dc_count = 0
-        up_count = 0
-        node_count = 0
-        up_are_down = 0
-        down_are_up = 0
-        slave_count = 0
-        unstable_count = 0
+        dc_count = 0
+        up_count = 0
+        node_count = 0
+        up_are_down = 0
+        down_are_up = 0
+        slave_count = 0
+        unstable_count = 0
         for node in self.CM.Env["nodes"]:
-            out=self.CM.rsh.readaline(node, self.CM["StatusCmd"]%node)
-            ret = (string.find(out, 'ok') != -1)
-            node_count = node_count + 1
-            if ret:
-                up_count = up_count + 1
-                if self.CM.ShouldBeStatus[node] == self.CM["down"]:
-                    self.CM.log(
-                    "Node %s %s when it should be %s"
-                    % (node, self.CM["up"], self.CM.ShouldBeStatus[node]))
-                    self.CM.ShouldBeStatus[node] = self.CM["up"]
-                    down_are_up = down_are_up + 1
-
-                ret= (string.find(out, 'S_NOT_DC') != -1)
-                if ret:
-                    slave_count = slave_count + 1
-                else:
-                    ret= (string.find(out, 'S_IDLE') != -1)
-                    if ret:
-                        dc_count = dc_count + 1
-                    else:
-                        unstable_count = unstable_count + 1
-            else:
-                if self.CM.ShouldBeStatus[node] == self.CM["up"]:
-                    self.CM.log(
-                    "Node %s %s when it should be %s"
-                    % (node, self.CM["down"], self.CM.ShouldBeStatus[node]))
-                    self.CM.ShouldBeStatus[node] = self.CM["down"]
-                    up_are_down = up_are_down + 1
-
-        if up_count > 0 and dc_count != 1:
-            passed = 0
-            self.CM.log("Exactly 1 node should be DC. We found %d (of %d)"
-                %(dc_count, up_count))
-
-        if unstable_count > 0:
-            passed = 0
-            self.CM.log("Cluster is not stable. We found %d (of %d) unstable nodes"
-                %(dc_count, up_count))
-
-        if up_are_down > 0:
-            passed = 0
-            self.CM.log("%d (of %d) nodes expected to be up were down."
-                %(up_are_down, node_count))
-
-        if down_are_up > 0:
-            passed = 0
-            self.CM.log("%d (of %d) nodes expected to be down were up."
-                %(down_are_up, node_count))
-
+            out=self.CM.rsh.readaline(node, self.CM["StatusCmd"]%node)
+            ret = (string.find(out, 'ok') != -1)
+            node_count = node_count + 1
+            if ret:
+                up_count = up_count + 1
+                if self.CM.ShouldBeStatus[node] == self.CM["down"]:
+                    self.CM.log(
+                    "Node %s %s when it should be %s"
+                    % (node, self.CM["up"], self.CM.ShouldBeStatus[node]))
+                    self.CM.ShouldBeStatus[node] = self.CM["up"]
+                    down_are_up = down_are_up + 1
+
+                ret= (string.find(out, 'S_NOT_DC') != -1)
+                if ret:
+                    slave_count = slave_count + 1
+                else:
+                    ret= (string.find(out, 'S_IDLE') != -1)
+                    if ret:
+                        dc_count = dc_count + 1
+                    else:
+                        unstable_count = unstable_count + 1
+            else:
+                if self.CM.ShouldBeStatus[node] == self.CM["up"]:
+                    self.CM.log(
+                    "Node %s %s when it should be %s"
+                    % (node, self.CM["down"], self.CM.ShouldBeStatus[node]))
+                    self.CM.ShouldBeStatus[node] = self.CM["down"]
+                    up_are_down = up_are_down + 1
+
+        if up_count > 0 and dc_count != 1:
+            passed = 0
+            self.CM.log("Exactly 1 node should be DC. We found %d (of %d)"
+                %(dc_count, up_count))
+
+        if unstable_count > 0:
+            passed = 0
+            self.CM.log("Cluster is not stable. We found %d (of %d) unstable nodes"
+                %(dc_count, up_count))
+
+        if up_are_down > 0:
+            passed = 0
+            self.CM.log("%d (of %d) nodes expected to be up were down."
+                %(up_are_down, node_count))
+
+        if down_are_up > 0:
+            passed = 0
+            self.CM.log("%d (of %d) nodes expected to be down were up."
+                %(down_are_up, node_count))
+
         return passed

     def name(self):
         return "CrmdStateAudit"

 #######################################################################
 #
 #   A little test code...
 #
 #   Which you are advised to completely ignore...
 #
 #######################################################################
 if __name__ == '__main__':
     pass