diff --git a/cts/CM_LinuxHAv2.py.in b/cts/CM_LinuxHAv2.py.in index 478567f007..7f71d5de8e 100755 --- a/cts/CM_LinuxHAv2.py.in +++ b/cts/CM_LinuxHAv2.py.in @@ -1,534 +1,534 @@ #!@PYTHON@ '''CTS: Cluster Testing System: LinuxHA v2 dependent modules... ''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import os,sys,CTS,CTSaudits,CTStests from CTS import * from CM_hb import HeartbeatCM from xml.dom.minidom import * from CTSaudits import ClusterAudit from CTStests import * ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class LinuxHAv2(HeartbeatCM): ''' The linux-ha version 2 cluster manager class. 
It implements the things we need to talk to and manipulate linux-ha version 2 clusters ''' def __init__(self, Environment, randseed=None): HeartbeatCM.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "linux-ha-v2", "DeadTime" : 600, "StartTime" : 180, # Max time to start up "StableTime" : 10, "StartCmd" : "@libdir@/heartbeat/ha_logd -d >/dev/null 2>&1; @libdir@/heartbeat/heartbeat >/dev/null 2>&1", "StopCmd" : "@libdir@/heartbeat/heartbeat -k", "ElectionCmd" : "@libdir@/heartbeat/crmadmin -E %s", "StatusCmd" : "@libdir@/heartbeat/crmadmin -S %s 2>/dev/null", "EpocheCmd" : "@libdir@/heartbeat/ccm_tool -e", "QuorumCmd" : "@libdir@/heartbeat/ccm_tool -q", "ParitionCmd" : "@libdir@/heartbeat/ccm_tool -p", "IsRscRunning" : "@libdir@/heartbeat/lrmadmin -E %s monitor 0 0 EVERYTIME 2>/dev/null|grep return", "ExecuteRscOp" : "@libdir@/heartbeat/lrmadmin -E %s %s 0 0 EVERYTIME 2>/dev/null", "CIBfile" : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd2" : "/usr/lib/heartbeat/TestHeartbeatComm break-communication %s>/dev/null 2>&1", "IsIPAddrRscRunning" : "", # Patterns to look for in the log files for various occasions... "Pat:DC_IDLE" : "crmd:.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions # Use: "Pat:They_started" : "%s crmd:.*State transition.*-> S_NOT_DC", "Pat:They_started" : "Updating node state to member for %s", "Pat:We_started" : "%s crmd:.*State transition.*-> S_IDLE", "Pat:We_stopped" : "%s heartbeat.*Heartbeat shutdown complete", "Pat:They_stopped" : "%s crmd:.*LOST:.*%s", "Pat:All_stopped" : "%s .*heartbeat.*Heartbeat shutdown complete", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", # Bad news Regexes. Should never occur. 
"BadRegexes" : ( r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine:.*Attempting recovery of resource", r"pengine:.*Handling failed ", r"tengine:.*is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Resource .* was active at shutdown", r"ERROR:", r"CRIT:", ), }) del self["Standby"] self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} cib_prefix=''' ''' cib_options=''' ''' cib_glue_1=''' ''' cib_glue_2=''' ''' cib_suffix=''' ''' resources=''' ''' constraints=''' ''' cib_fencing = "" if self.Env["CIBResource"] == 1: self.log("Enabling DC resource") resources=''' - + - ''' + ''' % self.Env["IPBase"] # DcIPaddr cant run anywhere but the DC constraints=''' ''' - - ip_num=21 + fields = string.split(self.Env["IPBase"], '.') for node in self.Env["nodes"]: # These resources prefer to run on the node with the same name + fields[3] = str(int(fields[3])+1) + ip = string.join(fields, '.') node_resource=(""" - + - """ %("rsc_"+node, ip_num)) - ip_num=ip_num+1 + """ %("rsc_"+node, ip)) resources = resources + node_resource node_constraint=(""" """ % ("rsc_"+node, "rsc_"+node, "rsc_"+node, node)) constraints = constraints + node_constraint if self.Env["DoFencing"] == 1 : cib_options=cib_options + ''' ''' nodelist = "" for node in self.Env["nodes"]: nodelist += node + " " stonith_resource=(""" """ %(len(self.Env["nodes"]), nodelist)) resources = resources + stonith_resource self.default_cts_cib=cib_prefix + cib_options + cib_glue_1 + \ resources + cib_glue_2 + constraints + cib_suffix self.debug(self.default_cts_cib) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which 
are known and very noisey should be ignored''' if 1: return [ "crmadmin:" ] return [] def install_config(self, node): if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 if self.Env["CIBfilename"] == None: self.debug("Installing Generated CIB on node %s" %(node)) os.system("rm -f /tmp/cts.default.cib") os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib") if 0!=self.rsh.cp("/tmp/cts.default.cib", "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s "%node) os.system("rm -f /tmp/cts.default.cib") else: self.debug("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node)) if 0!=self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s "%node) def prepare(self): '''Finish the Initialization process. Prepare to test...''' for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current state: (S_IDLE|S_NOT_DC)") watchpats.append(self["Pat:They_started"]%node) idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats) idle_watch.setwatch() out=self.rsh.readaline(node, self["StatusCmd"]%node) ret= (string.find(out, 'ok') != -1) self.debug("Node %s status: %s" %(node, out)) if not ret: if self.ShouldBeStatus[node] == self["up"]: self.log( "Node status for %s is %s but we think it should be %s" %(node, self["down"], self.ShouldBeStatus[node])) self.ShouldBeStatus[node]=self["down"] return 0 if self.ShouldBeStatus[node] == self["down"]: self.log( "Node status for %s is %s but we think it should be %s: %s" %(node, self["up"], self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node]=self["up"] if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" %(node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): 
'''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" %(node)) return None def cluster_stable(self, timeout=None): watchpats = [ ] watchpats.append("Current state: S_IDLE") watchpats.append(self["Pat:DC_IDLE"]) if timeout == None: timeout = self["DeadTime"] idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats, timeout) idle_watch.setwatch() any_up = 0 for node in self.Env["nodes"]: # have each node dump its current state if self.ShouldBeStatus[node] == self["up"]: self.rsh.readaline(node, (self["StatusCmd"] %node) ) any_up = 1 if any_up == 0 or idle_watch.look(): return 1 self.log("Warn: Cluster Master not IDLE") return None def is_node_dc(self, node, status_line=None): rc = 0 if not status_line: status_line = self.rsh.readaline(node, self["StatusCmd"]%node) if not status_line: rc = 0 elif string.find(status_line, 'S_IDLE') != -1: rc = 1 elif string.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif string.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif string.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 if rc == 1: self.debug("%s _is_ the DC" % node) return rc def isolate_node(self, node, allowlist): '''isolate the communication between the nodes''' rc = self.rsh(node, self["BreakCommCmd2"]%allowlist) if rc == 0: return 1 else: self.log("Could not break the communication from node: %s",node) return None def Configuration(self): if self.Env["ClobberCIB"] == 1: if self.Env["CIBfilename"] == None: os.system("rm -f /tmp/cts.default.cib") os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib") cib=parse("/tmp/cts.default.cib") # os.system("rm -f /tmp/cts.default.cib") else: 
cib=parse(self.Env["CIBfilename"]) else: local_cib = "%s/cts_cib_%s.xml"%(self["TmpDir"],str(os.getpid())) if 0!=self.rsh.cp("root@"+self["CIBfile"]%self.Env["nodes"][0],local_cib): raise ValueError("Can not copy file to %s, maybe permission denied"%self["TmpDir"]) cib=parse(local_cib) os.remove(local_cib) return cib.getElementsByTagName('configuration')[0] def Resources(self): ResourceList = [] #read resources in cib configuration = self.Configuration() resources = configuration.getElementsByTagName('resources')[0] rscs = configuration.getElementsByTagName('resource') for rsc in rscs: if rsc in resources.childNodes: ResourceList.append(HAResource(self,rsc)) incs = configuration.getElementsByTagName('incarnation') for inc in incs: max = 0 inc_name = inc.getAttribute("id") instance_attributes = inc.getElementsByTagName('instance_attributes')[0] attributes = instance_attributes.getElementsByTagName('attributes')[0] nvpairs = attributes.getElementsByTagName('nvpair') for nvpair in nvpairs: if nvpair.getAttribute("name") == "incarnation_max": max = int(nvpair.getAttribute("value")) inc_rsc = inc.getElementsByTagName('resource')[0] for i in range(0,max): rsc = HAResource(self,inc_rsc) rsc.inc_no = i rsc.inc_name = inc_name rsc.inc_max = max rsc.rid = inc_name+":"+rsc.rid + ":%d"%i rsc.Instance = rsc.rid ResourceList.append(rsc) return ResourceList def Dependancies(self): DependancyList = [] #read dependancy in cib configuration=self.Configuration() constraints=configuration.getElementsByTagName('constraints')[0] rsc_to_rscs=configuration.getElementsByTagName('rsc_to_rsc') for node in rsc_to_rscs: dependancy = {} dependancy["id"]=node.getAttribute('id') dependancy["from"]=node.getAttribute('from') dependancy["to"]=node.getAttribute('to') dependancy["type"]=node.getAttribute('type') dependancy["strength"]=node.getAttribute('strength') DependancyList.append(dependancy) return DependancyList def find_partitions(self): ccm_partitions = [] for node in self.Env["nodes"]: if 
self.ShouldBeStatus[node] == self["up"]: partition = self.rsh.readaline(node, self["ParitionCmd"]) if not partition: self.log("no partition details for %s" %node) elif len(partition) > 2: partition = partition[:-1] for a_partition in ccm_partitions: if partition != a_partition: ccm_partitions.append(partition) else: self.log("bad partition details for %s" %node) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == self["up"]: quorum = self.rsh.readaline(node, self["QuorumCmd"]) return string.find(quorum,"1") != -1 return 0 def Components(self): complist = [Process("lrmd",self),Process("crmd",self)] if self.Env["DoFencing"] == 1 : complist.append(Process("stonithd",self)) complist.append(Process("heartbeat",self)) return complist class HAResource(Resource): def __init__(self, cm, node): ''' Get information from xml node ''' self.rid = str(node.getAttribute('id')) self.rclass = str(node.getAttribute('class')) self.rtype = str(node.getAttribute('type')) self.inc_name = None self.inc_no = -1 self.inc_max = -1 self.rparameters = {} list = node.getElementsByTagName('instance_attributes') if len(list) > 0: attributes = list[0] list = attributes.getElementsByTagName('attributes') if len(list) > 0: parameters = list[0] nvpairs = parameters.getElementsByTagName('nvpair') for nvpair in nvpairs: name=nvpair.getAttribute('name') value=nvpair.getAttribute('value') self.rparameters[name]=value Resource.__init__(self, cm, self.rtype, self.rid) def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. We call the status operation for the resource script. 
''' out=self.CM.rsh.readaline(nodename, self.CM["IsRscRunning"]%self.rid) return re.search("0",out) def RunningNodes(self): ResourceNodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if self.IsRunningOn(node): ResourceNodes.append(node) return ResourceNodes def _ResourceOperation(self, operation, nodename): ''' Execute an operation on the resource ''' self.CM.rsh.readaline(nodename, self.CM["ExecuteRscOp"]%(self.rid,operation)) return self.CM.rsh.lastrc == 0 def Start(self, nodename): ''' This member function starts or activates the resource. ''' return self._ResourceOperation("start", nodename) def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' return self._ResourceOperation("stop", nodename) def IsWorkingCorrectly(self, nodename): return self._ResourceOperation("monitor", nodename) ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': pass diff --git a/cts/CTSlab.py.in b/cts/CTSlab.py.in index 890514e349..a5dda7926c 100755 --- a/cts/CTSlab.py.in +++ b/cts/CTSlab.py.in @@ -1,669 +1,675 @@ #!@PYTHON@ '''CTS: Cluster Testing System: Lab environment module ''' __copyright__=''' Copyright (C) 2001 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

from UserDict import UserDict
import sys, time, types, syslog, whrandom, os, struct, string
from CTS import ClusterManager
from CM_hb import HeartbeatCM
from socket import gethostbyname_ex


class ResetMechanism:
    '''Abstract base class for anything that can reset a node.'''

    def reset(self, node):
        raise ValueError("Abstract class member (reset)")


class Stonith(ResetMechanism):
    '''Reset mechanism that shells out to the stonith command.'''

    def __init__(self, sttype="ssh", parm="foobar",
                 path="@sbindir@/stonith"):
        self.pathname = path
        self.configstring = parm
        self.stonithtype = sttype

    def reset(self, node):
        '''Run the stonith command against node; true if it exited with 0.'''
        cmdstring = "%s -t '%s' -p '%s' '%s' 2>/dev/null" % (
            self.pathname, self.stonithtype, self.configstring, node)
        return (os.system(cmdstring) == 0)


class Stonithd(ResetMechanism):
    '''Reset mechanism that drives the stonithd test client over ssh.

    A stonith resource is registered with lrmadmin on the initiating node
    when it is not already known to be started there, and removed again
    afterwards.
    '''

    def __init__(self, nodes, sttype = 'ssh'):
        self.sttype = sttype
        self.nodes = nodes

        # Command templates; the %s is filled with the target node.
        self.query_cmd_pat = '/usr/lib/heartbeat/stonithdtest/apitest 0 %s 1000 0'
        self.reset_cmd_pat = '/usr/lib/heartbeat/stonithdtest/apitest 1 %s 1000 0'
        self.poweron_cmd_pat = '/usr/lib/heartbeat/stonithdtest/apitest 2 %s 1000 0'
        self.poweroff_cmd_pat= '/usr/lib/heartbeat/stonithdtest/apitest 3 %s 1000 0'

        # lrmadmin templates; the %s is filled with the resource id
        # (and the host list for the add command).
        self.lrmd_add_pat = '/usr/lib/heartbeat/lrmadmin -A %s stonith ' + sttype + ' NULL hostlist=%s'
        self.lrmd_start_pat = '/usr/lib/heartbeat/lrmadmin -E %s start 0 0 0'
        self.lrmd_stop_pat = '/usr/lib/heartbeat/lrmadmin -E %s stop 0 0 0'
        self.lrmd_del_pat = '/usr/lib/heartbeat/lrmadmin -D %s'

        self.rsc_id = 'my_stonithd_id'

        # -n: no stdin, -x: no X11
        self.command = "/usr/bin/ssh -l root -n -x"
        # -f: ssh to background
        self.command_noblock = "/usr/bin/ssh -f -l root -n -x"
        #self.startall = SimulStartLite(cm)
        #self.restart = RestartTest(self.CM)

        # Nodes on which start_stonithd() has been called by this object.
        self.stonithd_started_nodes = []

    def log(self, msg):
        'Just print it'
        # Logging is currently a no-op.
        pass

    def remote_exec(self, node, cmnd):
        '''Run cmnd on node through ssh and return the os.system() status.'''
        return os.system("%s %s %s > /dev/null" % (self.command, node, cmnd))

    def stonithd_started(self, node):
        '''Return true if this object has started stonithd on node.'''
        return node in self.stonithd_started_nodes

    def start_stonithd(self, node, hosts):
        '''Add and start the stonith resource on node for the given hosts.'''
        hostlist = ",".join(hosts)

        lrmd_add_cmd = self.lrmd_add_pat % (self.rsc_id, hostlist)
        self.log("%s : %s" % (node, lrmd_add_cmd))
        self.remote_exec(node, lrmd_add_cmd)

        lrmd_start_cmd = self.lrmd_start_pat % self.rsc_id
        self.log("%s : %s" % (node, lrmd_start_cmd))
        self.remote_exec(node, lrmd_start_cmd)

        self.stonithd_started_nodes.append(node)
        return 1

    def stop_stonithd(self, node):
        '''Stop and delete the stonith resource on node.'''
        lrmd_stop_cmd = self.lrmd_stop_pat % self.rsc_id
        self.log("%s : %s" % (node, lrmd_stop_cmd))
        self.remote_exec(node, lrmd_stop_cmd)

        lrmd_del_cmd = self.lrmd_del_pat % self.rsc_id
        self.log("%s : %s" % (node, lrmd_del_cmd))
        self.remote_exec(node, lrmd_del_cmd)

        self.stonithd_started_nodes.remove(node)
        return 1

    def do_stonith(self, init_node, target_node, action):
        '''Ask init_node to perform action on target_node.

        action is one of "RESET", "POWEROFF" or "POWERON"; any other
        value raises ValueError before anything is run remotely.
        Always returns 1; the status of the remote command is not checked.
        '''
        # Build the command first so that a bad action never touches a node.
        if action == "RESET":
            command = self.reset_cmd_pat % target_node
        elif action == "POWEROFF":
            command = self.poweroff_cmd_pat % target_node
        elif action == "POWERON":
            command = self.poweron_cmd_pat % target_node
        else:
            raise ValueError("Unknown stonith action: %s" % action)

        stonithd_started = self.stonithd_started(init_node)
        if not stonithd_started:
            self.log("Node %s : not start stonithd yet, start it" % init_node)
            self.start_stonithd(init_node, [target_node])

        self.log("Stonithd %s node %s from node %s" % (action, target_node, init_node))
        self.log("%s : %s" % (init_node, command))
        self.remote_exec(init_node, command)

        # Only tear down what this call set up.
        if not stonithd_started:
            self.log("Node %s : stop stonithd" % init_node)
            self.stop_stonithd(init_node)

        return 1

    # Should we random choose a node as init_node here if init_node not specified?
    def reset(self, init_node, target_node):
        return self.do_stonith(init_node, target_node, "RESET")

    def poweron(self, init_node, target_node):
        return self.do_stonith(init_node, target_node, "POWERON")

    def poweroff(self, init_node, target_node):
        return self.do_stonith(init_node, target_node, "POWEROFF")


class Logger:
    '''Abstract base class for log sinks; instances are called with text.'''

    TimeFormat = "%Y/%m/%d_%H:%M:%S\t"

    def __call__(self, lines):
        raise ValueError("Abstract class member (__call__)")


class SysLog(Logger):
    '''Log sink that writes through the syslog module.'''

    # http://docs.python.org/lib/module-syslog.html
    defaultsource="CTS"
    defaultfacility= syslog.LOG_LOCAL7

    # Facility names accepted in configuration -> syslog constants.
    map = {
        "kernel": syslog.LOG_KERN,
        "user": syslog.LOG_USER,
        "mail": syslog.LOG_MAIL,
        "daemon": syslog.LOG_DAEMON,
        "auth": syslog.LOG_AUTH,
        "lpr": syslog.LOG_LPR,
        "news": syslog.LOG_NEWS,
        "uucp": syslog.LOG_UUCP,
        "cron": syslog.LOG_CRON,
        "local0": syslog.LOG_LOCAL0,
        "local1": syslog.LOG_LOCAL1,
        "local2": syslog.LOG_LOCAL2,
        "local3": syslog.LOG_LOCAL3,
        "local4": syslog.LOG_LOCAL4,
        "local5": syslog.LOG_LOCAL5,
        "local6": syslog.LOG_LOCAL6,
        "local7": syslog.LOG_LOCAL7,
    }

    def __init__(self, labinfo):
        '''Open syslog using the "syslogsource" and "SyslogFacility"
        entries of labinfo, falling back to the class defaults.'''
        if labinfo.has_key("syslogsource"):
            self.source = labinfo["syslogsource"]
        else:
            self.source = SysLog.defaultsource

        # NOTE(review): the nesting of the original if/else could not be
        # recovered with certainty; this follows setfacility(): names found
        # in the map are translated, anything else is passed through.
        if labinfo.has_key("SyslogFacility"):
            facility = labinfo["SyslogFacility"]
        else:
            facility = SysLog.defaultfacility
        self.facility = SysLog.map.get(facility, facility)

        syslog.openlog(self.source, 0, self.facility)

    def setfacility(self, facility):
        '''Switch to another facility (a name from the map, or a constant)
        and reopen syslog.'''
        self.facility = SysLog.map.get(facility, facility)
        syslog.closelog()
        syslog.openlog(self.source, 0, self.facility)

    def __call__(self, lines):
        '''Log one string, or each element of a sequence of strings.'''
        if isinstance(lines, types.StringType):
            syslog.syslog(lines)
        else:
            for line in lines:
                syslog.syslog(line)

    def name(self):
        return "Syslog"


class StdErrLog(Logger):
    '''Log sink that writes time-stamped lines to the original stderr.'''

    def __init__(self, labinfo):
        pass

    def __call__(self, lines):
        '''Write one string, or each element of a sequence, then flush.'''
        t = time.strftime(Logger.TimeFormat, time.localtime(time.time()))
        if isinstance(lines, types.StringType):
            sys.__stderr__.writelines([t, lines, "\n"])
        else:
            for line in lines:
                sys.__stderr__.writelines([t, line, "\n"])
        sys.__stderr__.flush()

    def name(self):
        return "StdErrLog"


class FileLog(Logger):
    '''Log sink that appends time-stamped lines to a file.'''

    def __init__(self, labinfo, filename=None):
        '''Use filename, or labinfo["logfile"] when filename is None.'''
        if filename is None:
            filename = labinfo["logfile"]
        self.logfile = filename

    def __call__(self, lines):
        '''Append one string, or each element of a sequence of strings.'''
        fd = open(self.logfile, "a")
        try:
            t = time.strftime(Logger.TimeFormat, time.localtime(time.time()))
            if isinstance(lines, types.StringType):
                fd.writelines([t, lines, "\n"])
            else:
                for line in lines:
                    fd.writelines([t, line, "\n"])
        finally:
            # Close the file even if a write fails.
            fd.close()

    def name(self):
        return "FileLog"


class CtsLab(UserDict):
    '''This class defines the Lab Environment for the Cluster Test System.
    It defines those things which are expected to change from test
    environment to test environment for the same cluster manager.

    It is where you define the set of nodes that are in your test lab
    what kind of reset mechanism you use, etc.

    This class is derived from a UserDict because we hold many
    different parameters of different kinds, and this provides
    a uniform and extensible interface useful for any kind of
    communication between the user/administrator/tester and CTS.

    At this point in time, it is the intent of this class to model static
    configuration and/or environmental data about the environment which
    doesn't change as the tests proceed.

    Well-known names (keys) are an important concept in this class.
    The HasMinimalKeys member function knows the minimal set of
    well-known names for the class.

    The following names are standard (well-known) at this time:

        nodes           An array of the nodes in the cluster
        reset           A ResetMechanism object
        logger          An array of objects that log strings...
        CMclass         The type of ClusterManager we are running
                        (This is a class object, not a class instance)
        RandSeed        Random seed. It is a triple of bytes. (optional)
        HAdir           Base directory for HA installation

    The CTS code ignores names it doesn't know about/need.
    The individual tests have access to this information, and it is
    perfectly acceptable to provide hints, tweaks, fine-tuning
    directions or other information to the tests through this mechanism.
    '''

    def __init__(self, nodes):
        self.data = {}
        self["nodes"] = nodes
        self.MinimalKeys=["nodes", "reset", "logger", "CMclass", "HAdir"]

    def HasMinimalKeys(self):
        'Return TRUE if our object has the minimal set of keys/values in it'
        for key in self.MinimalKeys:
            if not self.has_key(key):
                return None
        return 1

    def SupplyDefaults(self):
        '''Fill in defaults for missing well-known keys and seed the
        random number generator.'''
        if not self.has_key("logger"):
            self["logger"] = (SysLog(self), StdErrLog(self))
        if not self.has_key("reset"):
            self["reset"] = Stonith()
        if not self.has_key("CMclass"):
            self["CMclass"] = HeartbeatCM
        if not self.has_key("HAdir"):
            self["HAdir"] = "@sysconfdir@/ha.d"
        if not self.has_key("LogFileName"):
            self["LogFileName"] = "/var/log/ha-log"

        #
        # Now set up our random number generator...
        #
        self.RandomGen = whrandom.whrandom()

        # Get a random seed for the random number generator.
        if self.has_key("RandSeed"):
            randseed = self["RandSeed"]
        else:
            f = open("/dev/urandom", "r")
            try:
                seed_bytes = f.read(3)
            finally:
                f.close()
            randseed = struct.unpack("BBB", seed_bytes)

        self.log("Random seed is: " + str(randseed))
        self.randseed = randseed

        self.RandomGen.seed(randseed[0], randseed[1], randseed[2])

    def log(self, args):
        "Log using each of the supplied logging methods"
        for logfcn in self._logfunctions:
            logfcn(string.strip(args))

    def debug(self, args):
        "Log using each of the supplied logging methods except StdErrLog"
        for logfcn in self._logfunctions:
            if logfcn.name() != "StdErrLog":
                logfcn(string.strip(args))

    def __setitem__(self, key, value):
        '''Since this function gets called whenever we modify the
        dictionary (object), we can (and do) validate those keys that we
        know how to validate. For the most part, we know how to validate
        the "MinimalKeys" elements.
        '''

        #
        # List of nodes in the system
        #
        if key == "nodes":
            self.Nodes = {}
            for node in value:
                # I don't think I need the IP address, etc. but this validates
                # the node name against /etc/hosts and/or DNS, so it's a
                # GoodThing(tm).
                self.Nodes[node] = gethostbyname_ex(node)
            if len(value) < 2:
                raise ValueError("Must have at least two nodes in system")

        #
        # Reset Mechanism
        #
        elif key == "reset":
            if not isinstance(value, ResetMechanism):
                raise ValueError("'reset' Value must be a subclass"
                                 " of ResetMechanism")

        #
        # List of Logging Mechanism(s)
        #
        elif key == "logger":
            if len(value) < 1:
                raise ValueError("Must have at least one logging mechanism")
            for logger in value:
                if not callable(logger):
                    raise ValueError("'logger' elements must be callable")
            self._logfunctions = value

        #
        # Cluster Manager Class
        #
        elif key == "CMclass":
            if not issubclass(value, ClusterManager):
                raise ValueError("'CMclass' must be a subclass of"
                                 " ClusterManager")

        #
        # Initial Random seed...
        #
        elif key == "RandSeed":
            if len(value) != 3:
                raise ValueError("'Randseed' must be a 3-element list/tuple")
            for elem in value:
                if not isinstance(elem, types.IntType):
                    raise ValueError("'Randseed' list must all be ints")

        self.data[key] = value

    def IsValidNode(self, node):
        'Return TRUE if the given node is valid'
        return self.Nodes.has_key(node)

    def __CheckNode(self, node):
        "Raise a ValueError if the given node isn't valid"
        if not self.IsValidNode(node):
            raise ValueError("Invalid node [%s] in CheckNode" % node)

    def RandomNode(self):
        '''Choose a random node from the cluster'''
        return self.RandomGen.choice(self["nodes"])

    def ResetNode(self, node):
        "Reset a node, (normally) using a hardware mechanism"
        self.__CheckNode(node)
        return self["reset"].reset(node)

    def ResetNode2(self, init_node, target_node):
        "Reset target_node through Stonithd, initiated from init_node"
        self.__CheckNode(target_node)
        return Stonithd(self["nodes"]).reset(init_node, target_node)


def usage(arg):
    '''Report arg as illegal, print the option summary and exit with 1.'''
    print("Illegal argument " + arg)
    print("usage: " + sys.argv[0]
          + " --directory config-directory"
          + " -D config-directory"
          + " --logfile system-logfile-name"
          + " --trunc (truncate logfile before starting)"
          + " -L system-logfile-name"
          + " --limit-nodes maxnumnodes"
          + " --xmit-loss lost-rate(0.0-1.0)"
          + " --recv-loss lost-rate(0.0-1.0)"
          + " --suppressmonitoring"
          + " --syslog-facility syslog-facility"
          + " --facility syslog-facility"
          + " --choose testcase-name"
          + " --test-ip-base ip"
          + " (-2 |"
          + " -v2 |"
          + " --crm |"
          + " --classic)"
          + " --resource-can-stop"
          + " --stonith (1 | 0 | yes | no)"
          + " --standby (1 | 0 | yes | no)"
          + " --fencing (1 | 0 | yes | no)"
          + " [number-of-iterations]")
    sys.exit(1)


#
# A little test code...
#
if __name__ == '__main__':

    from CTSaudits import AuditList
    from CTStests import TestList,RandomTests
    from CTS import Scenario, InitClusterManager, PingFest, PacketLoss
    import CM_hb

    HAdir = "/etc/ha.d"
    LogFile = "/var/log/ha-log-local7"
    DoStonith = 1
    DoStandby = 1
    DoFencing = 0
    NumIter = 500
    SuppressMonitoring = None
    Version = 1
    CIBfilename = None
    CIBResource = 0
    ClobberCIB = 0
    LimitNodes = 0
    TestCase = None
    LogFacility = None
    TruncateLog = 0
    ResCanStop = 0
    XmitLoss = "0.0"
    RecvLoss = "0.0"
    IPBase = "127.0.0.10"

    #
    # The values of the rest of the parameters are now properly derived from
    # the configuration files.
    #
    # Stonith is configurable because it's slow, I have a few machines which
    # don't reboot very reliably, and it can mild damage to your machine if
    # you're using a real power switch.
    #
    # Standby is configurable because the test is very heartbeat specific
    # and I haven't written the code to set it properly yet. Patches are
    # being accepted...

    # Process arguments...

    skipthis=None
    args=sys.argv[1:]
    for i in range(0, len(args)):
        if skipthis:
            # This element was consumed as the value of the previous option.
            skipthis=None
            continue
        try:
            if args[i] == "-D" or args[i] == "--directory":
                skipthis=1
                HAdir = args[i+1]
            elif args[i] == "-l" or args[i] == "--limit-nodes":
                skipthis=1
                LimitNodes = int(args[i+1])
            elif args[i] == "-r":
                CIBResource = 1
            elif args[i] == "-L" or args[i] == "--logfile":
                skipthis=1
                LogFile = args[i+1]
            elif args[i] == "--test-ip-base":
                skipthis=1
                IPBase = args[i+1]
            elif args[i] == "--trunc":
                TruncateLog=1
            elif args[i] == "-v2":
                Version=2
            elif args[i] == "--stonith":
                skipthis=1
                if args[i+1] == "1" or args[i+1] == "yes":
                    DoStonith=1
                elif args[i+1] == "0" or args[i+1] == "no":
                    DoStonith=0
                else:
                    usage(args[i+1])
            elif args[i] == "--standby":
                skipthis=1
                if args[i+1] == "1" or args[i+1] == "yes":
                    DoStandby=1
                elif args[i+1] == "0" or args[i+1] == "no":
                    DoStandby=0
                else:
                    usage(args[i+1])
            elif args[i] == "--fencing":
                skipthis=1
                if args[i+1] == "1" or args[i+1] == "yes":
                    DoFencing=1
                elif args[i+1] == "0" or args[i+1] == "no":
                    DoFencing=0
                else:
                    usage(args[i+1])
            elif args[i] == "--suppressmonitoring":
                SuppressMonitoring = 1
            elif args[i] == "--resource-can-stop":
                ResCanStop = 1
            elif args[i] == "-2" or args[i] == "--crm":
                Version = 2
            elif args[i] == "-1" or args[i] == "--classic":
                Version = 1
            elif args[i] == "--clobber-cib" or args[i] == "-c":
                ClobberCIB = 1
            elif args[i] == "--cib-filename":
                skipthis=1
                CIBfilename = args[i+1]
            elif args[i] == "--xmit-loss":
                try:
                    float(args[i+1])
                except ValueError:
                    print("--xmit-loss parameter should be float")
                    usage(args[i+1])
                skipthis=1
                XmitLoss = args[i+1]
            elif args[i] == "--recv-loss":
                try:
                    float(args[i+1])
                except ValueError:
                    print("--recv-loss parameter should be float")
                    usage(args[i+1])
                skipthis=1
                RecvLoss = args[i+1]
            elif args[i] == "--choose":
                skipthis=1
                TestCase = args[i+1]
            elif args[i] == "--syslog-facility" or args[i] == "--facility":
                skipthis=1
                LogFacility = args[i+1]
            else:
                NumIter=int(args[i])
        except (IndexError, ValueError):
            # An option at the end of the line without its value, or a
            # non-numeric value where a number is required: show the usage
            # text instead of a traceback.
            usage(args[i])

    #
    # This reading of HBconfig here is ugly, and I suppose ought to
    # be done by the Cluster manager. This would probably mean moving the
    # list of cluster nodes into the ClusterManager class. A good thought
    # for our Copious Spare Time in the future...
    #
    config = CM_hb.HBConfig(HAdir)
    node_list = config.Parameters["node"]

    if LogFacility is None:
        if config.Parameters.has_key("logfacility"):
            LogFacility = config.Parameters["logfacility"][0]
        else:
            LogFacility = "local7"

    if LimitNodes > 0:
        if len(node_list) > LimitNodes:
            print("Limiting the number of nodes configured=%d (max=%d)"
                  %(len(node_list), LimitNodes))
            # Drop nodes from the end of the list, in place.
            while len(node_list) > LimitNodes:
                node_list.pop(len(node_list)-1)

    Environment = CtsLab(node_list)
    Environment["HAdir"] = HAdir
    Environment["ClobberCIB"] = ClobberCIB
    Environment["CIBfilename"] = CIBfilename
    Environment["CIBResource"] = CIBResource
    Environment["LogFileName"] = LogFile
    Environment["DoStonith"] = DoStonith
    Environment["SyslogFacility"] = LogFacility
    Environment["DoStandby"] = DoStandby
    Environment["DoFencing"] = DoFencing
    Environment["ResCanStop"] = ResCanStop
    Environment["SuppressMonitoring"] = SuppressMonitoring
    Environment["XmitLoss"] = XmitLoss
    Environment["RecvLoss"] = RecvLoss
    Environment["IPBase"] = IPBase

    if Version == 2:
        from CM_LinuxHAv2 import LinuxHAv2
        Environment['CMclass']=LinuxHAv2

    #Environment["RandSeed"] = (156, 104, 218)

    Environment["reset"] = Stonith(sttype="external/ssh", parm=" ".join(node_list))
    Environment.SupplyDefaults()

    # Your basic start up the world type of test scenario...

    #scenario = Scenario(
    #[ InitClusterManager(Environment)
    #, PingFest(Environment)])
    scenario = Scenario(
        [ InitClusterManager(Environment),
          PacketLoss(Environment)])

    # Create the Cluster Manager object
    cm = Environment['CMclass'](Environment)

    if TruncateLog:
        # Opening with "w" already empties the file; truncate(0) is kept
        # to make the intent explicit.
        lf = open(LogFile, "w")
        try:
            lf.truncate(0)
        finally:
            lf.close()

    cm.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ")
    cm.log("HA configuration directory: " + Environment["HAdir"])
    cm.log("System log files: " + Environment["LogFileName"])
    cm.log("Enable Stonith: " + ("%d" % Environment["DoStonith"]))
    cm.log("Enable Standby: " + ("%d" % Environment["DoStandby"]))
    if Environment.has_key("SuppressMonitoring") \
            and Environment["SuppressMonitoring"]:
        cm.log("Resource Monitoring is disabled")
    cm.log("Cluster nodes: " + repr(config.Parameters["node"]))

    Audits = AuditList(cm)
    Tests = []

    if TestCase is not None:
        # Run only the test whose name was given with --choose.
        for test in TestList(cm):
            if test.name == TestCase:
                Tests.append(test)
        if Tests == []:
            usage("--choose: No applicable/valid tests chosen")
    else:
        Tests = TestList(cm)

    tests = RandomTests(scenario, cm, Tests, Audits)
    Environment.RandomTests = tests
    overall, detailed = tests.run(NumIter)
    tests.summarize()