diff --git a/cts/CM_ais.py b/cts/CM_ais.py
index 429265e87b..b8540da1fd 100644
--- a/cts/CM_ais.py
+++ b/cts/CM_ais.py
@@ -1,353 +1,382 @@
 '''CTS: Cluster Testing System: AIS dependent modules...
 '''

 __copyright__='''
 Copyright (C) 2007 Andrew Beekhof
 '''

 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

 import os, sys, warnings
 from cts.CTSvars import *
 from cts.CM_lha import crm_lha
 from cts.CTS import Process

 #######################################################################
 #
 # LinuxHA v2 dependent modules
 #
 #######################################################################

 class crm_ais(crm_lha):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of openais
     '''
     def __init__(self, Environment, randseed=None):
         crm_lha.__init__(self, Environment, randseed=randseed)

         self.update({
             "Name" : "crm-ais",

             "EpocheCmd" : "crm_node -e --openais",
             "QuorumCmd" : "crm_node -q --openais",
             "ParitionCmd" : "crm_node -p --openais",

             "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
             "Pat:ChildExit" : "Child process .* exited",

             # Bad news Regexes. Should never occur.
"BadRegexes" : ( r"ERROR:", r"CRIT:", r"TRACE:", r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"Child process .* terminated with signal 11", r"Executing .* fencing operation", r"LogActions: Recover", # Not inherently bad, but worth tracking #r"No need to invoke the TE", #r"ping.*: DEBUG: Updated connected = 0", #r"Digest mis-match:", ), }) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "crm_mon:", "crmadmin:", "update_trace_data", "async_notify: strange, client not found", "ERROR: Message hist queue is filling up" ] return [] def NodeUUID(self, node): return node def ais_components(self): fullcomplist = {} self.complist = [] self.common_ignore = [ "Pending action:", "ERROR: crm_log_message_adv:", "ERROR: MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "apply_xml_diff: Diff application failed!", "crmd: .*Action A_RECOVER .* not supported", "pingd: .*ERROR: send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "crmd:.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. 
You may ignore this error if it is unmanaged.", "ERROR: attrd_connection_destroy: Lost connection to attrd", "nfo: te_fence_node: Executing .* fencing operation", ] fullcomplist["cib"] = Process(self, "cib", pats = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Respawning .* attrd", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Child process crmd exited .* rc=2", "Child process attrd exited .* rc=1", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) fullcomplist["lrmd"] = Process(self, "lrmd", pats = [ "State transition .* S_RECOVERY", "LRM Connection failed", "Respawning .* crmd", "crmd: .*I_ERROR.*lrm_connection_destroy", "Child process crmd exited .* rc=2", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) fullcomplist["crmd"] = Process(self, "crmd", pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ], badnews_ignore = self.common_ignore) fullcomplist["attrd"] = Process(self, "attrd", pats = [ "crmd: .*ERROR: attrd_connection_destroy: Lost connection to attrd" ], badnews_ignore = self.common_ignore) fullcomplist["pengine"] = Process(self, "pengine", dc_pats = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Child process crmd exited .* rc=2", "crmd: .*pe_connection_destroy: Connection to the Policy Engine failed", "crmd: .*I_ERROR.*save_cib_contents", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) stonith_ignore = [ "LogActions: Recover Fencing", "update_failcount: Updating failcount for Fencing", "ERROR: te_connect_stonith: Sign-in failed: triggered a retry", ] stonith_ignore.extend(self.common_ignore) fullcomplist["stonith-ng"] = Process(self, "stonith-ng", process="stonithd", pats = [ "CRIT: stonith_dispatch.* Lost connection to the STONITH service", "tengine_stonith_connection_destroy: Fencing daemon connection failed", "Attempting connection to fencing daemon", "te_connect_stonith: Connected", ], badnews_ignore = stonith_ignore) vgrind = self.Env["valgrind-procs"].split() for key in fullcomplist.keys(): if self.Env["valgrind-tests"]: if key in vgrind: # Processes running under valgrind can't be shot with "killall -9 processname" self.log("Filtering %s from the component list as it is being profiled by valgrind" % key) continue if key == "stonith-ng" and not self.Env["DoFencing"]: continue self.complist.append(fullcomplist[key]) #self.complist = [ fullcomplist["pengine"] ] return self.complist class crm_whitetank(crm_ais): ''' The crm version 3 cluster manager class. 
     It implements the things we need to talk to and manipulate
     crm clusters running on top of openais
     '''
     def __init__(self, Environment, randseed=None):
         crm_ais.__init__(self, Environment, randseed=randseed)

         self.update({
             "Name" : "crm-whitetank",
             "StartCmd" : CTSvars.INITDIR+"/openais start",
             "StopCmd" : CTSvars.INITDIR+"/openais stop",

             "Pat:We_stopped" : "%s.*openais.*pcmk_shutdown: Shutdown complete",
             "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
             "Pat:They_dead" : "openais:.*Node %s is now: lost",

             "Pat:ChildKilled" : "%s openais.*Child process %s terminated with signal 9",
             "Pat:ChildRespawn" : "%s openais.*Respawning failed child process: %s",
             "Pat:ChildExit" : "Child process .* exited",
         })

     def Components(self):
         self.ais_components()

         aisexec_ignore = [
             "ERROR: ais_dispatch: Receiving message .* failed",
             "crmd: .*I_ERROR.*crmd_cib_connection_destroy",
             "cib: .*ERROR: cib_ais_destroy: AIS connection terminated",
             #"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated",
             "crmd:.*do_exit: Could not recover from internal error",
             "crmd: .*I_TERMINATE.*do_recover",
             "attrd: .*CRIT: attrd_ais_destroy: Lost connection to OpenAIS service!",
             "stonithd: .*ERROR: AIS connection terminated",
         ]
         aisexec_ignore.extend(self.common_ignore)

         self.complist.append(Process(self, "aisexec", pats = [
             "ERROR: ais_dispatch: AIS connection failed",
             "crmd: .*ERROR: do_exit: Could not recover from internal error",
             "pengine: .*Scheduling Node .* for STONITH",
             "stonithd: .*requests a STONITH operation RESET on node",
             "stonithd: .*Succeeded to STONITH the node",
             ], badnews_ignore = aisexec_ignore))

-class crm_flatiron(crm_ais):
+class crm_cs_v0(crm_ais):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
-    crm clusters running on top of openais
+
+    crm clusters running against version 0 of our plugin
     '''
     def __init__(self, Environment, randseed=None):
         crm_ais.__init__(self, Environment, randseed=randseed)

         self.update({
-            "Name" : "crm-flatiron",
+            "Name" : "crm-plugin-v0",
             "StartCmd" : "service corosync start",
             "StopCmd" : "service corosync stop",

             # The next pattern is too early
             # "Pat:We_stopped" : "%s.*Service engine unloaded: Pacemaker Cluster Manager",
             # The next pattern would be preferred, but it doesn't always come out
             # "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status",
             "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service",
             "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
             "Pat:They_dead" : "corosync:.*Node %s is now: lost",

             "Pat:ChildKilled" : "%s corosync.*Child process %s terminated with signal 9",
             "Pat:ChildRespawn" : "%s corosync.*Respawning failed child process: %s",
         })

     def Components(self):
         self.ais_components()

         corosync_ignore = [
             "ERROR: ais_dispatch: Receiving message .* failed",
             "crmd: .*I_ERROR.*crmd_cib_connection_destroy",
             "cib: .*ERROR: cib_ais_destroy: AIS connection terminated",
             #"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated",
             "crmd:.*do_exit: Could not recover from internal error",
             "crmd: .*I_TERMINATE.*do_recover",
             "attrd: .*CRIT: attrd_ais_destroy: Lost connection to Corosync service!",
             "stonithd: .*ERROR: AIS connection terminated",
         ]

 #        corosync_ignore.extend(self.common_ignore)
 #        self.complist.append(Process(self, "corosync", pats = [
 #            "ERROR: ais_dispatch: AIS connection failed",
 #            "crmd: .*ERROR: do_exit: Could not recover from internal error",
 #            "pengine: .*Scheduling Node .* for STONITH",
 #            "stonithd: .*requests a STONITH operation RESET on node",
 #            "stonithd: .*Succeeded to STONITH the node",
 #            ], badnews_ignore = corosync_ignore))

         return self.complist

-class crm_mcp(crm_flatiron):
+class crm_cs_v1(crm_cs_v0):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
-    crm clusters running on top of openais
+
+    crm clusters running on top of version 1 of our plugin
     '''
     def __init__(self, Environment, randseed=None):
-        crm_flatiron.__init__(self, Environment, randseed=randseed)
+        crm_cs_v0.__init__(self, Environment, randseed=randseed)

         self.update({
-            "Name" : "crm-mcp",
+            "Name" : "crm-plugin-v1",
             "StartCmd" : "service corosync start; service pacemaker start",
             "StopCmd" : "service pacemaker stop; service corosync stop",

             "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service",
             "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
             "Pat:They_dead" : "crmd:.*Node %s: .* state=lost .new",

             "Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9",
             "Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s",
         })

-class crm_cman(crm_flatiron):
+class crm_mcp(crm_cs_v0):
+    '''
+    The crm version 4 cluster manager class.
+    It implements the things we need to talk to and manipulate
+    crm clusters running on top of native corosync (no plugins)
+    '''
+    def __init__(self, Environment, randseed=None):
+        crm_cs_v0.__init__(self, Environment, randseed=randseed)
+
+        self.update({
+            "Name" : "crm-mcp",
+            "StartCmd" : "service corosync start; service pacemaker start",
+            "StopCmd" : "service pacemaker stop; service corosync stop",
+
+            "EpocheCmd" : "crm_node -e",
+            "QuorumCmd" : "crm_node -q",
+            "ParitionCmd" : "crm_node -p",
+
+            "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting normally",
+            "Pat:They_stopped" : "%s crmd[:\[].*Node %s: .* state=lost .new",
+            "Pat:They_dead" : "crmd[:\[].*Node %s: .* state=lost .new",
+
+            "Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9",
+            "Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s",
+        })
+
+
+class crm_cman(crm_cs_v0):
     '''
     The crm version 3 cluster manager class.
     It implements the things we need to talk to and manipulate
     crm clusters running on top of openais
     '''
     def __init__(self, Environment, randseed=None):
-        crm_flatiron.__init__(self, Environment, randseed=randseed)
+        crm_cs_v0.__init__(self, Environment, randseed=randseed)

         self.update({
             "Name" : "crm-cman",
             "StartCmd" : "service cman start; service pacemaker start",
             "StopCmd" : "service pacemaker stop; service cman stop;",

             "EpocheCmd" : "crm_node -e --cman",
             "QuorumCmd" : "crm_node -q --cman",
             "ParitionCmd" : "crm_node -p --cman",

             "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service",
             "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new",
             "Pat:They_dead" : "crmd:.*Node %s: .* state=lost .new",

             "Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9",
             "Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s",
         })
diff --git a/cts/CTSlab.py b/cts/CTSlab.py
index 319486bc87..751e841be6 100755
--- a/cts/CTSlab.py
+++ b/cts/CTSlab.py
@@ -1,581 +1,586 @@
 #!/usr/bin/python

 '''CTS: Cluster Testing System: Lab environment module
 '''

 __copyright__='''
 Copyright (C) 2001,2005 Alan Robertson
 Licensed under the GNU GPL.
 '''

 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

 from UserDict import UserDict
 import sys, types, string, string, signal, os, socket

 pdir=os.path.dirname(sys.path[0])
 sys.path.insert(0, pdir) # So that things work from the source directory

 try:
     from cts.CTSvars import *
     from cts.CM_ais import *
     from cts.CM_lha import crm_lha
     from cts.CTSaudits import AuditList
     from cts.CTStests import TestList
     from cts.CTSscenarios import *

 except ImportError:
     sys.stderr.write("abort: couldn't find cts libraries in [%s]\n" % ' '.join(sys.path))
     sys.stderr.write("(check your install and PYTHONPATH)\n")

     # Now do it again to get more details
     from cts.CTSvars import *
     from cts.CM_ais import *
     from cts.CM_lha import crm_lha
     from cts.CTSaudits import AuditList
     from cts.CTStests import TestList
     from cts.CTSscenarios import *
     sys.exit(-1)

 cm = None
 Tests = []
 Chosen = []
 scenario = None

 # Not really used, the handler in
 def sig_handler(signum, frame) :
     if cm: cm.log("Interrupted by signal %d"%signum)
     if scenario: scenario.summarize()
     if signum == 15 :
         if scenario: scenario.TearDown()
         sys.exit(1)

 class LabEnvironment(CtsLab):

     def __init__(self):
         CtsLab.__init__(self)

         # Get a random seed for the random number generator.
self["DoStandby"] = 1 self["DoFencing"] = 1 self["XmitLoss"] = "0.0" self["RecvLoss"] = "0.0" self["ClobberCIB"] = 0 self["CIBfilename"] = None self["CIBResource"] = 0 self["DoBSC"] = 0 self["use_logd"] = 0 self["oprofile"] = [] self["warn-inactive"] = 0 self["ListTests"] = 0 self["benchmark"] = 0 self["Schema"] = "pacemaker-1.0" self["Stack"] = "openais" self["stonith-type"] = "external/ssh" self["stonith-params"] = "hostlist=all,livedangerously=yes" self["logger"] = ([StdErrLog(self)]) self["loop-minutes"] = 60 self["valgrind-prefix"] = None self["valgrind-procs"] = "cib crmd attrd pengine stonith-ng" self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" #self["valgrind-opts"] = """--trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" self["experimental-tests"] = 0 self["valgrind-tests"] = 0 self["unsafe-tests"] = 1 self["loop-tests"] = 1 self["scenario"] = "random" master = socket.gethostname() # Use the IP where possible to avoid name lookup failures for ip in socket.gethostbyname_ex(master)[2]: if ip != "127.0.0.1": master = ip break; self["cts-master"] = master def usage(arg, status=1): print "Illegal argument " + arg print "usage: " + sys.argv[0] +" [options] number-of-iterations" print "\nCommon options: " print "\t [--nodes 'node list'] list of cluster nodes separated by whitespace" print "\t [--group | -g 'name'] use the nodes listed in the named DSH group (~/.dsh/groups/$name)" print "\t [--limit-nodes max] only use the first 'max' cluster nodes supplied with --nodes" print "\t [--stack (heartbeat|ais)] which cluster stack is installed" print "\t [--list-tests] list the valid tests" print "\t [--benchmark] add the timing information" print "\t " print "Options that CTS will usually auto-detect correctly: " print "\t [--logfile path] where should the test software look for logs from cluster nodes" print "\t [--syslog-facility name] which syslog facility should the test software log to" print "\t [--at-boot (1|0)] does the cluster software start at boot time" print "\t [--test-ip-base ip] offset for generated IP address resources" print "\t " print "Options for release testing: " print "\t [--populate-resources | -r] generate a sample configuration" print "\t [--choose name] run only the named test" print "\t [--stonith (1 | 0 | yes | no | rhcs | ssh)]" print "\t [--once] run all valid tests once" print "\t " print "Additional (less common) options: " print "\t [--clobber-cib | -c ] erase any existing configuration" print "\t [--outputfile path] optional location for the test software to write logs to" print "\t [--trunc] truncate logfile before starting" print "\t [--xmit-loss lost-rate(0.0-1.0)]" print "\t [--recv-loss lost-rate(0.0-1.0)]" print "\t [--standby (1 | 0 | yes | no)]" print "\t [--fencing (1 | 0 | yes | no)]" print "\t [--stonith-type type]" print "\t [--stonith-args name=value]" print "\t [--bsc]" print "\t [--no-loop-tests] dont run looping/time-based tests" print "\t [--no-unsafe-tests] dont run tests that are unsafe for use with ocfs2/drbd" print "\t [--valgrind-tests] include tests using valgrind" print "\t [--experimental-tests] include experimental tests" print "\t [--oprofile 'node list'] list of cluster nodes to run oprofile on]" print "\t [--qarsh] use the QARSH backdoor to access nodes instead of SSH" print "\t [--seed random_seed]" print "\t [--set option=value]" print "\t " 
print "\t Example: " print "\t python ./CTSlab.py -g virt1 --stack cs -r --stonith ssh --schema pacemaker-1.0 500" sys.exit(status) # # A little test code... # if __name__ == '__main__': Environment = LabEnvironment() rsh = RemoteExec(None, silent=True) NumIter = 0 Version = 1 LimitNodes = 0 TruncateLog = 0 ListTests = 0 HaveSeed = 0 node_list = '' # Set the signal handler signal.signal(15, sig_handler) signal.signal(10, sig_handler) # Process arguments... skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-l" or args[i] == "--limit-nodes": skipthis=1 LimitNodes = int(args[i+1]) elif args[i] == "-r" or args[i] == "--populate-resources": Environment["CIBResource"] = 1 Environment["ClobberCIB"] = 1 elif args[i] == "-L" or args[i] == "--logfile": skipthis=1 Environment["LogFileName"] = args[i+1] elif args[i] == "--outputfile": skipthis=1 Environment["OutputFile"] = args[i+1] elif args[i] == "--test-ip-base": skipthis=1 Environment["IPBase"] = args[i+1] Environment["CIBResource"] = 1 Environment["ClobberCIB"] = 1 elif args[i] == "--oprofile": skipthis=1 Environment["oprofile"] = args[i+1].split(' ') elif args[i] == "--trunc": Environment["TruncateLog"]=1 elif args[i] == "--list-tests" or args[i] == "--list" : Environment["ListTests"]=1 elif args[i] == "--benchmark": Environment["benchmark"]=1 elif args[i] == "--bsc": Environment["DoBSC"] = 1 Environment["scenario"] = "basic-sanity" elif args[i] == "--qarsh": Environment.rsh.enable_qarsh() rsh.enable_qarsh() elif args[i] == "--fencing": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoFencing"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoFencing"] = 0 else: usage(args[i+1]) elif args[i] == "--stonith": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoFencing"]=1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoFencing"]=0 elif args[i+1] == "rhcs": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_xvm" Environment["stonith-params"] = "pcmk_arg_map=domain:uname" elif args[i+1] == "ssh" or args[i+1] == "lha": Environment["DoStonith"]=1 Environment["stonith-type"] = "external/ssh" Environment["stonith-params"] = "hostlist=all,livedangerously=yes" elif args[i+1] == "north": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_apc" Environment["stonith-params"] = "ipaddr=north-apc,login=apc,passwd=apc,pcmk_host_map=north-01:2;north-02:3;north-03:4;north-04:5;north-05:6;north-06:7;north-07:9;north-08:10;north-09:11;north-10:12;north-11:13;north-12:14;north-13:15;north-14:18;north-15:17;north-16:19;" elif args[i+1] == "south": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_apc" Environment["stonith-params"] = "ipaddr=south-apc,login=apc,passwd=apc,pcmk_host_map=south-01:2;south-02:3;south-03:4;south-04:5;south-05:6;south-06:7;south-07:9;south-08:10;south-09:11;south-10:12;south-11:13;south-12:14;south-13:15;south-14:18;south-15:17;south-16:19;" elif args[i+1] == "east": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_apc" Environment["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;" elif args[i+1] == "west": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_apc" Environment["stonith-params"] = 
"ipaddr=west-apc,login=apc,passwd=apc,pcmk_host_map=west-01:2;west-02:3;west-03:4;west-04:5;west-05:6;west-06:7;west-07:9;west-08:10;west-09:11;west-10:12;west-11:13;west-12:14;west-13:15;west-14:18;west-15:17;west-16:19;" else: usage(args[i+1]) elif args[i] == "--stonith-type": Environment["stonith-type"] = args[i+1] skipthis=1 elif args[i] == "--stonith-args": Environment["stonith-params"] = args[i+1] skipthis=1 elif args[i] == "--standby": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoStandby"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoStandby"] = 0 else: usage(args[i+1]) elif args[i] == "--clobber-cib" or args[i] == "-c": Environment["ClobberCIB"] = 1 elif args[i] == "--cib-filename": skipthis=1 Environment["CIBfilename"] = args[i+1] elif args[i] == "--xmit-loss": try: float(args[i+1]) except ValueError: print ("--xmit-loss parameter should be float") usage(args[i+1]) skipthis=1 Environment["XmitLoss"] = args[i+1] elif args[i] == "--recv-loss": try: float(args[i+1]) except ValueError: print ("--recv-loss parameter should be float") usage(args[i+1]) skipthis=1 Environment["RecvLoss"] = args[i+1] elif args[i] == "--choose": skipthis=1 Chosen.append(args[i+1]) Environment["scenario"] = "sequence" elif args[i] == "--nodes": skipthis=1 node_list = args[i+1].split(' ') elif args[i] == "-g" or args[i] == "--group" or args[i] == "--dsh-group": skipthis=1 if os.environ['USER'] == 'root': Environment["OutputFile"] = "/var/log/cluster-%s.log" % args[i+1] else: Environment["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args[i+1]) dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args[i+1]) if os.path.isfile(dsh_file): node_list = [] f = open(dsh_file, 'r') for line in f: l = line.strip().rstrip() if not l.startswith('#'): node_list.append(l) f.close() else: print("Unknown DSH group: %s" % args[i+1]) elif args[i] == "--syslog-facility" or args[i] == "--facility": skipthis=1 Environment["SyslogFacility"] = args[i+1] elif args[i] == "--seed": skipthis=1 Environment.SeedRandom(args[i+1]) elif args[i] == "--warn-inactive": Environment["warn-inactive"] = 1 elif args[i] == "--schema": skipthis=1 Environment["Schema"] = args[i+1] elif args[i] == "--ais": Environment["Stack"] = "openais" elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["at-boot"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["at-boot"] = 0 else: usage(args[i+1]) elif args[i] == "--heartbeat" or args[i] == "--lha": Environment["Stack"] = "heartbeat" elif args[i] == "--hae": Environment["Stack"] = "openais" Environment["Schema"] = "hae" elif args[i] == "--stack": Environment["Stack"] = args[i+1] skipthis=1 elif args[i] == "--once": Environment["scenario"] = "all-once" elif args[i] == "--valgrind-tests": Environment["valgrind-tests"] = 1 elif args[i] == "--no-loop-tests": Environment["loop-tests"] = 0 elif args[i] == "--loop-minutes": skipthis=1 try: Environment["loop-minutes"]=int(args[i+1]) except ValueError: usage(args[i]) elif args[i] == "--no-unsafe-tests": Environment["unsafe-tests"] = 0 elif args[i] == "--experimental-tests": Environment["experimental-tests"] = 1 elif args[i] == "--set": skipthis=1 (name, value) = args[i+1].split('=') Environment[name] = value else: try: NumIter=int(args[i]) except ValueError: usage(args[i]) if Environment["DoBSC"]: NumIter = 2 LimitNodes = 1 Chosen.append("AddResource") Environment["ClobberCIB"] = 1 Environment["CIBResource"] = 0 
Environment["logger"].append(FileLog(Environment, Environment["LogFileName"])) elif Environment["OutputFile"]: Environment["logger"].append(FileLog(Environment, Environment["OutputFile"])) elif Environment["SyslogFacility"]: Environment["logger"].append(SysLog(Environment)) if Environment["Stack"] == "heartbeat" or Environment["Stack"] == "lha": Environment["Stack"] = "heartbeat" Environment['CMclass'] = crm_lha elif Environment["Stack"] == "openais" or Environment["Stack"] == "ais" or Environment["Stack"] == "whitetank": Environment["Stack"] = "openais (whitetank)" Environment['CMclass'] = crm_whitetank Environment["use_logd"] = 0 - elif Environment["Stack"] == "corosync" or Environment["Stack"] == "cs" or Environment["Stack"] == "flatiron": - Environment["Stack"] = "corosync (flatiron)" - Environment['CMclass'] = crm_flatiron + elif Environment["Stack"] == "corosync" or Environment["Stack"] == "cs" or Environment["Stack"] == "mcp": + Environment["Stack"] = "corosync" + Environment['CMclass'] = crm_mcp Environment["use_logd"] = 0 elif Environment["Stack"] == "cman": Environment["Stack"] = "corosync (cman)" Environment['CMclass'] = crm_cman Environment["use_logd"] = 0 - elif Environment["Stack"] == "mcp": - Environment["Stack"] = "corosync (mcp)" - Environment['CMclass'] = crm_mcp + elif Environment["Stack"] == "v1": + Environment["Stack"] = "corosync (plugin v1)" + Environment['CMclass'] = crm_cs_v1 + Environment["use_logd"] = 0 + + elif Environment["Stack"] == "v0": + Environment["Stack"] = "corosync (plugin v0)" + Environment['CMclass'] = crm_cs_v0 Environment["use_logd"] = 0 else: print "Unknown stack: "+Environment["Stack"] sys.exit(1) if len(node_list) < 1: print "No nodes specified!" sys.exit(1) if LimitNodes > 0: if len(node_list) > LimitNodes: print("Limiting the number of nodes configured=%d (max=%d)" %(len(node_list), LimitNodes)) while len(node_list) > LimitNodes: node_list.pop(len(node_list)-1) Environment["nodes"] = [] for n in node_list: if len(n.strip()): Environment["nodes"].append(n.strip()) discover = random.Random().choice(Environment["nodes"]) Environment["have_systemd"] = not rsh(discover, "systemctl list-units") # Detect syslog variant if not Environment.has_key("syslogd") or not Environment["syslogd"]: if Environment["have_systemd"]: # Systemd Environment["syslogd"] = rsh(discover, "systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", stdout=1) else: # SYS-V Environment["syslogd"] = rsh(discover, "chkconfig | grep syslog.*on | awk '{print $1}' | head -n 1", stdout=1) if not Environment.has_key("syslogd") or not Environment["syslogd"]: # default Environment["syslogd"] = "rsyslog" # Detect if the cluster starts at boot if not Environment.has_key("at-boot"): atboot = 0 if Environment["have_systemd"]: # Systemd atboot = atboot or not rsh(discover, "systemctl is-enabled heartbeat.service") atboot = atboot or not rsh(discover, "systemctl is-enabled corosync.service") atboot = atboot or not rsh(discover, "systemctl is-enabled pacemaker.service") else: # SYS-V atboot = atboot or not rsh(discover, "chkconfig | grep -e corosync.*on -e heartbeat.*on -e pacemaker.*on") Environment["at-boot"] = atboot # Try to determinw an offset for IPaddr resources if Environment["CIBResource"] and not Environment.has_key("IPBase"): network=rsh(discover, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", stdout=1).strip() Environment["IPBase"] = rsh(discover, "nmap -sn %s | grep 'scan report' | sort -u | tail -n 1 | awk 
'{print $NF}' | sed 's:(::' | sed 's:)::'" % network, stdout=1).strip() if not Environment["IPBase"]: Environment["IPBase"] = "127.0.0.10" Environment.log("Could not determine an offset for IPaddr resources. Perhaps nmap is not installed on the nodes.") Environment.log("Defaulting to '%s', use --test-ip-base to override" % Environment["IPBase"]) # Create the Cluster Manager object cm = Environment['CMclass'](Environment) if TruncateLog: Environment.log("Truncating %s" % LogFile) lf = open(LogFile, "w"); if lf != None: lf.truncate(0) lf.close() Audits = AuditList(cm) if Environment["ListTests"] == 1 : Tests = TestList(cm, Audits) Environment.log("Total %d tests"%len(Tests)) for test in Tests : Environment.log(str(test.name)); sys.exit(0) if len(Chosen) == 0: Tests = TestList(cm, Audits) else: for TestCase in Chosen: match = None for test in TestList(cm, Audits): if test.name == TestCase: match = test if not match: usage("--choose: No applicable/valid tests chosen") else: Tests.append(match) # Scenario selection if Environment["scenario"] == "basic-sanity": scenario = RandomTests(cm, [ BasicSanityCheck(Environment) ], Audits, Tests) elif Environment["scenario"] == "all-once": NumIter = len(Tests) scenario = AllOnce( cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests) elif Environment["scenario"] == "sequence": scenario = Sequence( cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests) else: scenario = RandomTests( cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests) Environment.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ") Environment.log("Stack: %s" % Environment["Stack"]) Environment.log("Schema: %s" % Environment["Schema"]) Environment.log("Scenario: %s" % scenario.__doc__) Environment.log("CTS Master: %s" % Environment["cts-master"]) Environment.log("CTS Logfile: %s" % Environment["OutputFile"]) Environment.log("Random Seed: %s" % Environment["RandSeed"]) Environment.log("Syslog variant: %s" % Environment["syslogd"].strip()) Environment.log("System log files: %s" % Environment["LogFileName"]) # Environment.log(" ") if Environment.has_key("IPBase"): Environment.log("Base IP for resources: %s" % Environment["IPBase"]) Environment.log("Cluster starts at boot: %d" % Environment["at-boot"]) Environment.dump() rc = Environment.run(scenario, NumIter) sys.exit(rc)